/* Small compiler - Binary code generation (the "assembler") * * Copyright (c) ITB CompuPhase, 1997-2003 * * This software is provided "as-is", without any express or implied warranty. * In no event will the authors be held liable for any damages arising from * the use of this software. * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: * * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software in * a product, an acknowledgment in the product documentation would be * appreciated but is not required. * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. * 3. This notice may not be removed or altered from any source distribution. * * Version: $Id$ */ #include #include #include /* for macro max() */ #include #include #if defined LINUX # include "embryo_cc_osdefs.h" # include #endif #if defined FORTIFY # include "fortify.h" #endif #include "embryo_cc_sc.h" typedef cell (*OPCODE_PROC)(FILE *fbin,char *params,cell opcode); typedef struct { cell opcode; char *name; int segment; /* sIN_CSEG=parse in cseg, sIN_DSEG=parse in dseg */ OPCODE_PROC func; } OPCODE; static cell codeindex; /* similar to "code_idx" */ static cell *lbltab; /* label table */ static int writeerror; static int bytes_in, bytes_out; /* apparently, strtol() does not work correctly on very large (unsigned) * hexadecimal values */ static ucell hex2long(char *s,char **n) { unsigned long result=0L; int negate=FALSE; int digit; /* ignore leading whitespace */ while (*s==' ' || *s=='\t') s++; /* allow a negation sign to create the two's complement of numbers */ if (*s=='-') { negate=TRUE; s++; } /* if */ assert((*s>='0' && *s<='9') || (*s>='a' && *s<='f') || (*s>='a' && *s<='f')); for ( ;; ) { if (*s>='0' && *s<='9') digit=*s-'0'; else if (*s>='a' && *s<='f') digit=*s-'a' + 10; else if (*s>='A' && *s<='F') digit=*s-'A' + 10; else break; /* probably whitespace */ result=(result<<4) | digit; s++; } /* for */ if (n!=NULL) *n=s; if (negate) result=(~result)+1; /* take two's complement of the result */ return (ucell)result; } #if BYTE_ORDER==BIG_ENDIAN static short *align16(short *v) { unsigned char *s = (unsigned char *)v; unsigned char t; /* swap two bytes */ t=s[0]; s[0]=s[1]; s[1]=t; return v; } static long *align32(long *v) { unsigned char *s = (unsigned char *)v; unsigned char t; /* swap outer two bytes */ t=s[0]; s[0]=s[3]; s[3]=t; /* swap inner two bytes */ t=s[1]; s[1]=s[2]; s[2]=t; return v; } #if defined BIT16 #define aligncell(v) align16(v) #else #define aligncell(v) align32(v) #endif #else #define align16(v) (v) #define align32(v) (v) #define aligncell(v) (v) #endif static char *skipwhitespace(char *str) { while (isspace(*str)) str++; return str; } static char *stripcomment(char *str) { char *ptr=strchr(str,';'); if (ptr!=NULL) { *ptr++='\n'; /* terminate the line, but leave the '\n' */ *ptr='\0'; } /* if */ return str; } static void write_encoded(FILE *fbin,ucell *c,int num) { assert(sizeof(cell)<=4); /* code must be adjusted for larger cells */ assert(fbin!=NULL); while (num-->0) { if (sc_compress) { ucell p=(ucell)*c; unsigned char t[5]; /* a 32-bit cell is encoded in max. 5 bytes (3 bytes for a 16-bit cell) */ unsigned char code; int index; for (index=0; index<5; index++) { t[index]=(unsigned char)(p & 0x7f); /* store 7 bits */ p>>=7; } /* for */ /* skip leading zeros */ while (index>1 && t[index-1]==0 && (t[index-2] & 0x40)==0) index--; /* skip leading -1s */ /* ??? for BIT16, check for index==3 && t[index-1]==0x03 */ if (index==5 && t[index-1]==0x0f && (t[index-2] & 0x40)!=0) index--; while (index>1 && t[index-1]==0x7f && (t[index-2] & 0x40)!=0) index--; /* write high byte first, write continuation bits */ assert(index>0); while (index-->0) { code=(unsigned char)((index==0) ? t[index] : (t[index]|0x80)); writeerror |= !sc_writebin(fbin,&code,1); bytes_out++; } /* while */ bytes_in+=sizeof *c; assert(AMX_EXPANDMARGIN>2); if (bytes_out-bytes_in>=AMX_EXPANDMARGIN-2) error(106); /* compression buffer overflow */ } else { assert((sc_lengthbin(fbin) % sizeof(cell)) == 0); writeerror |= !sc_writebin(fbin,aligncell(c),sizeof *c); } /* if */ c++; } /* while */ } #if defined __BORLANDC__ || defined __WATCOMC__ #pragma argsused #endif static cell noop(FILE *fbin,char *params,cell opcode) { return 0; } #if defined __BORLANDC__ || defined __WATCOMC__ #pragma argsused #endif static cell parm0(FILE *fbin,char *params,cell opcode) { if (fbin!=NULL) write_encoded(fbin,(ucell*)&opcode,1); return opcodes(1); } static cell parm1(FILE *fbin,char *params,cell opcode) { ucell p=hex2long(params,NULL); if (fbin!=NULL) { write_encoded(fbin,(ucell*)&opcode,1); write_encoded(fbin,&p,1); } /* if */ return opcodes(1)+opargs(1); } static cell parm2(FILE *fbin,char *params,cell opcode) { ucell p[2]; p[0]=hex2long(params,¶ms); p[1]=hex2long(params,NULL); if (fbin!=NULL) { write_encoded(fbin,(ucell*)&opcode,1); write_encoded(fbin,p,2); } /* if */ return opcodes(1)+opargs(2); } #if defined __BORLANDC__ || defined __WATCOMC__ #pragma argsused #endif static cell do_dump(FILE *fbin,char *params,cell opcode) { ucell p; int num = 0; while (*params!='\0') { p=hex2long(params,¶ms); if (fbin!=NULL) write_encoded(fbin,&p,1); num++; while (isspace(*params)) params++; } /* while */ return num*sizeof(cell); } static cell do_call(FILE *fbin,char *params,cell opcode) { char name[sNAMEMAX+1]; int i; symbol *sym; ucell p; for (i=0; !isspace(*params); i++,params++) { assert(*params!='\0'); assert(iident==iFUNCTN || sym->ident==iREFFUNC); assert(sym->vclass==sGLOBAL); p=sym->addr; if (fbin!=NULL) { write_encoded(fbin,(ucell*)&opcode,1); write_encoded(fbin,&p,1); } /* if */ return opcodes(1)+opargs(1); } static cell do_jump(FILE *fbin,char *params,cell opcode) { int i; ucell p; i=(int)hex2long(params,NULL); assert(i>=0 && i0 && isspace(params[len-1])) len--; params[len++]='\0'; /* zero-terminate */ while (len % sizeof(cell) != 0) params[len++]='\0'; /* pad with zeros up to full cell */ assert(len>0 && len<256); clen=len+sizeof(cell); /* add size of file ordinal */ if (fbin!=NULL) { write_encoded(fbin,(ucell*)&opcode,1); write_encoded(fbin,&clen,1); write_encoded(fbin,&p,1); write_encoded(fbin,(ucell*)params,len/sizeof(cell)); } /* if */ return opcodes(1)+opargs(1)+clen; /* other argument is in clen */ } static cell do_symbol(FILE *fbin,char *params,cell opcode) { char *endptr; ucell offset, clen, flags; int len; unsigned char mclass,type; for (endptr=params; !isspace(*endptr) && endptr!='\0'; endptr++) /* nothing */; assert(*endptr==' '); len=(int)(endptr-params); assert(len>0 && len=0 && i=0 && i=MAX_INSTR_LEN) return 0; strncpy(str,instr,maxlen); str[maxlen]='\0'; /* make sure the string is zero terminated */ /* look up the instruction with a binary search * the assembler is case insensitive to instructions (but case sensitive * to symbols) */ low=1; /* entry 0 is reserved (for "not found") */ high=(sizeof opcodelist / sizeof opcodelist[0])-1; while (low0) low=mid+1; else high=mid; } /* while */ assert(low==high); if (stricmp(str,opcodelist[low].name)==0) return low; /* found */ return 0; /* not found, return special index */ } SC_FUNC void assemble(FILE *fout,FILE *fin) { typedef struct tagFUNCSTUB { uint32_t address,nameofs; } FUNCSTUB; AMX_HEADER hdr; FUNCSTUB func; int numpublics,numnatives,numlibraries,numpubvars,numtags,padding; long nametablesize,nameofs; char line[256],*instr,*params; int i,pass; int16_t count; symbol *sym, **nativelist; constvalue *constptr; cell mainaddr; #if !defined NDEBUG /* verify that the opcode list is sorted (skip entry 1; it is reserved * for a non-existant opcode) */ assert(opcodelist[1].name!=NULL); for (i=2; i<(sizeof opcodelist / sizeof opcodelist[0]); i++) { assert(opcodelist[i].name!=NULL); assert(stricmp(opcodelist[i].name,opcodelist[i-1].name)>0); } /* for */ #endif writeerror=FALSE; nametablesize=sizeof(int16_t); numpublics=0; numnatives=0; numpubvars=0; mainaddr=-1; /* count number of public and native functions and public variables */ for (sym=glbtab.next; sym!=NULL; sym=sym->next) { char alias[sNAMEMAX+1]=""; int match=0; if (sym->ident==iFUNCTN) { assert(strlen(sym->name)<=sNAMEMAX); if ((sym->usage & uNATIVE)!=0 && (sym->usage & uREAD)!=0 && sym->addr>=0) { match=++numnatives; if (!lookup_alias(alias,sym->name)) strcpy(alias,sym->name); } /* if */ if ((sym->usage & uPUBLIC)!=0 && (sym->usage & uDEFINE)!=0) { match=++numpublics; strcpy(alias,sym->name); } /* if */ if (strcmp(sym->name,uMAINFUNC)==0) { assert(sym->vclass==sGLOBAL); mainaddr=sym->addr; } /* if */ } else if (sym->ident==iVARIABLE) { if ((sym->usage & uPUBLIC)!=0) { match=++numpubvars; strcpy(alias,sym->name); } /* if */ } /* if */ if (match) { assert(strlen(alias)>0); nametablesize+=strlen(alias)+1; } /* if */ } /* for */ assert(numnatives==ntv_funcid); /* count number of libraries */ numlibraries=0; for (constptr=libname_tab.next; constptr!=NULL; constptr=constptr->next) { if (constptr->value>0) { assert(strlen(constptr->name)>0); numlibraries++; nametablesize+=strlen(constptr->name)+1; } /* if */ } /* for */ /* count number of public tags */ numtags=0; for (constptr=tagname_tab.next; constptr!=NULL; constptr=constptr->next) { if ((constptr->value & PUBLICTAG)!=0) { assert(strlen(constptr->name)>0); numtags++; nametablesize+=strlen(constptr->name)+1; } /* if */ } /* for */ /* pad the header to sc_dataalign * => thereby the code segment is aligned * => since the code segment is padded to a sc_dataalign boundary, the data segment is aligned * => and thereby the stack top is aligned too */ assert(sc_dataalign!=0); padding= sc_dataalign - (sizeof hdr + nametablesize) % sc_dataalign; if (padding==sc_dataalign) padding=0; /* write the abstract machine header */ memset(&hdr, 0, sizeof hdr); hdr.magic=(unsigned short)0xF1E0; hdr.file_version=CUR_FILE_VERSION; hdr.amx_version=MIN_AMX_VERSION; hdr.flags=(short)(sc_debug & sSYMBOLIC); if (charbits==16) hdr.flags|=AMX_FLAG_CHAR16; if (sc_compress) hdr.flags|=AMX_FLAG_COMPACT; if (sc_debug==0) hdr.flags|=AMX_FLAG_NOCHECKS; #if BYTE_ORDER==BIG_ENDIAN hdr.flags|=AMX_FLAG_BIGENDIAN; #endif hdr.defsize=sizeof(FUNCSTUB); assert((hdr.defsize % sizeof(cell))==0); hdr.publics=sizeof hdr; /* public table starts right after the header */ hdr.natives=hdr.publics + numpublics*sizeof(FUNCSTUB); hdr.libraries=hdr.natives + numnatives*sizeof(FUNCSTUB); hdr.pubvars=hdr.libraries + numlibraries*sizeof(FUNCSTUB); hdr.tags=hdr.pubvars + numpubvars*sizeof(FUNCSTUB); hdr.nametable=hdr.tags + numtags*sizeof(FUNCSTUB); hdr.cod=hdr.nametable + nametablesize + padding; hdr.dat=hdr.cod + code_idx; hdr.hea=hdr.dat + glb_declared*sizeof(cell); hdr.stp=hdr.hea + sc_stksize*sizeof(cell); hdr.cip=mainaddr; hdr.size=hdr.hea; /* preset, this is incorrect in case of compressed output */ #if BYTE_ORDER==BIG_ENDIAN align32(&hdr.size); align16(&hdr.magic); align16(&hdr.flags); align16(&hdr.defsize); align32(&hdr.publics); align32(&hdr.natives); align32(&hdr.libraries); align32(&hdr.pubvars); align32(&hdr.tags); align32(&hdr.nametable); align32(&hdr.cod); align32(&hdr.dat); align32(&hdr.hea); align32(&hdr.stp); align32(&hdr.cip); #endif sc_writebin(fout,&hdr,sizeof hdr); /* dump zeros up to the rest of the header, so that we can easily "seek" */ for (nameofs=sizeof hdr; nameofsnext) { if (sym->ident==iFUNCTN && (sym->usage & uPUBLIC)!=0 && (sym->usage & uDEFINE)!=0) { assert(sym->vclass==sGLOBAL); func.address=sym->addr; func.nameofs=nameofs; #if BYTE_ORDER==BIG_ENDIAN align32(&func.address); align32(&func.nameofs); #endif fseek(fout,hdr.publics+count*sizeof(FUNCSTUB),SEEK_SET); sc_writebin(fout,&func,sizeof func); fseek(fout,func.nameofs,SEEK_SET); sc_writebin(fout,sym->name,strlen(sym->name)+1); nameofs+=strlen(sym->name)+1; count++; } /* if */ } /* for */ /* write the natives table */ /* The native functions must be written in sorted order. (They are * sorted on their "id", not on their name). A nested loop to find * each successive function would be an O(n^2) operation. But we * do not really need to sort, because the native function id's * are sequential and there are no duplicates. So we first walk * through the complete symbol list and store a pointer to every * native function of interest in a temporary table, where its id * serves as the index in the table. Now we can walk the table and * have all native functions in sorted order. */ if (numnatives>0) { nativelist=(symbol **)malloc(numnatives*sizeof(symbol *)); if (nativelist==NULL) error(103); /* insufficient memory */ #if !defined NDEBUG memset(nativelist,0,numnatives*sizeof(symbol *)); /* for NULL checking */ #endif for (sym=glbtab.next; sym!=NULL; sym=sym->next) { if (sym->ident==iFUNCTN && (sym->usage & uNATIVE)!=0 && (sym->usage & uREAD)!=0 && sym->addr>=0) { assert(sym->addr < numnatives); nativelist[(int)sym->addr]=sym; } /* if */ } /* for */ count=0; for (i=0; iname)) { assert(strlen(sym->name)<=sNAMEMAX); strcpy(alias,sym->name); } /* if */ assert(sym->vclass==sGLOBAL); func.address=0; func.nameofs=nameofs; #if BYTE_ORDER==BIG_ENDIAN align32(&func.address); align32(&func.nameofs); #endif fseek(fout,hdr.natives+count*sizeof(FUNCSTUB),SEEK_SET); sc_writebin(fout,&func,sizeof func); fseek(fout,func.nameofs,SEEK_SET); sc_writebin(fout,alias,strlen(alias)+1); nameofs+=strlen(alias)+1; count++; } /* for */ free(nativelist); } /* if */ /* write the libraries table */ count=0; for (constptr=libname_tab.next; constptr!=NULL; constptr=constptr->next) { if (constptr->value>0) { assert(strlen(constptr->name)>0); func.address=0; func.nameofs=nameofs; #if BYTE_ORDER==BIG_ENDIAN align32(&func.address); align32(&func.nameofs); #endif fseek(fout,hdr.libraries+count*sizeof(FUNCSTUB),SEEK_SET); sc_writebin(fout,&func,sizeof func); fseek(fout,func.nameofs,SEEK_SET); sc_writebin(fout,constptr->name,strlen(constptr->name)+1); nameofs+=strlen(constptr->name)+1; count++; } /* if */ } /* for */ /* write the public variables table */ count=0; for (sym=glbtab.next; sym!=NULL; sym=sym->next) { if (sym->ident==iVARIABLE && (sym->usage & uPUBLIC)!=0) { assert((sym->usage & uDEFINE)!=0); assert(sym->vclass==sGLOBAL); func.address=sym->addr; func.nameofs=nameofs; #if BYTE_ORDER==BIG_ENDIAN align32(&func.address); align32(&func.nameofs); #endif fseek(fout,hdr.pubvars+count*sizeof(FUNCSTUB),SEEK_SET); sc_writebin(fout,&func,sizeof func); fseek(fout,func.nameofs,SEEK_SET); sc_writebin(fout,sym->name,strlen(sym->name)+1); nameofs+=strlen(sym->name)+1; count++; } /* if */ } /* for */ /* write the public tagnames table */ count=0; for (constptr=tagname_tab.next; constptr!=NULL; constptr=constptr->next) { if ((constptr->value & PUBLICTAG)!=0) { assert(strlen(constptr->name)>0); func.address=constptr->value & TAGMASK; func.nameofs=nameofs; #if BYTE_ORDER==BIG_ENDIAN align32(&func.address); align32(&func.nameofs); #endif fseek(fout,hdr.tags+count*sizeof(FUNCSTUB),SEEK_SET); sc_writebin(fout,&func,sizeof func); fseek(fout,func.nameofs,SEEK_SET); sc_writebin(fout,constptr->name,strlen(constptr->name)+1); nameofs+=strlen(constptr->name)+1; count++; } /* if */ } /* for */ /* write the "maximum name length" field in the name table */ assert(nameofs==hdr.nametable+nametablesize); fseek(fout,hdr.nametable,SEEK_SET); count=sNAMEMAX; #if BYTE_ORDER==BIG_ENDIAN align16(&count); #endif sc_writebin(fout,&count,sizeof count); fseek(fout,hdr.cod,SEEK_SET); /* First pass: relocate all labels */ /* This pass is necessary because the code addresses of labels is only known * after the peephole optimization flag. Labels can occur inside expressions * (e.g. the conditional operator), which are optimized. */ lbltab=NULL; if (labnum>0) { /* only very short programs have zero labels; no first pass is needed * if there are no labels */ lbltab=(cell *)malloc(labnum*sizeof(cell)); if (lbltab==NULL) error(103); /* insufficient memory */ codeindex=0; sc_resetasm(fin); while (sc_readasm(fin,line,sizeof line)!=NULL) { stripcomment(line); instr=skipwhitespace(line); /* ignore empty lines */ if (*instr=='\0') continue; if (tolower(*instr)=='l' && *(instr+1)=='.') { int lindex=(int)hex2long(instr+2,NULL); assert(lindexinstr); i=findopcode(instr,(int)(params-instr)); if (opcodelist[i].name==NULL) { *params='\0'; error(104,instr); /* invalid assembler instruction */ } /* if */ if (opcodelist[i].segment==sIN_CSEG) codeindex+=opcodelist[i].func(NULL,skipwhitespace(params),opcodelist[i].opcode); } /* if */ } /* while */ } /* if */ /* Second pass (actually 2 more passes, one for all code and one for all data) */ bytes_in=0; bytes_out=0; for (pass=sIN_CSEG; pass<=sIN_DSEG; pass++) { sc_resetasm(fin); while (sc_readasm(fin,line,sizeof line)!=NULL) { stripcomment(line); instr=skipwhitespace(line); /* ignore empty lines and labels (labels have a special syntax, so these * must be parsed separately) */ if (*instr=='\0' || tolower(*instr)=='l' && *(instr+1)=='.') continue; /* get to the end of the instruction (make use of the '\n' that fgets() * added at the end of the line; this way we will *always* drop on a * whitespace character) */ for (params=instr; *params!='\0' && !isspace(*params); params++) /* nothing */; assert(params>instr); i=findopcode(instr,(int)(params-instr)); assert(opcodelist[i].name!=NULL); if (opcodelist[i].segment==pass) opcodelist[i].func(fout,skipwhitespace(params),opcodelist[i].opcode); } /* while */ } /* for */ if (bytes_out-bytes_in>0) error(106); /* compression buffer overflow */ if (lbltab!=NULL) { free(lbltab); #if !defined NDEBUG lbltab=NULL; #endif } /* if */ if (writeerror) error(101,"disk full"); /* adjust the header */ if (sc_compress) { hdr.size=sc_lengthbin(fout); #if BYTE_ORDER==BIG_ENDIAN align32(&hdr.size); #endif sc_resetbin(fout); /* "size" is the very first field */ sc_writebin(fout,&hdr.size,sizeof hdr.size); } /* if */ }