summaryrefslogblamecommitdiff
path: root/src/bin/embryo/embryo_cc_sc7.c
blob: f18302d229d3109c39c1a10c40b6bbd84f8f1459 (plain) (tree)






































                                                                               
 
 



                    

                   
                                                

                   
 


                         

                                                                                    




                            
                    

      

                                                      



                         

                                                                        


                                                                                  

                                
 
                         
                                                                                         







                                                                                   
              


                                                         
          



                                                                                   

 
    
                       
 
              




                                        



















                                                                            
    
                  
 




                                        

 

                    
 


                                    


















                                                                              
    
                  
 


























                                                            











                                                                          
    
               
 

                

                                            

 


                                   



















                                                                              
                                   


                                                               

                                 
 










                                                                      
                        






















































                                                                                              






                                                                         
                                                 
   
    
                                
 

               
                     

                                        

 
   
                                   
 

               
                      


                                        











                                                                             
    
                 
 








                                                                         
                              


                                        






                                                                        
                              
 
   
                
 



                                      
                                                         


                                                                             
                                                                    















                                                                             

                                              
                                                
                              



                                                                                

                                              
                                                   
                                 

                                                          
                                                        



                                         

 
   
                   
 

                         
                 

              
                                                         
           
                                   
                                        
                                      






                                           


                         

                                                                        
 


                                                                          
 






                                          
 









                                                  
                                          





















                                                                         
                                                                     


                        
                                                                     





                                                               
                                                                        











                                                                                  

 


                                                                        
 








                                                                             
                               







                                              
                                       














                                                                                
                                        



                    











                                                                             
                                          




















                                                                                 

 


                                                                      
 






                                                         










                                                                            

                              
 


                                                           
                             









                                                                    
                                        





































                                                                                               
                                                 




                                                            


                   
/*  Small compiler - Staging buffer and optimizer
 *
 *  The staging buffer
 *  ------------------
 *  The staging buffer allows buffered output of generated code, deletion
 *  of redundant code, optimization by a tinkering process and reversing
 *  the ouput of evaluated expressions (which is used for the reversed
 *  evaluation of arguments in functions).
 *  Initially, stgwrite() writes to the file directly, but after a call to
 *  stgset(TRUE), output is redirected to the buffer. After a call to
 *  stgset(FALSE), stgwrite()'s output is directed to the file again. Thus
 *  only one routine is used for writing to the output, which can be
 *  buffered output or direct output.
 *
 *  staging buffer variables:   stgbuf  - the buffer
 *                              stgidx  - current index in the staging buffer
 *                              staging - if true, write to the staging buffer;
 *                                        if false, write to file directly.
 *
 *  Copyright (c) ITB CompuPhase, 1997-2003
 *
 *  This software is provided "as-is", without any express or implied warranty.
 *  In no event will the authors be held liable for any damages arising from
 *  the use of this software.
 *
 *  Permission is granted to anyone to use this software for any purpose,
 *  including commercial applications, and to alter it and redistribute it
 *  freely, subject to the following restrictions:
 *
 *  1.  The origin of this software must not be misrepresented; you must not
 *      claim that you wrote the original software. If you use this software in
 *      a product, an acknowledgment in the product documentation would be
 *      appreciated but is not required.
 *  2.  Altered source versions must be plainly marked as such, and must not be
 *      misrepresented as being the original software.
 *  3.  This notice may not be removed or altered from any source distribution.
 *
 *  Version: $Id$
 */


#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>		/* for atoi() */
#include <string.h>
#include <ctype.h>

#include "embryo_cc_sc.h"

#if defined _MSC_VER
#pragma warning(push)
#pragma warning(disable:4125)	/* decimal digit terminates octal escape sequence */
#endif

#include "embryo_cc_sc7.scp"

#if defined _MSC_VER
#pragma warning(pop)
#endif

static void         stgstring(char *start, char *end);
static void         stgopt(char *start, char *end);

#define sSTG_GROW   512
#define sSTG_MAX    20480

static char        *stgbuf = NULL;
static int          stgmax = 0;	/* current size of the staging buffer */

#define CHECK_STGBUFFER(index) if ((int)(index)>=stgmax) grow_stgbuffer((index)+1)

static void
grow_stgbuffer(int requiredsize)
{
   char               *p;
   int                 clear = !stgbuf;	/* if previously none, empty buffer explicitly */

   assert(stgmax < requiredsize);
   /* if the staging buffer (holding intermediate code for one line) grows
    * over a few kBytes, there is probably a run-away expression
    */
   if (requiredsize > sSTG_MAX)
      error(102, "staging buffer");	/* staging buffer overflow (fatal error) */
   stgmax = requiredsize + sSTG_GROW;
   if (stgbuf)
      p = (char *)realloc(stgbuf, stgmax * sizeof(char));
   else
      p = (char *)malloc(stgmax * sizeof(char));
   if (!p)
      error(102, "staging buffer");	/* staging buffer overflow (fatal error) */
   stgbuf = p;
   if (clear)
      *stgbuf = '\0';
}

void
stgbuffer_cleanup(void)
{
   if (stgbuf)
     {
	free(stgbuf);
	stgbuf = NULL;
	stgmax = 0;
     }				/* if */
}

/* the variables "stgidx" and "staging" are declared in "scvars.c" */

/*  stgmark
 *
 *  Copies a mark into the staging buffer. At this moment there are three
 *  possible marks:
 *     sSTARTREORDER    identifies the beginning of a series of expression
 *                      strings that must be written to the output file in
 *                      reordered order
 *    sENDREORDER       identifies the end of 'reverse evaluation'
 *    sEXPRSTART + idx  only valid within a block that is evaluated in
 *                      reordered order, it identifies the start of an
 *                      expression; the "idx" value is the argument position
 *
 *  Global references: stgidx  (altered)
 *                     stgbuf  (altered)
 *                     staging (referred to only)
 */
void
stgmark(char mark)
{
   if (staging)
     {
	CHECK_STGBUFFER(stgidx);
	stgbuf[stgidx++] = mark;
     }				/* if */
}

static int
filewrite(char *str)
{
   if (sc_status == statWRITE)
      return sc_writeasm(outf, str);
   return TRUE;
}

/*  stgwrite
 *
 *  Writes the string "st" to the staging buffer or to the output file. In the
 *  case of writing to the staging buffer, the terminating byte of zero is
 *  copied too, but... the optimizer can only work on complete lines (not on
 *  fractions of it. Therefore if the string is staged, if the last character
 *  written to the buffer is a '\0' and the previous-to-last is not a '\n',
 *  the string is concatenated to the last string in the buffer (the '\0' is
 *  overwritten). This also means an '\n' used in the middle of a string isn't
 *  recognized and could give wrong results with the optimizer.
 *  Even when writing to the output file directly, all strings are buffered
 *  until a whole line is complete.
 *
 *  Global references: stgidx  (altered)
 *                     stgbuf  (altered)
 *                     staging (referred to only)
 */
void
stgwrite(char *st)
{
   int                 len;

   CHECK_STGBUFFER(0);
   if (staging)
     {
	if (stgidx >= 2 && stgbuf[stgidx - 1] == '\0'
	    && stgbuf[stgidx - 2] != '\n')
	   stgidx -= 1;		/* overwrite last '\0' */
	while (*st != '\0')
	  {			/* copy to staging buffer */
	     CHECK_STGBUFFER(stgidx);
	     stgbuf[stgidx++] = *st++;
	  }			/* while */
	CHECK_STGBUFFER(stgidx);
	stgbuf[stgidx++] = '\0';
     }
   else
     {
	CHECK_STGBUFFER(strlen(stgbuf) + strlen(st) + 1);
	strcat(stgbuf, st);
	len = strlen(stgbuf);
	if (len > 0 && stgbuf[len - 1] == '\n')
	  {
	     filewrite(stgbuf);
	     stgbuf[0] = '\0';
	  }			/* if */
     }				/* if */
}

/*  stgout
 *
 *  Writes the staging buffer to the output file via stgstring() (for
 *  reversing expressions in the buffer) and stgopt() (for optimizing). It
 *  resets "stgidx".
 *
 *  Global references: stgidx  (altered)
 *                     stgbuf  (referred to only)
 *                     staging (referred to only)
 */
void
stgout(int idx)
{
   if (!staging)
      return;
   stgstring(&stgbuf[idx], &stgbuf[stgidx]);
   stgidx = idx;
}

typedef struct
{
   char               *start, *end;
} argstack;

/*  stgstring
 *
 *  Analyses whether code strings should be output to the file as they appear
 *  in the staging buffer or whether portions of it should be re-ordered.
 *  Re-ordering takes place in function argument lists; Small passes arguments
 *  to functions from right to left. When arguments are "named" rather than
 *  positional, the order in the source stream is indeterminate.
 *  This function calls itself recursively in case it needs to re-order code
 *  strings, and it uses a private stack (or list) to mark the start and the
 *  end of expressions in their correct (reversed) order.
 *  In any case, stgstring() sends a block as large as possible to the
 *  optimizer stgopt().
 *
 *  In "reorder" mode, each set of code strings must start with the token
 *  sEXPRSTART, even the first. If the token sSTARTREORDER is represented
 *  by '[', sENDREORDER by ']' and sEXPRSTART by '|' the following applies:
 *     '[]...'     valid, but useless; no output
 *     '[|...]     valid, but useless; only one string
 *     '[|...|...] valid and useful
 *     '[...|...]  invalid, first string doesn't start with '|'
 *     '[|...|]    invalid
 */
static void
stgstring(char *start, char *end)
{
   char               *ptr;
   int                 nest, argc, arg;
   argstack           *stack;

   while (start < end)
     {
	if (*start == sSTARTREORDER)
	  {
	     start += 1;	/* skip token */
	     /* allocate a argstack with sMAXARGS items */
	     stack = (argstack *) malloc(sMAXARGS * sizeof(argstack));
	     if (!stack)
		error(103);	/* insufficient memory */
	     nest = 1;		/* nesting counter */
	     argc = 0;		/* argument counter */
	     arg = -1;		/* argument index; no valid argument yet */
	     do
	       {
		  switch (*start)
		    {
		    case sSTARTREORDER:
		       nest++;
		       start++;
		       break;
		    case sENDREORDER:
		       nest--;
		       start++;
		       break;
		    default:
		       if ((*start & sEXPRSTART) == sEXPRSTART)
			 {
			    if (nest == 1)
			      {
				 if (arg >= 0)
				    stack[arg].end = start - 1;	/* finish previous argument */
				 arg = (unsigned char)*start - sEXPRSTART;
				 stack[arg].start = start + 1;
				 if (arg >= argc)
				    argc = arg + 1;
			      }	/* if */
			    start++;
			 }
		       else
			 {
			    start += strlen(start) + 1;
			 }	/* if */
		    }		/* switch */
	       }
	     while (nest);	/* enddo */
	     if (arg >= 0)
		stack[arg].end = start - 1;	/* finish previous argument */
	     while (argc > 0)
	       {
		  argc--;
		  stgstring(stack[argc].start, stack[argc].end);
	       }		/* while */
	     free(stack);
	  }
	else
	  {
	     ptr = start;
	     while (ptr < end && *ptr != sSTARTREORDER)
		ptr += strlen(ptr) + 1;
	     stgopt(start, ptr);
	     start = ptr;
	  }			/* if */
     }				/* while */
}

/*  stgdel
 *
 *  Scraps code from the staging buffer by resetting "stgidx" to "index".
 *
 *  Global references: stgidx (altered)
 *                     staging (referred to only)
 */
void
stgdel(int idx, cell code_index)
{
   if (staging)
     {
	stgidx = idx;
	code_idx = code_index;
     }				/* if */
}

int
stgget(int *idx, cell * code_index)
{
   if (staging)
     {
	*idx = stgidx;
	*code_index = code_idx;
     }				/* if */
   return staging;
}

/*  stgset
 *
 *  Sets staging on or off. If it's turned off, the staging buffer must be
 *  initialized to an empty string. If it's turned on, the routine makes sure
 *  the index ("stgidx") is set to 0 (it should already be 0).
 *
 *  Global references: staging  (altered)
 *                     stgidx   (altered)
 *                     stgbuf   (contents altered)
 */
void
stgset(int onoff)
{
   staging = onoff;
   if (staging)
     {
	assert(stgidx == 0);
	stgidx = 0;
	CHECK_STGBUFFER(stgidx);
	/* write any contents that may be put in the buffer by stgwrite()
	 * when "staging" was 0
	 */
	if (stgbuf[0] != '\0')
	   filewrite(stgbuf);
     }				/* if */
   stgbuf[0] = '\0';
}

/* phopt_init
 * Initialize all sequence strings of the peehole optimizer. The strings
 * are embedded in the .EXE file in compressed format, here we expand
 * them (and allocate memory for the sequences).
 */
static SEQUENCE    *sequences;

int
phopt_init(void)
{
   int                 number, i, len;
   char                str[160];

   /* count number of sequences */
   for (number = 0; sequences_cmp[number].find; number++)
      /* nothing */ ;
   number++;			/* include an item for the NULL terminator */

   if (!(sequences = (SEQUENCE *)malloc(number * sizeof(SEQUENCE))))
      return FALSE;

   /* pre-initialize all to NULL (in case of failure) */
   for (i = 0; i < number; i++)
     {
	sequences[i].find = NULL;
	sequences[i].replace = NULL;
	sequences[i].savesize = 0;
     }				/* for */

   /* expand all strings */
   for (i = 0; i < number - 1; i++)
     {
	len =
	   strexpand(str, (unsigned char *)sequences_cmp[i].find, sizeof str,
		     SCPACK_TABLE);
	assert(len <= (int)(sizeof(str)));
	assert(len == (int)(strlen(str) + 1));
	sequences[i].find = (char *)malloc(len);
	if (sequences[i].find)
	   strcpy(sequences[i].find, str);
	len =
	   strexpand(str, (unsigned char *)sequences_cmp[i].replace, sizeof str,
		     SCPACK_TABLE);
	assert(len <= (int)(sizeof(str)));
	assert(len == (int)(strlen(str) + 1));
	sequences[i].replace = (char *)malloc(len);
	if (sequences[i].replace)
	   strcpy(sequences[i].replace, str);
	sequences[i].savesize = sequences_cmp[i].savesize;
	if (!sequences[i].find || !sequences[i].replace)
	   return phopt_cleanup();
     }				/* for */

   return TRUE;
}

int
phopt_cleanup(void)
{
   int                 i;

   if (sequences)
     {
	i = 0;
	while (sequences[i].find || sequences[i].replace)
	  {
	     if (sequences[i].find)
		free(sequences[i].find);
	     if (sequences[i].replace)
		free(sequences[i].replace);
	     i++;
	  }			/* while */
	free(sequences);
	sequences = NULL;
     }				/* if */
   return FALSE;
}

#define _maxoptvars     4
#define _aliasmax       10	/* a 32-bit number can be represented in
				 * 9 decimal digits */

static int
matchsequence(char *start, char *end, char *pattern,
	      char symbols[_maxoptvars][_aliasmax + 1], int *match_length)
{
   int                 var, i;
   char                str[_aliasmax + 1];
   char               *start_org = start;

   *match_length = 0;
   for (var = 0; var < _maxoptvars; var++)
      symbols[var][0] = '\0';

   while (*start == '\t' || *start == ' ')
      start++;
   while (*pattern)
     {
	if (start >= end)
	   return FALSE;
	switch (*pattern)
	  {
	  case '%':		/* new "symbol" */
	     pattern++;
	     assert(sc_isdigit(*pattern));
	     var = atoi(pattern) - 1;
	     assert(var >= 0 && var < _maxoptvars);
	     assert(alphanum(*start));
	     for (i = 0; start < end && alphanum(*start); i++, start++)
	       {
		  assert(i <= _aliasmax);
		  str[i] = *start;
	       }		/* for */
	     str[i] = '\0';
	     if (symbols[var][0] != '\0')
	       {
		  if (strcmp(symbols[var], str) != 0)
		     return FALSE;	/* symbols should be identical */
	       }
	     else
	       {
		  strcpy(symbols[var], str);
	       }		/* if */
	     break;
	  case ' ':
	     if (*start != '\t' && *start != ' ')
		return FALSE;
	     while ((start < end && *start == '\t') || *start == ' ')
		start++;
	     break;
	  case '!':
	     while ((start < end && *start == '\t') || *start == ' ')
		start++;	/* skip trailing white space */
	     if (*start != '\n')
		return FALSE;
	     assert(*(start + 1) == '\0');
	     start += 2;	/* skip '\n' and '\0' */
	     if (*(pattern + 1) != '\0')
		while ((start < end && *start == '\t') || *start == ' ')
		   start++;	/* skip leading white space of next instruction */
	     break;
	  default:
	     if (tolower(*start) != tolower(*pattern))
		return FALSE;
	     start++;
	  }			/* switch */
	pattern++;
     }				/* while */

   *match_length = (int)(start - start_org);
   return TRUE;
}

static char        *
replacesequence(char *pattern, char symbols[_maxoptvars][_aliasmax + 1],
		int *repl_length)
{
   char               *lptr;
   int                 var;
   char               *buffer;

   /* calculate the length of the new buffer
    * this is the length of the pattern plus the length of all symbols (note
    * that the same symbol may occur multiple times in the pattern) plus
    * line endings and startings ('\t' to start a line and '\n\0' to end one)
    */
   assert(repl_length != NULL);
   *repl_length = 0;
   lptr = pattern;
   while (*lptr)
     {
	switch (*lptr)
	  {
	  case '%':
	     lptr++;		/* skip '%' */
	     assert(sc_isdigit(*lptr));
	     var = atoi(lptr) - 1;
	     assert(var >= 0 && var < _maxoptvars);
	     assert(symbols[var][0] != '\0');	/* variable should be defined */
	     *repl_length += strlen(symbols[var]);
	     break;
	  case '!':
	     *repl_length += 3;	/* '\t', '\n' & '\0' */
	     break;
	  default:
	     *repl_length += 1;
	  }			/* switch */
	lptr++;
     }				/* while */

   /* allocate a buffer to replace the sequence in */
   if (!(buffer = malloc(*repl_length)))
     {
	error(103);
	return NULL;
     }

   /* replace the pattern into this temporary buffer */
   lptr = buffer;
   *lptr++ = '\t';		/* the "replace" patterns do not have tabs */
   while (*pattern)
     {
	assert((int)(lptr - buffer) < *repl_length);
	switch (*pattern)
	  {
	  case '%':
	     /* write out the symbol */
	     pattern++;
	     assert(sc_isdigit(*pattern));
	     var = atoi(pattern) - 1;
	     assert(var >= 0 && var < _maxoptvars);
	     assert(symbols[var][0] != '\0');	/* variable should be defined */
	     strcpy(lptr, symbols[var]);
	     lptr += strlen(symbols[var]);
	     break;
	  case '!':
	     /* finish the line, optionally start the next line with an indent */
	     *lptr++ = '\n';
	     *lptr++ = '\0';
	     if (*(pattern + 1) != '\0')
		*lptr++ = '\t';
	     break;
	  default:
	     *lptr++ = *pattern;
	  }			/* switch */
	pattern++;
     }				/* while */

   assert((int)(lptr - buffer) == *repl_length);
   return buffer;
}

static void
strreplace(char *dest, char *replace, int sub_length, int repl_length,
	   int dest_length)
{
   int                 offset = sub_length - repl_length;

   if (offset > 0)		/* delete a section */
      memmove(dest, dest + offset, dest_length - offset);
   else if (offset < 0)		/* insert a section */
      memmove(dest - offset, dest, dest_length);
   memcpy(dest, replace, repl_length);
}

/*  stgopt
 *
 *  Optimizes the staging buffer by checking for series of instructions that
 *  can be coded more compact. The routine expects the lines in the staging
 *  buffer to be separated with '\n' and '\0' characters.
 *
 *  The longest sequences must be checked first.
 */

static void
stgopt(char *start, char *end)
{
   char                symbols[_maxoptvars][_aliasmax + 1];
   int                 seq, match_length, repl_length;

   assert(sequences != NULL);
   while (start < end)
     {
	if ((sc_debug & sNOOPTIMIZE) != 0 || sc_status != statWRITE)
	  {
	     /* do not match anything if debug-level is maximum */
	     filewrite(start);
	  }
	else
	  {
	     seq = 0;
	     while (sequences[seq].find)
	       {
		  assert(seq >= 0);
		  if (matchsequence
		      (start, end, sequences[seq].find, symbols, &match_length))
		    {
		       char               *replace =
			  replacesequence(sequences[seq].replace, symbols,
					  &repl_length);
		       /* If the replacement is bigger than the original section, we may need
		        * to "grow" the staging buffer. This is quite complex, due to the
		        * re-ordering of expressions that can also happen in the staging
		        * buffer. In addition, it should not happen: the peephole optimizer
		        * must replace sequences with *shorter* sequences, not longer ones.
		        * So, I simply forbid sequences that are longer than the ones they
		        * are meant to replace.
		        */
		       assert(match_length >= repl_length);
		       if (match_length >= repl_length)
			 {
			    strreplace(start, replace, match_length,
				       repl_length, (int)(end - start));
			    end -= match_length - repl_length;
			    free(replace);
			    code_idx -= sequences[seq].savesize;
			    seq = 0;	/* restart search for matches */
			 }
		       else
			 {
			    /* actually, we should never get here (match_length<repl_length) */
			    assert(0);
			    seq++;
			 }	/* if */
		    }
		  else
		    {
		       seq++;
		    }		/* if */
	       }		/* while */
	     assert(sequences[seq].find == NULL);
	     filewrite(start);
	  }			/* if */
	assert(start < end);
	start += strlen(start) + 1;	/* to next string */
     }				/* while (start<end) */
}

#undef SCPACK_TABLE