You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
688 lines
19 KiB
688 lines
19 KiB
/* Small compiler - Staging buffer and optimizer |
|
* |
|
* The staging buffer |
|
* ------------------ |
|
* The staging buffer allows buffered output of generated code, deletion |
|
* of redundant code, optimization by a tinkering process and reversing |
|
* the ouput of evaluated expressions (which is used for the reversed |
|
* evaluation of arguments in functions). |
|
* Initially, stgwrite() writes to the file directly, but after a call to |
|
* stgset(TRUE), output is redirected to the buffer. After a call to |
|
* stgset(FALSE), stgwrite()'s output is directed to the file again. Thus |
|
* only one routine is used for writing to the output, which can be |
|
* buffered output or direct output. |
|
* |
|
* staging buffer variables: stgbuf - the buffer |
|
* stgidx - current index in the staging buffer |
|
* staging - if true, write to the staging buffer; |
|
* if false, write to file directly. |
|
* |
|
* Copyright (c) ITB CompuPhase, 1997-2003 |
|
* |
|
* This software is provided "as-is", without any express or implied warranty. |
|
* In no event will the authors be held liable for any damages arising from |
|
* the use of this software. |
|
* |
|
* Permission is granted to anyone to use this software for any purpose, |
|
* including commercial applications, and to alter it and redistribute it |
|
* freely, subject to the following restrictions: |
|
* |
|
* 1. The origin of this software must not be misrepresented; you must not |
|
* claim that you wrote the original software. If you use this software in |
|
* a product, an acknowledgment in the product documentation would be |
|
* appreciated but is not required. |
|
* 2. Altered source versions must be plainly marked as such, and must not be |
|
* misrepresented as being the original software. |
|
* 3. This notice may not be removed or altered from any source distribution. |
|
* |
|
* Version: $Id$ |
|
*/ |
|
|
|
|
|
#ifdef HAVE_CONFIG_H |
|
# include <config.h> |
|
#endif |
|
|
|
#include <assert.h> |
|
#include <stdio.h> |
|
#include <stdlib.h> /* for atoi() */ |
|
#include <string.h> |
|
#include <ctype.h> |
|
|
|
#include "embryo_cc_sc.h" |
|
|
|
#if defined _MSC_VER |
|
#pragma warning(push) |
|
#pragma warning(disable:4125) /* decimal digit terminates octal escape sequence */ |
|
#endif |
|
|
|
#include "embryo_cc_sc7.scp" |
|
|
|
#if defined _MSC_VER |
|
#pragma warning(pop) |
|
#endif |
|
|
|
static void stgstring(char *start, char *end); |
|
static void stgopt(char *start, char *end); |
|
|
|
#define sSTG_GROW 512 |
|
#define sSTG_MAX 20480 |
|
|
|
static char *stgbuf = NULL; |
|
static int stgmax = 0; /* current size of the staging buffer */ |
|
|
|
#define CHECK_STGBUFFER(index) if ((int)(index)>=stgmax) grow_stgbuffer((index)+1) |
|
|
|
static void |
|
grow_stgbuffer(int requiredsize) |
|
{ |
|
char *p; |
|
int clear = !stgbuf; /* if previously none, empty buffer explicitly */ |
|
|
|
assert(stgmax < requiredsize); |
|
/* if the staging buffer (holding intermediate code for one line) grows |
|
* over a few kBytes, there is probably a run-away expression |
|
*/ |
|
if (requiredsize > sSTG_MAX) |
|
error(102, "staging buffer"); /* staging buffer overflow (fatal error) */ |
|
stgmax = requiredsize + sSTG_GROW; |
|
if (stgbuf) |
|
p = (char *)realloc(stgbuf, stgmax * sizeof(char)); |
|
else |
|
p = (char *)malloc(stgmax * sizeof(char)); |
|
if (!p) |
|
error(102, "staging buffer"); /* staging buffer overflow (fatal error) */ |
|
stgbuf = p; |
|
if (clear) |
|
*stgbuf = '\0'; |
|
} |
|
|
|
void |
|
stgbuffer_cleanup(void) |
|
{ |
|
if (stgbuf) |
|
{ |
|
free(stgbuf); |
|
stgbuf = NULL; |
|
stgmax = 0; |
|
} /* if */ |
|
} |
|
|
|
/* the variables "stgidx" and "staging" are declared in "scvars.c" */ |
|
|
|
/* stgmark |
|
* |
|
* Copies a mark into the staging buffer. At this moment there are three |
|
* possible marks: |
|
* sSTARTREORDER identifies the beginning of a series of expression |
|
* strings that must be written to the output file in |
|
* reordered order |
|
* sENDREORDER identifies the end of 'reverse evaluation' |
|
* sEXPRSTART + idx only valid within a block that is evaluated in |
|
* reordered order, it identifies the start of an |
|
* expression; the "idx" value is the argument position |
|
* |
|
* Global references: stgidx (altered) |
|
* stgbuf (altered) |
|
* staging (referred to only) |
|
*/ |
|
void |
|
stgmark(char mark) |
|
{ |
|
if (staging) |
|
{ |
|
CHECK_STGBUFFER(stgidx); |
|
stgbuf[stgidx++] = mark; |
|
} /* if */ |
|
} |
|
|
|
static int |
|
filewrite(char *str) |
|
{ |
|
if (sc_status == statWRITE) |
|
return sc_writeasm(outf, str); |
|
return TRUE; |
|
} |
|
|
|
/* stgwrite |
|
* |
|
* Writes the string "st" to the staging buffer or to the output file. In the |
|
* case of writing to the staging buffer, the terminating byte of zero is |
|
* copied too, but... the optimizer can only work on complete lines (not on |
|
* fractions of it. Therefore if the string is staged, if the last character |
|
* written to the buffer is a '\0' and the previous-to-last is not a '\n', |
|
* the string is concatenated to the last string in the buffer (the '\0' is |
|
* overwritten). This also means an '\n' used in the middle of a string isn't |
|
* recognized and could give wrong results with the optimizer. |
|
* Even when writing to the output file directly, all strings are buffered |
|
* until a whole line is complete. |
|
* |
|
* Global references: stgidx (altered) |
|
* stgbuf (altered) |
|
* staging (referred to only) |
|
*/ |
|
void |
|
stgwrite(char *st) |
|
{ |
|
int len; |
|
|
|
CHECK_STGBUFFER(0); |
|
if (staging) |
|
{ |
|
if (stgidx >= 2 && stgbuf[stgidx - 1] == '\0' |
|
&& stgbuf[stgidx - 2] != '\n') |
|
stgidx -= 1; /* overwrite last '\0' */ |
|
while (*st != '\0') |
|
{ /* copy to staging buffer */ |
|
CHECK_STGBUFFER(stgidx); |
|
stgbuf[stgidx++] = *st++; |
|
} /* while */ |
|
CHECK_STGBUFFER(stgidx); |
|
stgbuf[stgidx++] = '\0'; |
|
} |
|
else |
|
{ |
|
CHECK_STGBUFFER(strlen(stgbuf) + strlen(st) + 1); |
|
strcat(stgbuf, st); |
|
len = strlen(stgbuf); |
|
if (len > 0 && stgbuf[len - 1] == '\n') |
|
{ |
|
filewrite(stgbuf); |
|
stgbuf[0] = '\0'; |
|
} /* if */ |
|
} /* if */ |
|
} |
|
|
|
/* stgout |
|
* |
|
* Writes the staging buffer to the output file via stgstring() (for |
|
* reversing expressions in the buffer) and stgopt() (for optimizing). It |
|
* resets "stgidx". |
|
* |
|
* Global references: stgidx (altered) |
|
* stgbuf (referred to only) |
|
* staging (referred to only) |
|
*/ |
|
void |
|
stgout(int idx) |
|
{ |
|
if (!staging) |
|
return; |
|
stgstring(&stgbuf[idx], &stgbuf[stgidx]); |
|
stgidx = idx; |
|
} |
|
|
|
typedef struct |
|
{ |
|
char *start, *end; |
|
} argstack; |
|
|
|
/* stgstring |
|
* |
|
* Analyses whether code strings should be output to the file as they appear |
|
* in the staging buffer or whether portions of it should be re-ordered. |
|
* Re-ordering takes place in function argument lists; Small passes arguments |
|
* to functions from right to left. When arguments are "named" rather than |
|
* positional, the order in the source stream is indeterminate. |
|
* This function calls itself recursively in case it needs to re-order code |
|
* strings, and it uses a private stack (or list) to mark the start and the |
|
* end of expressions in their correct (reversed) order. |
|
* In any case, stgstring() sends a block as large as possible to the |
|
* optimizer stgopt(). |
|
* |
|
* In "reorder" mode, each set of code strings must start with the token |
|
* sEXPRSTART, even the first. If the token sSTARTREORDER is represented |
|
* by '[', sENDREORDER by ']' and sEXPRSTART by '|' the following applies: |
|
* '[]...' valid, but useless; no output |
|
* '[|...] valid, but useless; only one string |
|
* '[|...|...] valid and useful |
|
* '[...|...] invalid, first string doesn't start with '|' |
|
* '[|...|] invalid |
|
*/ |
|
static void |
|
stgstring(char *start, char *end) |
|
{ |
|
char *ptr; |
|
int nest, argc, arg; |
|
argstack *stack; |
|
|
|
while (start < end) |
|
{ |
|
if (*start == sSTARTREORDER) |
|
{ |
|
start += 1; /* skip token */ |
|
/* allocate a argstack with sMAXARGS items */ |
|
stack = (argstack *) malloc(sMAXARGS * sizeof(argstack)); |
|
if (!stack) |
|
error(103); /* insufficient memory */ |
|
nest = 1; /* nesting counter */ |
|
argc = 0; /* argument counter */ |
|
arg = -1; /* argument index; no valid argument yet */ |
|
do |
|
{ |
|
switch (*start) |
|
{ |
|
case sSTARTREORDER: |
|
nest++; |
|
start++; |
|
break; |
|
case sENDREORDER: |
|
nest--; |
|
start++; |
|
break; |
|
default: |
|
if ((*start & sEXPRSTART) == sEXPRSTART) |
|
{ |
|
if (nest == 1) |
|
{ |
|
if (arg >= 0) |
|
stack[arg].end = start - 1; /* finish previous argument */ |
|
arg = (unsigned char)*start - sEXPRSTART; |
|
stack[arg].start = start + 1; |
|
if (arg >= argc) |
|
argc = arg + 1; |
|
} /* if */ |
|
start++; |
|
} |
|
else |
|
{ |
|
start += strlen(start) + 1; |
|
} /* if */ |
|
} /* switch */ |
|
} |
|
while (nest); /* enddo */ |
|
if (arg >= 0) |
|
stack[arg].end = start - 1; /* finish previous argument */ |
|
while (argc > 0) |
|
{ |
|
argc--; |
|
stgstring(stack[argc].start, stack[argc].end); |
|
} /* while */ |
|
free(stack); |
|
} |
|
else |
|
{ |
|
ptr = start; |
|
while (ptr < end && *ptr != sSTARTREORDER) |
|
ptr += strlen(ptr) + 1; |
|
stgopt(start, ptr); |
|
start = ptr; |
|
} /* if */ |
|
} /* while */ |
|
} |
|
|
|
/* stgdel |
|
* |
|
* Scraps code from the staging buffer by resetting "stgidx" to "index". |
|
* |
|
* Global references: stgidx (altered) |
|
* staging (referred to only) |
|
*/ |
|
void |
|
stgdel(int idx, cell code_index) |
|
{ |
|
if (staging) |
|
{ |
|
stgidx = idx; |
|
code_idx = code_index; |
|
} /* if */ |
|
} |
|
|
|
int |
|
stgget(int *idx, cell * code_index) |
|
{ |
|
if (staging) |
|
{ |
|
*idx = stgidx; |
|
*code_index = code_idx; |
|
} /* if */ |
|
return staging; |
|
} |
|
|
|
/* stgset |
|
* |
|
* Sets staging on or off. If it's turned off, the staging buffer must be |
|
* initialized to an empty string. If it's turned on, the routine makes sure |
|
* the index ("stgidx") is set to 0 (it should already be 0). |
|
* |
|
* Global references: staging (altered) |
|
* stgidx (altered) |
|
* stgbuf (contents altered) |
|
*/ |
|
void |
|
stgset(int onoff) |
|
{ |
|
staging = onoff; |
|
if (staging) |
|
{ |
|
assert(stgidx == 0); |
|
stgidx = 0; |
|
CHECK_STGBUFFER(stgidx); |
|
/* write any contents that may be put in the buffer by stgwrite() |
|
* when "staging" was 0 |
|
*/ |
|
if (stgbuf[0] != '\0') |
|
filewrite(stgbuf); |
|
} /* if */ |
|
stgbuf[0] = '\0'; |
|
} |
|
|
|
/* phopt_init |
|
* Initialize all sequence strings of the peehole optimizer. The strings |
|
* are embedded in the .EXE file in compressed format, here we expand |
|
* them (and allocate memory for the sequences). |
|
*/ |
|
static SEQUENCE *sequences; |
|
|
|
int |
|
phopt_init(void) |
|
{ |
|
int number, i, len; |
|
char str[160]; |
|
|
|
/* count number of sequences */ |
|
for (number = 0; sequences_cmp[number].find; number++) |
|
/* nothing */ ; |
|
number++; /* include an item for the NULL terminator */ |
|
|
|
if (!(sequences = (SEQUENCE *)malloc(number * sizeof(SEQUENCE)))) |
|
return FALSE; |
|
|
|
/* pre-initialize all to NULL (in case of failure) */ |
|
for (i = 0; i < number; i++) |
|
{ |
|
sequences[i].find = NULL; |
|
sequences[i].replace = NULL; |
|
sequences[i].savesize = 0; |
|
} /* for */ |
|
|
|
/* expand all strings */ |
|
for (i = 0; i < number - 1; i++) |
|
{ |
|
len = |
|
strexpand(str, (unsigned char *)sequences_cmp[i].find, sizeof str, |
|
SCPACK_TABLE); |
|
assert(len <= (int)(sizeof(str))); |
|
assert(len == (int)(strlen(str) + 1)); |
|
sequences[i].find = (char *)malloc(len); |
|
if (sequences[i].find) |
|
strcpy(sequences[i].find, str); |
|
len = |
|
strexpand(str, (unsigned char *)sequences_cmp[i].replace, sizeof str, |
|
SCPACK_TABLE); |
|
assert(len <= (int)(sizeof(str))); |
|
assert(len == (int)(strlen(str) + 1)); |
|
sequences[i].replace = (char *)malloc(len); |
|
if (sequences[i].replace) |
|
strcpy(sequences[i].replace, str); |
|
sequences[i].savesize = sequences_cmp[i].savesize; |
|
if (!sequences[i].find || !sequences[i].replace) |
|
return phopt_cleanup(); |
|
} /* for */ |
|
|
|
return TRUE; |
|
} |
|
|
|
int |
|
phopt_cleanup(void) |
|
{ |
|
int i; |
|
|
|
if (sequences) |
|
{ |
|
i = 0; |
|
while (sequences[i].find || sequences[i].replace) |
|
{ |
|
if (sequences[i].find) |
|
free(sequences[i].find); |
|
if (sequences[i].replace) |
|
free(sequences[i].replace); |
|
i++; |
|
} /* while */ |
|
free(sequences); |
|
sequences = NULL; |
|
} /* if */ |
|
return FALSE; |
|
} |
|
|
|
#define _maxoptvars 4 |
|
#define _aliasmax 10 /* a 32-bit number can be represented in |
|
* 9 decimal digits */ |
|
|
|
static int |
|
matchsequence(char *start, char *end, char *pattern, |
|
char symbols[_maxoptvars][_aliasmax + 1], int *match_length) |
|
{ |
|
int var, i; |
|
char str[_aliasmax + 1]; |
|
char *start_org = start; |
|
|
|
*match_length = 0; |
|
for (var = 0; var < _maxoptvars; var++) |
|
symbols[var][0] = '\0'; |
|
|
|
while (*start == '\t' || *start == ' ') |
|
start++; |
|
while (*pattern) |
|
{ |
|
if (start >= end) |
|
return FALSE; |
|
switch (*pattern) |
|
{ |
|
case '%': /* new "symbol" */ |
|
pattern++; |
|
assert(sc_isdigit(*pattern)); |
|
var = atoi(pattern) - 1; |
|
assert(var >= 0 && var < _maxoptvars); |
|
assert(alphanum(*start)); |
|
for (i = 0; start < end && alphanum(*start); i++, start++) |
|
{ |
|
assert(i <= _aliasmax); |
|
str[i] = *start; |
|
} /* for */ |
|
str[i] = '\0'; |
|
if (symbols[var][0] != '\0') |
|
{ |
|
if (strcmp(symbols[var], str) != 0) |
|
return FALSE; /* symbols should be identical */ |
|
} |
|
else |
|
{ |
|
strcpy(symbols[var], str); |
|
} /* if */ |
|
break; |
|
case ' ': |
|
if (*start != '\t' && *start != ' ') |
|
return FALSE; |
|
while ((start < end && *start == '\t') || *start == ' ') |
|
start++; |
|
break; |
|
case '!': |
|
while ((start < end && *start == '\t') || *start == ' ') |
|
start++; /* skip trailing white space */ |
|
if (*start != '\n') |
|
return FALSE; |
|
assert(*(start + 1) == '\0'); |
|
start += 2; /* skip '\n' and '\0' */ |
|
if (*(pattern + 1) != '\0') |
|
while ((start < end && *start == '\t') || *start == ' ') |
|
start++; /* skip leading white space of next instruction */ |
|
break; |
|
default: |
|
if (tolower(*start) != tolower(*pattern)) |
|
return FALSE; |
|
start++; |
|
} /* switch */ |
|
pattern++; |
|
} /* while */ |
|
|
|
*match_length = (int)(start - start_org); |
|
return TRUE; |
|
} |
|
|
|
static char * |
|
replacesequence(char *pattern, char symbols[_maxoptvars][_aliasmax + 1], |
|
int *repl_length) |
|
{ |
|
char *lptr; |
|
int var; |
|
char *buffer; |
|
|
|
/* calculate the length of the new buffer |
|
* this is the length of the pattern plus the length of all symbols (note |
|
* that the same symbol may occur multiple times in the pattern) plus |
|
* line endings and startings ('\t' to start a line and '\n\0' to end one) |
|
*/ |
|
assert(repl_length != NULL); |
|
*repl_length = 0; |
|
lptr = pattern; |
|
while (*lptr) |
|
{ |
|
switch (*lptr) |
|
{ |
|
case '%': |
|
lptr++; /* skip '%' */ |
|
assert(sc_isdigit(*lptr)); |
|
var = atoi(lptr) - 1; |
|
assert(var >= 0 && var < _maxoptvars); |
|
assert(symbols[var][0] != '\0'); /* variable should be defined */ |
|
*repl_length += strlen(symbols[var]); |
|
break; |
|
case '!': |
|
*repl_length += 3; /* '\t', '\n' & '\0' */ |
|
break; |
|
default: |
|
*repl_length += 1; |
|
} /* switch */ |
|
lptr++; |
|
} /* while */ |
|
|
|
/* allocate a buffer to replace the sequence in */ |
|
if (!(buffer = malloc(*repl_length))) |
|
{ |
|
error(103); |
|
return NULL; |
|
} |
|
|
|
/* replace the pattern into this temporary buffer */ |
|
lptr = buffer; |
|
*lptr++ = '\t'; /* the "replace" patterns do not have tabs */ |
|
while (*pattern) |
|
{ |
|
assert((int)(lptr - buffer) < *repl_length); |
|
switch (*pattern) |
|
{ |
|
case '%': |
|
/* write out the symbol */ |
|
pattern++; |
|
assert(sc_isdigit(*pattern)); |
|
var = atoi(pattern) - 1; |
|
assert(var >= 0 && var < _maxoptvars); |
|
assert(symbols[var][0] != '\0'); /* variable should be defined */ |
|
strcpy(lptr, symbols[var]); |
|
lptr += strlen(symbols[var]); |
|
break; |
|
case '!': |
|
/* finish the line, optionally start the next line with an indent */ |
|
*lptr++ = '\n'; |
|
*lptr++ = '\0'; |
|
if (*(pattern + 1) != '\0') |
|
*lptr++ = '\t'; |
|
break; |
|
default: |
|
*lptr++ = *pattern; |
|
} /* switch */ |
|
pattern++; |
|
} /* while */ |
|
|
|
assert((int)(lptr - buffer) == *repl_length); |
|
return buffer; |
|
} |
|
|
|
static void |
|
strreplace(char *dest, char *replace, int sub_length, int repl_length, |
|
int dest_length) |
|
{ |
|
int offset = sub_length - repl_length; |
|
|
|
if (offset > 0) /* delete a section */ |
|
memmove(dest, dest + offset, dest_length - offset); |
|
else if (offset < 0) /* insert a section */ |
|
memmove(dest - offset, dest, dest_length); |
|
memcpy(dest, replace, repl_length); |
|
} |
|
|
|
/* stgopt |
|
* |
|
* Optimizes the staging buffer by checking for series of instructions that |
|
* can be coded more compact. The routine expects the lines in the staging |
|
* buffer to be separated with '\n' and '\0' characters. |
|
* |
|
* The longest sequences must be checked first. |
|
*/ |
|
|
|
static void |
|
stgopt(char *start, char *end) |
|
{ |
|
char symbols[_maxoptvars][_aliasmax + 1]; |
|
int seq, match_length, repl_length; |
|
|
|
assert(sequences != NULL); |
|
while (start < end) |
|
{ |
|
if ((sc_debug & sNOOPTIMIZE) != 0 || sc_status != statWRITE) |
|
{ |
|
/* do not match anything if debug-level is maximum */ |
|
filewrite(start); |
|
} |
|
else |
|
{ |
|
seq = 0; |
|
while (sequences[seq].find) |
|
{ |
|
assert(seq >= 0); |
|
if (matchsequence |
|
(start, end, sequences[seq].find, symbols, &match_length)) |
|
{ |
|
char *replace = |
|
replacesequence(sequences[seq].replace, symbols, |
|
&repl_length); |
|
/* If the replacement is bigger than the original section, we may need |
|
* to "grow" the staging buffer. This is quite complex, due to the |
|
* re-ordering of expressions that can also happen in the staging |
|
* buffer. In addition, it should not happen: the peephole optimizer |
|
* must replace sequences with *shorter* sequences, not longer ones. |
|
* So, I simply forbid sequences that are longer than the ones they |
|
* are meant to replace. |
|
*/ |
|
assert(match_length >= repl_length); |
|
if (match_length >= repl_length) |
|
{ |
|
strreplace(start, replace, match_length, |
|
repl_length, (int)(end - start)); |
|
end -= match_length - repl_length; |
|
free(replace); |
|
code_idx -= sequences[seq].savesize; |
|
seq = 0; /* restart search for matches */ |
|
} |
|
else |
|
{ |
|
/* actually, we should never get here (match_length<repl_length) */ |
|
assert(0); |
|
seq++; |
|
} /* if */ |
|
} |
|
else |
|
{ |
|
seq++; |
|
} /* if */ |
|
} /* while */ |
|
assert(sequences[seq].find == NULL); |
|
filewrite(start); |
|
} /* if */ |
|
assert(start < end); |
|
start += strlen(start) + 1; /* to next string */ |
|
} /* while (start<end) */ |
|
} |
|
|
|
#undef SCPACK_TABLE
|
|
|