summaryrefslogtreecommitdiff
path: root/src/bin/embryo/embryo_cc_sc7.c
blob: f18302d229d3109c39c1a10c40b6bbd84f8f1459 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
/*  Small compiler - Staging buffer and optimizer
 *
 *  The staging buffer
 *  ------------------
 *  The staging buffer allows buffered output of generated code, deletion
 *  of redundant code, optimization by a tinkering process and reversing
 *  the ouput of evaluated expressions (which is used for the reversed
 *  evaluation of arguments in functions).
 *  Initially, stgwrite() writes to the file directly, but after a call to
 *  stgset(TRUE), output is redirected to the buffer. After a call to
 *  stgset(FALSE), stgwrite()'s output is directed to the file again. Thus
 *  only one routine is used for writing to the output, which can be
 *  buffered output or direct output.
 *
 *  staging buffer variables:   stgbuf  - the buffer
 *                              stgidx  - current index in the staging buffer
 *                              staging - if true, write to the staging buffer;
 *                                        if false, write to file directly.
 *
 *  Copyright (c) ITB CompuPhase, 1997-2003
 *
 *  This software is provided "as-is", without any express or implied warranty.
 *  In no event will the authors be held liable for any damages arising from
 *  the use of this software.
 *
 *  Permission is granted to anyone to use this software for any purpose,
 *  including commercial applications, and to alter it and redistribute it
 *  freely, subject to the following restrictions:
 *
 *  1.  The origin of this software must not be misrepresented; you must not
 *      claim that you wrote the original software. If you use this software in
 *      a product, an acknowledgment in the product documentation would be
 *      appreciated but is not required.
 *  2.  Altered source versions must be plainly marked as such, and must not be
 *      misrepresented as being the original software.
 *  3.  This notice may not be removed or altered from any source distribution.
 *
 *  Version: $Id$
 */


#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>		/* for atoi() */
#include <string.h>
#include <ctype.h>

#include "embryo_cc_sc.h"

#if defined _MSC_VER
#pragma warning(push)
#pragma warning(disable:4125)	/* decimal digit terminates octal escape sequence */
#endif

#include "embryo_cc_sc7.scp"

#if defined _MSC_VER
#pragma warning(pop)
#endif

static void         stgstring(char *start, char *end);
static void         stgopt(char *start, char *end);

#define sSTG_GROW   512
#define sSTG_MAX    20480

static char        *stgbuf = NULL;
static int          stgmax = 0;	/* current size of the staging buffer */

#define CHECK_STGBUFFER(index) if ((int)(index)>=stgmax) grow_stgbuffer((index)+1)

static void
grow_stgbuffer(int requiredsize)
{
   char               *p;
   int                 clear = !stgbuf;	/* if previously none, empty buffer explicitly */

   assert(stgmax < requiredsize);
   /* if the staging buffer (holding intermediate code for one line) grows
    * over a few kBytes, there is probably a run-away expression
    */
   if (requiredsize > sSTG_MAX)
      error(102, "staging buffer");	/* staging buffer overflow (fatal error) */
   stgmax = requiredsize + sSTG_GROW;
   if (stgbuf)
      p = (char *)realloc(stgbuf, stgmax * sizeof(char));
   else
      p = (char *)malloc(stgmax * sizeof(char));
   if (!p)
      error(102, "staging buffer");	/* staging buffer overflow (fatal error) */
   stgbuf = p;
   if (clear)
      *stgbuf = '\0';
}

void
stgbuffer_cleanup(void)
{
   if (stgbuf)
     {
	free(stgbuf);
	stgbuf = NULL;
	stgmax = 0;
     }				/* if */
}

/* the variables "stgidx" and "staging" are declared in "scvars.c" */

/*  stgmark
 *
 *  Copies a mark into the staging buffer. At this moment there are three
 *  possible marks:
 *     sSTARTREORDER    identifies the beginning of a series of expression
 *                      strings that must be written to the output file in
 *                      reordered order
 *    sENDREORDER       identifies the end of 'reverse evaluation'
 *    sEXPRSTART + idx  only valid within a block that is evaluated in
 *                      reordered order, it identifies the start of an
 *                      expression; the "idx" value is the argument position
 *
 *  Global references: stgidx  (altered)
 *                     stgbuf  (altered)
 *                     staging (referred to only)
 */
void
stgmark(char mark)
{
   if (staging)
     {
	CHECK_STGBUFFER(stgidx);
	stgbuf[stgidx++] = mark;
     }				/* if */
}

static int
filewrite(char *str)
{
   if (sc_status == statWRITE)
      return sc_writeasm(outf, str);
   return TRUE;
}

/*  stgwrite
 *
 *  Writes the string "st" to the staging buffer or to the output file. In the
 *  case of writing to the staging buffer, the terminating byte of zero is
 *  copied too, but... the optimizer can only work on complete lines (not on
 *  fractions of it. Therefore if the string is staged, if the last character
 *  written to the buffer is a '\0' and the previous-to-last is not a '\n',
 *  the string is concatenated to the last string in the buffer (the '\0' is
 *  overwritten). This also means an '\n' used in the middle of a string isn't
 *  recognized and could give wrong results with the optimizer.
 *  Even when writing to the output file directly, all strings are buffered
 *  until a whole line is complete.
 *
 *  Global references: stgidx  (altered)
 *                     stgbuf  (altered)
 *                     staging (referred to only)
 */
void
stgwrite(char *st)
{
   int                 len;

   CHECK_STGBUFFER(0);
   if (staging)
     {
	if (stgidx >= 2 && stgbuf[stgidx - 1] == '\0'
	    && stgbuf[stgidx - 2] != '\n')
	   stgidx -= 1;		/* overwrite last '\0' */
	while (*st != '\0')
	  {			/* copy to staging buffer */
	     CHECK_STGBUFFER(stgidx);
	     stgbuf[stgidx++] = *st++;
	  }			/* while */
	CHECK_STGBUFFER(stgidx);
	stgbuf[stgidx++] = '\0';
     }
   else
     {
	CHECK_STGBUFFER(strlen(stgbuf) + strlen(st) + 1);
	strcat(stgbuf, st);
	len = strlen(stgbuf);
	if (len > 0 && stgbuf[len - 1] == '\n')
	  {
	     filewrite(stgbuf);
	     stgbuf[0] = '\0';
	  }			/* if */
     }				/* if */
}

/*  stgout
 *
 *  Writes the staging buffer to the output file via stgstring() (for
 *  reversing expressions in the buffer) and stgopt() (for optimizing). It
 *  resets "stgidx".
 *
 *  Global references: stgidx  (altered)
 *                     stgbuf  (referred to only)
 *                     staging (referred to only)
 */
void
stgout(int idx)
{
   if (!staging)
      return;
   stgstring(&stgbuf[idx], &stgbuf[stgidx]);
   stgidx = idx;
}

typedef struct
{
   char               *start, *end;
} argstack;

/*  stgstring
 *
 *  Analyses whether code strings should be output to the file as they appear
 *  in the staging buffer or whether portions of it should be re-ordered.
 *  Re-ordering takes place in function argument lists; Small passes arguments
 *  to functions from right to left. When arguments are "named" rather than
 *  positional, the order in the source stream is indeterminate.
 *  This function calls itself recursively in case it needs to re-order code
 *  strings, and it uses a private stack (or list) to mark the start and the
 *  end of expressions in their correct (reversed) order.
 *  In any case, stgstring() sends a block as large as possible to the
 *  optimizer stgopt().
 *
 *  In "reorder" mode, each set of code strings must start with the token
 *  sEXPRSTART, even the first. If the token sSTARTREORDER is represented
 *  by '[', sENDREORDER by ']' and sEXPRSTART by '|' the following applies:
 *     '[]...'     valid, but useless; no output
 *     '[|...]     valid, but useless; only one string
 *     '[|...|...] valid and useful
 *     '[...|...]  invalid, first string doesn't start with '|'
 *     '[|...|]    invalid
 */
static void
stgstring(char *start, char *end)
{
   char               *ptr;
   int                 nest, argc, arg;
   argstack           *stack;

   while (start < end)
     {
	if (*start == sSTARTREORDER)
	  {
	     start += 1;	/* skip token */
	     /* allocate a argstack with sMAXARGS items */
	     stack = (argstack *) malloc(sMAXARGS * sizeof(argstack));
	     if (!stack)
		error(103);	/* insufficient memory */
	     nest = 1;		/* nesting counter */
	     argc = 0;		/* argument counter */
	     arg = -1;		/* argument index; no valid argument yet */
	     do
	       {
		  switch (*start)
		    {
		    case sSTARTREORDER:
		       nest++;
		       start++;
		       break;
		    case sENDREORDER:
		       nest--;
		       start++;
		       break;
		    default:
		       if ((*start & sEXPRSTART) == sEXPRSTART)
			 {
			    if (nest == 1)
			      {
				 if (arg >= 0)
				    stack[arg].end = start - 1;	/* finish previous argument */
				 arg = (unsigned char)*start - sEXPRSTART;
				 stack[arg].start = start + 1;
				 if (arg >= argc)
				    argc = arg + 1;
			      }	/* if */
			    start++;
			 }
		       else
			 {
			    start += strlen(start) + 1;
			 }	/* if */
		    }		/* switch */
	       }
	     while (nest);	/* enddo */
	     if (arg >= 0)
		stack[arg].end = start - 1;	/* finish previous argument */
	     while (argc > 0)
	       {
		  argc--;
		  stgstring(stack[argc].start, stack[argc].end);
	       }		/* while */
	     free(stack);
	  }
	else
	  {
	     ptr = start;
	     while (ptr < end && *ptr != sSTARTREORDER)
		ptr += strlen(ptr) + 1;
	     stgopt(start, ptr);
	     start = ptr;
	  }			/* if */
     }				/* while */
}

/*  stgdel
 *
 *  Scraps code from the staging buffer by resetting "stgidx" to "index".
 *
 *  Global references: stgidx (altered)
 *                     staging (referred to only)
 */
void
stgdel(int idx, cell code_index)
{
   if (staging)
     {
	stgidx = idx;
	code_idx = code_index;
     }				/* if */
}

int
stgget(int *idx, cell * code_index)
{
   if (staging)
     {
	*idx = stgidx;
	*code_index = code_idx;
     }				/* if */
   return staging;
}

/*  stgset
 *
 *  Sets staging on or off. If it's turned off, the staging buffer must be
 *  initialized to an empty string. If it's turned on, the routine makes sure
 *  the index ("stgidx") is set to 0 (it should already be 0).
 *
 *  Global references: staging  (altered)
 *                     stgidx   (altered)
 *                     stgbuf   (contents altered)
 */
void
stgset(int onoff)
{
   staging = onoff;
   if (staging)
     {
	assert(stgidx == 0);
	stgidx = 0;
	CHECK_STGBUFFER(stgidx);
	/* write any contents that may be put in the buffer by stgwrite()
	 * when "staging" was 0
	 */
	if (stgbuf[0] != '\0')
	   filewrite(stgbuf);
     }				/* if */
   stgbuf[0] = '\0';
}

/* phopt_init
 * Initialize all sequence strings of the peehole optimizer. The strings
 * are embedded in the .EXE file in compressed format, here we expand
 * them (and allocate memory for the sequences).
 */
static SEQUENCE    *sequences;

int
phopt_init(void)
{
   int                 number, i, len;
   char                str[160];

   /* count number of sequences */
   for (number = 0; sequences_cmp[number].find; number++)
      /* nothing */ ;
   number++;			/* include an item for the NULL terminator */

   if (!(sequences = (SEQUENCE *)malloc(number * sizeof(SEQUENCE))))
      return FALSE;

   /* pre-initialize all to NULL (in case of failure) */
   for (i = 0; i < number; i++)
     {
	sequences[i].find = NULL;
	sequences[i].replace = NULL;
	sequences[i].savesize = 0;
     }				/* for */

   /* expand all strings */
   for (i = 0; i < number - 1; i++)
     {
	len =
	   strexpand(str, (unsigned char *)sequences_cmp[i].find, sizeof str,
		     SCPACK_TABLE);
	assert(len <= (int)(sizeof(str)));
	assert(len == (int)(strlen(str) + 1));
	sequences[i].find = (char *)malloc(len);
	if (sequences[i].find)
	   strcpy(sequences[i].find, str);
	len =
	   strexpand(str, (unsigned char *)sequences_cmp[i].replace, sizeof str,
		     SCPACK_TABLE);
	assert(len <= (int)(sizeof(str)));
	assert(len == (int)(strlen(str) + 1));
	sequences[i].replace = (char *)malloc(len);
	if (sequences[i].replace)
	   strcpy(sequences[i].replace, str);
	sequences[i].savesize = sequences_cmp[i].savesize;
	if (!sequences[i].find || !sequences[i].replace)
	   return phopt_cleanup();
     }				/* for */

   return TRUE;
}

int
phopt_cleanup(void)
{
   int                 i;

   if (sequences)
     {
	i = 0;
	while (sequences[i].find || sequences[i].replace)
	  {
	     if (sequences[i].find)
		free(sequences[i].find);
	     if (sequences[i].replace)
		free(sequences[i].replace);
	     i++;
	  }			/* while */
	free(sequences);
	sequences = NULL;
     }				/* if */
   return FALSE;
}

#define _maxoptvars     4
#define _aliasmax       10	/* a 32-bit number can be represented in
				 * 9 decimal digits */

static int
matchsequence(char *start, char *end, char *pattern,
	      char symbols[_maxoptvars][_aliasmax + 1], int *match_length)
{
   int                 var, i;
   char                str[_aliasmax + 1];
   char               *start_org = start;

   *match_length = 0;
   for (var = 0; var < _maxoptvars; var++)
      symbols[var][0] = '\0';

   while (*start == '\t' || *start == ' ')
      start++;
   while (*pattern)
     {
	if (start >= end)
	   return FALSE;
	switch (*pattern)
	  {
	  case '%':		/* new "symbol" */
	     pattern++;
	     assert(sc_isdigit(*pattern));
	     var = atoi(pattern) - 1;
	     assert(var >= 0 && var < _maxoptvars);
	     assert(alphanum(*start));
	     for (i = 0; start < end && alphanum(*start); i++, start++)
	       {
		  assert(i <= _aliasmax);
		  str[i] = *start;
	       }		/* for */
	     str[i] = '\0';
	     if (symbols[var][0] != '\0')
	       {
		  if (strcmp(symbols[var], str) != 0)
		     return FALSE;	/* symbols should be identical */
	       }
	     else
	       {
		  strcpy(symbols[var], str);
	       }		/* if */
	     break;
	  case ' ':
	     if (*start != '\t' && *start != ' ')
		return FALSE;
	     while ((start < end && *start == '\t') || *start == ' ')
		start++;
	     break;
	  case '!':
	     while ((start < end && *start == '\t') || *start == ' ')
		start++;	/* skip trailing white space */
	     if (*start != '\n')
		return FALSE;
	     assert(*(start + 1) == '\0');
	     start += 2;	/* skip '\n' and '\0' */
	     if (*(pattern + 1) != '\0')
		while ((start < end && *start == '\t') || *start == ' ')
		   start++;	/* skip leading white space of next instruction */
	     break;
	  default:
	     if (tolower(*start) != tolower(*pattern))
		return FALSE;
	     start++;
	  }			/* switch */
	pattern++;
     }				/* while */

   *match_length = (int)(start - start_org);
   return TRUE;
}

static char        *
replacesequence(char *pattern, char symbols[_maxoptvars][_aliasmax + 1],
		int *repl_length)
{
   char               *lptr;
   int                 var;
   char               *buffer;

   /* calculate the length of the new buffer
    * this is the length of the pattern plus the length of all symbols (note
    * that the same symbol may occur multiple times in the pattern) plus
    * line endings and startings ('\t' to start a line and '\n\0' to end one)
    */
   assert(repl_length != NULL);
   *repl_length = 0;
   lptr = pattern;
   while (*lptr)
     {
	switch (*lptr)
	  {
	  case '%':
	     lptr++;		/* skip '%' */
	     assert(sc_isdigit(*lptr));
	     var = atoi(lptr) - 1;
	     assert(var >= 0 && var < _maxoptvars);
	     assert(symbols[var][0] != '\0');	/* variable should be defined */
	     *repl_length += strlen(symbols[var]);
	     break;
	  case '!':
	     *repl_length += 3;	/* '\t', '\n' & '\0' */
	     break;
	  default:
	     *repl_length += 1;
	  }			/* switch */
	lptr++;
     }				/* while */

   /* allocate a buffer to replace the sequence in */
   if (!(buffer = malloc(*repl_length)))
     {
	error(103);
	return NULL;
     }

   /* replace the pattern into this temporary buffer */
   lptr = buffer;
   *lptr++ = '\t';		/* the "replace" patterns do not have tabs */
   while (*pattern)
     {
	assert((int)(lptr - buffer) < *repl_length);
	switch (*pattern)
	  {
	  case '%':
	     /* write out the symbol */
	     pattern++;
	     assert(sc_isdigit(*pattern));
	     var = atoi(pattern) - 1;
	     assert(var >= 0 && var < _maxoptvars);
	     assert(symbols[var][0] != '\0');	/* variable should be defined */
	     strcpy(lptr, symbols[var]);
	     lptr += strlen(symbols[var]);
	     break;
	  case '!':
	     /* finish the line, optionally start the next line with an indent */
	     *lptr++ = '\n';
	     *lptr++ = '\0';
	     if (*(pattern + 1) != '\0')
		*lptr++ = '\t';
	     break;
	  default:
	     *lptr++ = *pattern;
	  }			/* switch */
	pattern++;
     }				/* while */

   assert((int)(lptr - buffer) == *repl_length);
   return buffer;
}

static void
strreplace(char *dest, char *replace, int sub_length, int repl_length,
	   int dest_length)
{
   int                 offset = sub_length - repl_length;

   if (offset > 0)		/* delete a section */
      memmove(dest, dest + offset, dest_length - offset);
   else if (offset < 0)		/* insert a section */
      memmove(dest - offset, dest, dest_length);
   memcpy(dest, replace, repl_length);
}

/*  stgopt
 *
 *  Optimizes the staging buffer by checking for series of instructions that
 *  can be coded more compact. The routine expects the lines in the staging
 *  buffer to be separated with '\n' and '\0' characters.
 *
 *  The longest sequences must be checked first.
 */

static void
stgopt(char *start, char *end)
{
   char                symbols[_maxoptvars][_aliasmax + 1];
   int                 seq, match_length, repl_length;

   assert(sequences != NULL);
   while (start < end)
     {
	if ((sc_debug & sNOOPTIMIZE) != 0 || sc_status != statWRITE)
	  {
	     /* do not match anything if debug-level is maximum */
	     filewrite(start);
	  }
	else
	  {
	     seq = 0;
	     while (sequences[seq].find)
	       {
		  assert(seq >= 0);
		  if (matchsequence
		      (start, end, sequences[seq].find, symbols, &match_length))
		    {
		       char               *replace =
			  replacesequence(sequences[seq].replace, symbols,
					  &repl_length);
		       /* If the replacement is bigger than the original section, we may need
		        * to "grow" the staging buffer. This is quite complex, due to the
		        * re-ordering of expressions that can also happen in the staging
		        * buffer. In addition, it should not happen: the peephole optimizer
		        * must replace sequences with *shorter* sequences, not longer ones.
		        * So, I simply forbid sequences that are longer than the ones they
		        * are meant to replace.
		        */
		       assert(match_length >= repl_length);
		       if (match_length >= repl_length)
			 {
			    strreplace(start, replace, match_length,
				       repl_length, (int)(end - start));
			    end -= match_length - repl_length;
			    free(replace);
			    code_idx -= sequences[seq].savesize;
			    seq = 0;	/* restart search for matches */
			 }
		       else
			 {
			    /* actually, we should never get here (match_length<repl_length) */
			    assert(0);
			    seq++;
			 }	/* if */
		    }
		  else
		    {
		       seq++;
		    }		/* if */
	       }		/* while */
	     assert(sequences[seq].find == NULL);
	     filewrite(start);
	  }			/* if */
	assert(start < end);
	start += strlen(start) + 1;	/* to next string */
     }				/* while (start<end) */
}

#undef SCPACK_TABLE