A recursive-descent parser has a procedure for each non-terminal. But in the book, the parser (in the file "parser.c") has been hand-optimized to eliminate tail recursion, using the methods described on page 52-53 in ASU-86. For example, the non-terminals term and morefactors are handled by the function term, which contains a while loop. Here, I have replaced the book´s "parse.c" with one without those optimizations. (The original file from the book, renamed "bookparser.c", is included in the zip and tar files. Do not use that file.)
On a Unix or Linux system, with gcc installed, you can compile the program by just typing make.
Jump:
global.h |
lexer.c |
parser.c |
emitter.c |
symbol.c |
init.c |
error.c |
main.c |
Makefile
Download:
global.h |
lexer.c |
parser.c |
emitter.c |
symbol.c |
init.c |
error.c |
main.c |
Makefile
global.h
Download
/* global.h */ #include <stdio.h> /* include declarations for i/o routines */ #include <ctype.h> /* ... and for character test routines */ #include <stdlib.h> /* ... and for some standard routines, such as exit */ #include <string.h> /* ... and for string routines */ #define BSIZE 128 /* buffer size */ #define NONE -1 #define EOS '\0' #define NUM 256 #define DIV 257 #define MOD 258 #define ID 259 #define DONE 260 extern int tokenval; /* value of token attribute */ extern int lineno; struct entry { /* form of symbol table entry */ char *lexptr; int token; }; extern struct entry symtable[]; /* symbol table */ extern void init(); /* loads keywords into symtable */ extern void error(char* m); /* generates all error messages */ extern int lexan(); /* lexical analyzer */ extern void parse(); /* parses and translates expression list */ extern int insert(char *s, int tok); /* returns position of entry for s */ extern int lookup(char *s); /* returns position of entry for s */ extern void emit (int t, int tval); /* generates output */
/* lexer.c */ #include "global.h" char lexbuf[BSIZE]; int lineno = 1; int tokenval = NONE; int lexan () /* lexical analyzer */ { int t; while(1) { t = getchar (); if (t == ' ' || t == '\t') ; /* strip out white space */ else if (t == '\n') lineno = lineno + 1; else if (isdigit (t)) { /* t is a digit */ ungetc(t, stdin); scanf("%d", &tokenval); return NUM; } else if (isalpha(t)) { /* t is a letter */ int p, b = 0; while (isalnum(t)) { /* t is alphanumeric */ lexbuf [b] = t; t = getchar (); b = b + 1; if (b >= BSIZE) error("compiler error"); } lexbuf[b] = EOS; if (t != EOF) ungetc(t, stdin); p = lookup (lexbuf); if (p == 0) p = insert (lexbuf, ID); tokenval = p; return symtable[p].token; } else if (t == EOF) return DONE; else { tokenval = NONE; return t; } } }
/* parser.c -- without the optimizations */ #include "global.h" int lookahead; void match(int); void start(), list(), expr(), moreterms(), term(), morefactors(), factor(); void parse() /* parses and translates expression list */ { lookahead = lexan(); start(); } void start () { /* Just one production for start, so we don't need to check lookahead */ list(); match(DONE); } void list() { if (lookahead == '(' || lookahead == ID || lookahead == NUM) { expr(); match(';'); list(); } else { /* Empty */ } } void expr () { /* Just one production for expr, so we don't need to check lookahead */ term(); moreterms(); } void moreterms() { if (lookahead == '+') { match('+'); term(); emit('+', tokenval); moreterms(); } else if (lookahead == '-') { match('-'); term(); emit('-', tokenval); moreterms(); } else { /* Empty */ } } void term () { /* Just one production for term, so we don't need to check lookahead */ factor(); morefactors(); } void morefactors () { if (lookahead == '*') { match('*'); factor(); emit('*', tokenval); morefactors(); } else if (lookahead == '/') { match('/'); factor(); emit('/', tokenval); morefactors(); } else if (lookahead == DIV) { match(DIV); factor(); emit(DIV, tokenval); morefactors(); } else if (lookahead == MOD) { match(MOD); factor(); emit(MOD, tokenval); morefactors(); } else { /* Empty */ } } void factor () { if (lookahead == '(') { match('('); expr(); match(')'); } else if (lookahead == ID) { int id_lexeme = tokenval; match(ID); emit(ID, id_lexeme); } else if (lookahead == NUM) { int num_value = tokenval; match(NUM); emit(NUM, num_value); } else error("syntax error in factor"); } void match(int t) { if (lookahead == t) lookahead = lexan(); else error ("syntax error in match"); }
/* emitter.c */ #include "global.h" void emit (int t, int tval) /* generates output */ { switch(t) { case '+' : case '-' : case '*' : case '/': printf("%c\n", t); break; case DIV: printf("DIV\n"); break; case MOD: printf("MOD\n"); break; case NUM: printf("%d\n", tval); break; case ID: printf("%s\n", symtable[tval].lexptr); break; default: printf("token %d, tokenval %d\n", t, tval); } }
/* symbol.c */ #include "global.h" #define STRMAX 999 /* size of lexemes array */ #define SYMMAX 100 /* size of symbol table */ char lexemes[STRMAX]; int lastchar = - 1; /* last used position in lexemes */ struct entry symtable[SYMMAX]; int lastentry = 0; /* last used position in symtable */ int lookup(char *s) /* returns position of entry for s */ { int p; for (p = lastentry; p > 0; p = p - 1) if (strcmp(symtable[p].lexptr, s) == 0) return p; return 0; } int insert(char *s, int tok) /* returns position of entry for s */ { int len; len = strlen(s); /* strlen computes length of s */ if (lastentry + 1 >= SYMMAX) error ("symbol table full"); if (lastchar + len + 1 >= STRMAX) error ("lexemes array full"); lastentry = lastentry + 1; symtable[lastentry].token = tok; symtable[lastentry].lexptr = &lexemes[lastchar + 1]; lastchar = lastchar + len + 1; strcpy(symtable[lastentry].lexptr, s); return lastentry; }
/* init.c */ #include "global.h" struct entry keywords[] = { { "div", DIV }, { "mod", MOD, }, { 0, 0 } }; void init() /* loads keywords into symtable */ { struct entry *p; for (p = keywords; p->token; p++) insert(p->lexptr, p->token); }
/* error.c */ #include "global.h" void error(char* m) /* generates all error messages */ { fprintf(stderr, "line %d: %s\n", lineno, m); exit(EXIT_FAILURE); /* unsuccessful termination */ }
/* main.c */ #include "global.h" int main(void) { init(); parse(); exit(0); /* successful termination */ }
OBJ = lexer.o parser.o emitter.o symbol.o init.o error.o main.o SRC = lexer.c parser.c emitter.c symbol.c init.c error.c main.c EXE = infix2postfix infix2postfix: $(OBJ) gcc -Wall -o infix2postfix $(OBJ) main.o: main.c global.h gcc -Wall -c main.c lexer.o: lexer.c global.h gcc -Wall -c lexer.c parser.o: parser.c global.h gcc -Wall -c parser.c emitter.o: emitter.c global.h gcc -Wall -c emitter.c symbol.o: symbol.c global.h gcc -Wall -c symbol.c init.o: init.c global.h gcc -Wall -c init.c error.o: error.c global.h gcc -Wall -c error.c clean: rm -f $(EXE) $(OBJ) 29.tar.gz 29.zip *~ archives: clean cd ..; rm 29.tar 29.zip 29/29.tar 29/29.zip; tar -cvf 29.tar 29; gzip -9 29.tar; zip -r 29.zip 29; mv 29.zip 29/29.zip; mv 29.tar.gz 29/29.tar.gz