A recursive-descent parser has a procedure for each non-terminal. But in the book, the parser (in the file "parser.c") has been hand-optimized to eliminate tail recursion, using the methods described on page 52-53 in ASU-86. For example, the non-terminals term and morefactors are handled by the function term, which contains a while loop. Here, I have replaced the book´s "parse.c" with one without those optimizations. (The original file from the book, renamed "bookparser.c", is included in the zip and tar files. Do not use that file.)
FAQ
Q: Why doesn't it work?
Q: Why doesn't it work?
|
On a Unix or Linux system, with gcc installed, you can compile the program by just typing make.
Jump:
global.h |
lexer.c |
parser.c |
emitter.c |
symbol.c |
init.c |
error.c |
main.c |
Makefile
Download:
global.h |
lexer.c |
parser.c |
emitter.c |
symbol.c |
init.c |
error.c |
main.c |
Makefile
global.h
Download
/* global.h */ #include <stdio.h> /* include declarations for i/o routines */ #include <ctype.h> /* ... and for character test routines */ #include <stdlib.h> /* ... and for some standard routines, such as exit */ #include <string.h> /* ... and for string routines */ #define MAX_ID_LENGTH 128 /* for the buffer size */ #define NONE -1 #define EOS '\0' #define NUM 256 #define DIV 257 #define MOD 258 #define ID 259 #define DONE 260 extern int token_value; /* value of token attribute */ extern int lineno; struct symentry { /* form of symbol table entry */ char *lexeme; int token_type; }; extern struct symentry symtable[]; /* symbol table */ extern void init(); /* loads keywords into symtable */ extern void error(char* message); /* generates all error messages */ extern int lexan(); /* lexical analyzer */ extern void parse(); /* parses and translates expression list */ extern int insert(char *s, int token_type); /* returns position of entry for s */ extern int lookup(char *s); /* returns position of entry for s, or -1 if not found */ extern void emit (int token_type, int token_value); /* generates output */
/* lexer.c */ #include "global.h" char lexeme[MAX_ID_LENGTH + 1]; int lineno = 1; int token_value = NONE; int lexan() /* lexical analyzer */ { int c; while(1) { c = getchar(); if (c == ' ' || c == '\t') ; /* strip out white space */ else if (c == '\n') lineno = lineno + 1; else if (isdigit(c)) { /* c is a digit */ ungetc(c, stdin); scanf("%d", &token_value); return NUM; } else if (isalpha(c)) { /* c is a letter */ int id_number, chars = 0; while (isalnum(c)) { /* c is alphanumeric */ lexeme[chars++] = c; if (chars > MAX_ID_LENGTH) error("identifier too long"); c = getchar(); } lexeme[chars] = EOS; if (c != EOF) ungetc(c, stdin); id_number = lookup(lexeme); if (id_number == -1) id_number = insert(lexeme, ID); token_value = id_number; return symtable[id_number].token_type; } else if (c == EOF) return DONE; else { token_value = NONE; return c; } } }
/* parser.c -- without the optimizations */ #include "global.h" int lookahead; void match(int); void start(), list(), expr(), moreterms(), term(), morefactors(), factor(); void parse() /* parses and translates expression list */ { lookahead = lexan(); start(); } void start () { /* Just one production for start, so we don't need to check lookahead */ list(); match(DONE); } void list() { if (lookahead == '(' || lookahead == ID || lookahead == NUM) { expr(); match(';'); list(); } else { /* Empty */ } } void expr () { /* Just one production for expr, so we don't need to check lookahead */ term(); moreterms(); } void moreterms() { if (lookahead == '+') { match('+'); term(); emit('+', token_value); moreterms(); } else if (lookahead == '-') { match('-'); term(); emit('-', token_value); moreterms(); } else { /* Empty */ } } void term () { /* Just one production for term, so we don't need to check lookahead */ factor(); morefactors(); } void morefactors () { if (lookahead == '*') { match('*'); factor(); emit('*', token_value); morefactors(); } else if (lookahead == '/') { match('/'); factor(); emit('/', token_value); morefactors(); } else if (lookahead == DIV) { match(DIV); factor(); emit(DIV, token_value); morefactors(); } else if (lookahead == MOD) { match(MOD); factor(); emit(MOD, token_value); morefactors(); } else { /* Empty */ } } void factor () { if (lookahead == '(') { match('('); expr(); match(')'); } else if (lookahead == ID) { int id_number = token_value; match(ID); emit(ID, id_number); } else if (lookahead == NUM) { int num_value = token_value; match(NUM); emit(NUM, num_value); } else error("syntax error in factor"); } void match(int t) { if (lookahead == t) lookahead = lexan(); else error ("syntax error in match"); }
/* emitter.c */ #include "global.h" void emit (int token_type, int token_value) /* generates output */ { switch(token_type) { case '+' : case '-' : case '*' : case '/': printf("%c\n", token_type); break; case DIV: printf("DIV\n"); break; case MOD: printf("MOD\n"); break; case NUM: printf("%d\n", token_value); break; case ID: printf("%s\n", symtable[token_value].lexeme); break; default: printf("[Unknown token %d, with value %d]\n", token_type, token_value); } }
/* symbol.c */ #include "global.h" #define MAX_SYMBOLS 100 /* size of symbol table */ struct symentry symtable[MAX_SYMBOLS]; int nr_symbols = 0; /* number of symbols in symtable */ int lookup(char *s) /* returns position of entry for s, or -1 if not found */ { for (int p = nr_symbols - 1; p >= 0; --p) if (strcmp(symtable[p].lexeme, s) == 0) return p; return -1; } int insert(char *s, int token_type) /* returns position of entry for s */ { if (nr_symbols >= MAX_SYMBOLS) error("Symbol table full"); symtable[nr_symbols].token_type = token_type; symtable[nr_symbols].lexeme = strdup(s); return nr_symbols++; }
/* init.c */ #include "global.h" struct symentry keywords[] = { { "div", DIV }, { "mod", MOD, } }; void init() /* loads keywords into symtable */ { int nr_keywords = sizeof(keywords) / sizeof(keywords[0]); for (int i = 0; i < nr_keywords; ++i) insert(keywords[i].lexeme, keywords[i].token_type); }
/* error.c */ #include "global.h" void error(char* message) /* generates all error messages */ { fflush(stdout); fprintf(stderr, "[Error on line %d: %s]\n", lineno, message); exit(EXIT_FAILURE); /* unsuccessful termination */ }
/* main.c */ #include "global.h" int main(void) { init(); parse(); exit(0); /* successful termination */ }
OBJECTS = lexer.o parser.o emitter.o symbol.o init.o error.o main.o SOURCES = lexer.c parser.c emitter.c symbol.c init.c error.c main.c EXE = infix2postfix CFLAGS += -Wall -g $(EXE): $(OBJECTS) gcc $(CFLAGS) -o $(EXE) $(OBJECTS) main.o: main.c global.h gcc $(CFLAGS) -c main.c lexer.o: lexer.c global.h gcc $(CFLAGS) -c lexer.c parser.o: parser.c global.h gcc $(CFLAGS) -c parser.c emitter.o: emitter.c global.h gcc $(CFLAGS) -c emitter.c symbol.o: symbol.c global.h gcc $(CFLAGS) -c symbol.c init.o: init.c global.h gcc $(CFLAGS) -c init.c error.o: error.c global.h gcc $(CFLAGS) -c error.c clean: rm -f $(EXE) $(OBJECTS) 29.tar.gz 29.zip *~ archives: clean cd ..; rm 29.tar 29.zip 29/29.tar 29/29.zip; tar -cvf 29.tar 29; gzip -9 29.tar; zip -r 29.zip 29; mv 29.zip 29/29.zip; mv 29.tar.gz 29/29.tar.gz