A recursive-descent parser has a procedure for each non-terminal. But in the book, the parser (in the file "parser.c") has been hand-optimized to eliminate tail recursion, using the methods described on page 52-53 in ASU-86. For example, the non-terminals term and morefactors are handled by the function term, which contains a while loop. Here, I have replaced the book´s "parse.c" with one without those optimizations. (The original file from the book, renamed "bookparser.c", is included in the zip and tar files. Do not use that file.)
|
FAQ
Q: Why doesn't it work?
Q: Why doesn't it work?
|
On a Unix or Linux system, with gcc installed, you can compile the program by just typing make.
Jump:
global.h |
lexer.c |
parser.c |
emitter.c |
symbol.c |
init.c |
error.c |
main.c |
Makefile
Download:
global.h |
lexer.c |
parser.c |
emitter.c |
symbol.c |
init.c |
error.c |
main.c |
Makefile
global.h
Download
/* global.h */
#include <stdio.h> /* include declarations for i/o routines */
#include <ctype.h> /* ... and for character test routines */
#include <stdlib.h> /* ... and for some standard routines, such as exit */
#include <string.h> /* ... and for string routines */
#define MAX_ID_LENGTH 128 /* for the buffer size */
#define NONE -1
#define EOS '\0'
#define NUM 256
#define DIV 257
#define MOD 258
#define ID 259
#define DONE 260
extern int token_value; /* value of token attribute */
extern int lineno;
struct symentry { /* form of symbol table entry */
char *lexeme;
int token_type;
};
extern struct symentry symtable[]; /* symbol table */
extern void init(); /* loads keywords into symtable */
extern void error(char* message); /* generates all error messages */
extern int lexan(); /* lexical analyzer */
extern void parse(); /* parses and translates expression list */
extern int insert(char *s, int token_type); /* returns position of entry for s */
extern int lookup(char *s); /* returns position of entry for s, or -1 if not found */
extern void emit (int token_type, int token_value); /* generates output */
/* lexer.c */
#include "global.h"
char lexeme[MAX_ID_LENGTH + 1];
int lineno = 1;
int token_value = NONE;
int lexan() /* lexical analyzer */
{
int c;
while(1) {
c = getchar();
if (c == ' ' || c == '\t')
; /* strip out white space */
else if (c == '\n')
lineno = lineno + 1;
else if (isdigit(c)) { /* c is a digit */
ungetc(c, stdin);
scanf("%d", &token_value);
return NUM;
}
else if (isalpha(c)) { /* c is a letter */
int id_number, chars = 0;
while (isalnum(c)) { /* c is alphanumeric */
lexeme[chars++] = c;
if (chars > MAX_ID_LENGTH)
error("identifier too long");
c = getchar();
}
lexeme[chars] = EOS;
if (c != EOF)
ungetc(c, stdin);
id_number = lookup(lexeme);
if (id_number == -1)
id_number = insert(lexeme, ID);
token_value = id_number;
return symtable[id_number].token_type;
}
else if (c == EOF)
return DONE;
else {
token_value = NONE;
return c;
}
}
}
/* parser.c -- without the optimizations */
#include "global.h"
int lookahead;
void match(int);
void start(), list(), expr(), moreterms(), term(), morefactors(), factor();
void parse() /* parses and translates expression list */
{
lookahead = lexan();
start();
}
void start ()
{
/* Just one production for start, so we don't need to check lookahead */
list(); match(DONE);
}
void list()
{
if (lookahead == '(' || lookahead == ID || lookahead == NUM) {
expr(); match(';'); list();
}
else {
/* Empty */
}
}
void expr ()
{
/* Just one production for expr, so we don't need to check lookahead */
term(); moreterms();
}
void moreterms()
{
if (lookahead == '+') {
match('+'); term(); emit('+', token_value); moreterms();
}
else if (lookahead == '-') {
match('-'); term(); emit('-', token_value); moreterms();
}
else {
/* Empty */
}
}
void term ()
{
/* Just one production for term, so we don't need to check lookahead */
factor(); morefactors();
}
void morefactors ()
{
if (lookahead == '*') {
match('*'); factor(); emit('*', token_value); morefactors();
}
else if (lookahead == '/') {
match('/'); factor(); emit('/', token_value); morefactors();
}
else if (lookahead == DIV) {
match(DIV); factor(); emit(DIV, token_value); morefactors();
}
else if (lookahead == MOD) {
match(MOD); factor(); emit(MOD, token_value); morefactors();
}
else {
/* Empty */
}
}
void factor ()
{
if (lookahead == '(') {
match('('); expr(); match(')');
}
else if (lookahead == ID) {
int id_number = token_value;
match(ID);
emit(ID, id_number);
}
else if (lookahead == NUM) {
int num_value = token_value;
match(NUM);
emit(NUM, num_value);
}
else
error("syntax error in factor");
}
void match(int t)
{
if (lookahead == t)
lookahead = lexan();
else
error ("syntax error in match");
}
/* emitter.c */
#include "global.h"
void emit (int token_type, int token_value) /* generates output */
{
switch(token_type) {
case '+' : case '-' : case '*' : case '/':
printf("%c\n", token_type); break;
case DIV:
printf("DIV\n"); break;
case MOD:
printf("MOD\n"); break;
case NUM:
printf("%d\n", token_value); break;
case ID:
printf("%s\n", symtable[token_value].lexeme); break;
default:
printf("[Unknown token %d, with value %d]\n", token_type, token_value);
}
}
/* symbol.c */
#include "global.h"
#define MAX_SYMBOLS 100 /* size of symbol table */
struct symentry symtable[MAX_SYMBOLS];
int nr_symbols = 0; /* number of symbols in symtable */
int lookup(char *s) /* returns position of entry for s, or -1 if not found */
{
for (int p = nr_symbols - 1; p >= 0; --p)
if (strcmp(symtable[p].lexeme, s) == 0)
return p;
return -1;
}
int insert(char *s, int token_type) /* returns position of entry for s */
{
if (nr_symbols >= MAX_SYMBOLS)
error("Symbol table full");
symtable[nr_symbols].token_type = token_type;
symtable[nr_symbols].lexeme = strdup(s);
return nr_symbols++;
}
/* init.c */
#include "global.h"
struct symentry keywords[] = {
{ "div", DIV },
{ "mod", MOD, }
};
void init() /* loads keywords into symtable */
{
int nr_keywords = sizeof(keywords) / sizeof(keywords[0]);
for (int i = 0; i < nr_keywords; ++i)
insert(keywords[i].lexeme, keywords[i].token_type);
}
/* error.c */
#include "global.h"
void error(char* message) /* generates all error messages */
{
fflush(stdout);
fprintf(stderr, "[Error on line %d: %s]\n", lineno, message);
exit(EXIT_FAILURE); /* unsuccessful termination */
}
/* main.c */
#include "global.h"
int main(void)
{
init();
parse();
exit(0); /* successful termination */
}
OBJECTS = lexer.o parser.o emitter.o symbol.o init.o error.o main.o SOURCES = lexer.c parser.c emitter.c symbol.c init.c error.c main.c EXE = infix2postfix CFLAGS += -Wall -g $(EXE): $(OBJECTS) gcc $(CFLAGS) -o $(EXE) $(OBJECTS) main.o: main.c global.h gcc $(CFLAGS) -c main.c lexer.o: lexer.c global.h gcc $(CFLAGS) -c lexer.c parser.o: parser.c global.h gcc $(CFLAGS) -c parser.c emitter.o: emitter.c global.h gcc $(CFLAGS) -c emitter.c symbol.o: symbol.c global.h gcc $(CFLAGS) -c symbol.c init.o: init.c global.h gcc $(CFLAGS) -c init.c error.o: error.c global.h gcc $(CFLAGS) -c error.c clean: rm -f $(EXE) $(OBJECTS) 29.tar.gz 29.zip *~ archives: clean cd ..; rm 29.tar 29.zip 29/29.tar 29/29.zip; tar -cvf 29.tar 29; gzip -9 29.tar; zip -r 29.zip 29; mv 29.zip 29/29.zip; mv 29.tar.gz 29/29.tar.gz