S -> T T -> a b U U -> c
b) Language:
a b c
S -> T T U T -> a U U -> b c
b) Language:
a b c a b c b c
S -> a T | b T T -> U c | V c U -> a b V -> b b
b) Language:
a a b c a b b c b a b c b b b c
S -> a T S | b T -> a b c
b) Language:
b aabcb aabcaabcb aabcaabcaabcb ...With a regular expression: (aabc)*b
S -> a b S | b a
With a regular expression: (ab)*ba
b) Parser:
// A simple parser: parser-5.c #include <stdio.h> #include <ctype.h> #include <stdlib.h> int lookahead; void error(void) { printf("Syntax error: unexpected token '%c'\n", lookahead); exit(EXIT_FAILURE); } // A simple scanner. // Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored. int scan(void) { int input; while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF) ; return input; } void match(int expected) { if (lookahead == expected) lookahead = scan(); else error(); } // These are the non-terminals, calling each other recursively void S(void); void S(void) { if (lookahead == 'a') { match('a'); match('b'); S(); } else if (lookahead == 'b') { match('b'); match('a'); } else { error(); } } int main(void) { printf("Parser 5. Enter input. End with EOF (CTRL-D on Linux).\n"); lookahead = scan(); // To get started, so we have somthing to compare to S(); if (lookahead != EOF) error(); printf("Done!\n"); return EXIT_SUCCESS; }
b) Some examples of invalid input:
c) Parser:
// A parser for a subset of C: parser-6.c #include <stdlib.h> #include <stdio.h> #include <ctype.h> #include <string.h> // Single-character tokens are represented by their character code. // These are the other token codes. enum token_codes { VARIABLE = 1000, INTEGER_LITERAL = 1001, IF = 1002 }; int lookahead; void error(void) { printf("[Syntax error: unexpected token '%c' (code %d)]\n", (isprint(lookahead) ? lookahead : '?'), lookahead); exit(EXIT_FAILURE); } // A scanner for a subset of C. // Returns one of the tokens '(', ')', ';', '<' and ';', // or one of the tokens VARIABLE, INTEGER_LITERAL, or EOF. // Whitespace is ignored. // Any other character is an error. int scan(void) { while(1) { int c = getchar(); if (c == EOF) return EOF; else if (isspace(c)) ; // Just skip whitespace else if (isdigit(c)) { // This is the start of an integer literal // Read it, but ignore the value while (isdigit(c)) { c = getchar(); } if (c != EOF) ungetc(c, stdin); return INTEGER_LITERAL; } else if (isalpha(c)) { // This is the start of a keyword or variable name #define MAX_ID_LENGTH 100 char buffer[MAX_ID_LENGTH + 1]; int n = 0; while (isalpha(c)) { buffer[n++] = c; if (n > MAX_ID_LENGTH) { printf("[Error: Identifier too long]\n"); exit(EXIT_FAILURE); } c = getchar(); } buffer[n] = '\0'; if (c != EOF) ungetc(c, stdin); if (strcmp(buffer, "if") == 0) return IF; else return VARIABLE; } else if (strchr("();<=", c) != NULL) return c; else { printf("[Error: Unknown character: '%c']\n", c); exit(EXIT_FAILURE); } } // while } // scan void match(int expected) { if (lookahead == expected) lookahead = scan(); else error(); } // These are the non-terminals, calling each other recursively void statement_list(void), statement(void), condition(void); void statement_list(void) { if (lookahead == VARIABLE || lookahead == IF) { statement(); statement_list(); } else { // Anything is ok here, since an empty sequence of tokens will fit anywhere } } // statement_list void statement(void) { if (lookahead == VARIABLE) { match(VARIABLE); match('='); match(INTEGER_LITERAL); match(';'); } else if (lookahead == IF) { match(IF); match('('); condition(); match(')'); statement(); } else { error(); } } // statement void condition(void) { match(VARIABLE); match('<'); match(VARIABLE); } int main(void) { printf("Parser 6. Enter input. End with EOF (CTRL-D on Linux).\n"); lookahead = scan(); // To get started, so we have somthing to compare to statement_list(); if (lookahead != EOF) error(); printf("Done!\n"); return EXIT_SUCCESS; }