a) What is the grammar that this parser implements?
b) What is the language that it defines?
(Some helpful hints: It only understands the tokens, or terminals, a, b and c. Finish the input with EOF, which in Linux is entered using CTRL-D on a separate line. All other input, such as d, is ignored. You can compile and run the program if you want, but you don't have to.)
// A simple parser: parser-1.c #include <stdio.h> #include <ctype.h> #include <stdlib.h> int lookahead; void error(void) { printf("Syntax error: unexpected token '%c'\n", lookahead); exit(EXIT_FAILURE); } // A simple scanner. // Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored. int scan(void) { int input; while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF) ; return input; } void match(int expected) { if (lookahead == expected) lookahead = scan(); else error(); } // These are the non-terminals, calling each other recursively void S(void), T(void), U(void); void S(void) { T(); } void T(void) { match('a'); match('b'); U(); } void U(void) { match('c'); } int main(void) { printf("Parser 1. Enter input. End with EOF (CTRL-D on Linux).\n"); lookahead = scan(); // To get started, so we have somthing to compare to S(); if (lookahead != EOF) error(); printf("Done!\n"); return EXIT_SUCCESS; }
a) What is the grammar that this parser implements?
b) What is the language that it defines?
// A simple parser: parser-2.c #include <stdio.h> #include <ctype.h> #include <stdlib.h> int lookahead; void error(void) { printf("Syntax error: unexpected token '%c'\n", lookahead); exit(EXIT_FAILURE); } // A simple scanner. // Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored. int scan(void) { int input; while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF) ; return input; } void match(int expected) { if (lookahead == expected) lookahead = scan(); else error(); } // These are the non-terminals, calling each other recursively void S(void), T(void), U(void); void S(void) { T(); T(); U(); } void T(void) { match('a'); U(); } void U(void) { match('b'); match('c'); } int main(void) { printf("Parser 2. Enter input. End with EOF (CTRL-D on Linux).\n"); lookahead = scan(); // To get started, so we have somthing to compare to S(); if (lookahead != EOF) error(); printf("Done!\n"); return EXIT_SUCCESS; }
a) What is the grammar that this parser implements?
b) What is the language that it defines?
// A simple parser: parser-3.c #include <stdio.h> #include <ctype.h> #include <stdlib.h> int lookahead; void error(void) { printf("Syntax error: unexpected token '%c'\n", lookahead); exit(EXIT_FAILURE); } // A simple scanner. // Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored. int scan(void) { int input; while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF) ; return input; } void match(int expected) { if (lookahead == expected) lookahead = scan(); else error(); } // These are the non-terminals, calling each other recursively void S(void), T(void), U(void), V(void); void S(void) { if (lookahead == 'a') { match('a'); T(); } else if (lookahead == 'b') { match('b'); T(); } else { error(); } } void T(void) { if (lookahead == 'a') { U(); match('c'); } else if (lookahead == 'b') { V(); match('c'); } else { error(); } } void U(void) { match('a'); match('b'); } void V(void) { match('b'); match('b'); } int main(void) { printf("Parser 3. Enter input. End with EOF (CTRL-D on Linux).\n"); lookahead = scan(); // To get started, so we have somthing to compare to S(); if (lookahead != EOF) error(); printf("Done!\n"); return EXIT_SUCCESS; }
a) What is the grammar that this parser implements?
b) What is the language that it defines?
// A simple parser: parser-4.c #include <stdio.h> #include <ctype.h> #include <stdlib.h> int lookahead; void error(void) { printf("Syntax error: unexpected token '%c'\n", lookahead); exit(EXIT_FAILURE); } // A simple scanner. // Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored. int scan(void) { int input; while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF) ; return input; } void match(int expected) { if (lookahead == expected) lookahead = scan(); else error(); } // These are the non-terminals, calling each other recursively void S(void), T(void); void S(void) { if (lookahead == 'a') { match('a'); T(); S(); } else if (lookahead == 'b') { match('b'); } else { error(); } } void T(void) { match('a'); match('b'); match('c'); } int main(void) { printf("Parser 4. Enter input. End with EOF (CTRL-D on Linux).\n"); lookahead = scan(); // To get started, so we have somthing to compare to S(); if (lookahead != EOF) error(); printf("Done!\n"); return EXIT_SUCCESS; }
a) Write a grammar for this language.
b) Write a parser that implements this grammar.
The terminal variable is a variable name, and consists of one or more lower-case letters. The terminal integer-literal is an integer literal, and consists of one or more normal decimal digits. Empty stands for an empty production. The start symbol is statement-list.statement-list -> statement statement-list | empty statement -> variable = integer-literal ; condition -> variable < variable statement -> if ( condition ) statement
As an example of valid input, we saw this:
if (hjalmar < hulda) if (z < x) w = 33; kalle = 17;a) Write some more examples of valid input.
b) Write some examples of invalid input.
c) Write a parser for the language. Verify that the inputs in a and b are handled as expected. Below is a program skeleton with a scanner and some definitions, so you only have to add the actual parsing part.
// A parser for a subset of C: parser-6.c #include <stdlib.h> #include <stdio.h> #include <ctype.h> #include <string.h> // Single-character tokens are reprseted by their character code. // These are the other token codes. enum token_codes { VARIABLE = 1000, INTEGER_LITERAL = 1001, IF = 1002 }; int lookahead; void error(void) { printf("[Syntax error: unexpected token '%c' (code %d)]\n", (isprint(lookahead) ? lookahead : '?'), lookahead); exit(EXIT_FAILURE); } // A scanner for a subset of C. // Returns one of the tokens '(', ')', ';', '<' and ';', // or one of the tokens VARIABLE, INTEGER_LITERAL, IF, or EOF. // Whitespace is ignored. // Any other character is an error. int scan(void) { while(1) { int c = getchar(); if (c == EOF) return EOF; else if (isspace(c)) ; // Just skip whitespace else if (isdigit(c)) { // This is the start of an integer literal // Read it, but ignore the value while (isdigit(c)) { c = getchar(); } if (c != EOF) ungetc(c, stdin); return INTEGER_LITERAL; } else if (isalpha(c)) { // This is the start of a keyword or variable name #define MAX_ID_LENGTH 100 char buffer[MAX_ID_LENGTH + 1]; int n = 0; while (isalpha(c)) { buffer[n++] = c; if (n > MAX_ID_LENGTH) { printf("[Error: Identifier too long]\n"); exit(EXIT_FAILURE); } c = getchar(); } buffer[n] = '\0'; if (c != EOF) ungetc(c, stdin); if (strcmp(buffer, "if") == 0) return IF; else return VARIABLE; } else if (strchr("();<=", c) != NULL) return c; else { printf("[Error: Unknown character: '%c']\n", c); exit(EXIT_FAILURE); } } // while } // scan void match(int expected) { if (lookahead == expected) lookahead = scan(); else error(); } // These are the non-terminals, calling each other recursively void statement_list(void), statement(void), condition(void); // Write the functions statement_list, statement and condition! int main(void) { printf("Parser 6. Enter input. End with EOF (CTRL-D on Linux).\n"); lookahead = scan(); // To get started, so we have somthing to compare to statement_list(); if (lookahead != EOF) error(); printf("Done!\n"); return EXIT_SUCCESS; }
There are some solutions to some of the questions, but try to solve them yourself first.