KOI: Some solutions to exercise 3

Please note that these are suggested solutions. There can be other solutions that are also correct.

Question 1

a) Grammar:

S -> T
T -> a b U
U -> c

b) Language:

a b c

Question 2

a) Grammar:

S -> T T U
T -> a U
U -> b c

b) Language:

a b c a b c b c

Question 3

a) Grammar:

S -> a T | b T
T -> U c | V c
U -> a b
V -> b b

b) Language:

a a b c
a b b c
b a b c
b b b c

Question 4

a) Grammar:

S -> a T S | b
T -> a b c

b) Language:


b
aabcb
aabcaabcb
aabcaabcaabcb
...
With a regular expression: (aabc)*b

Question 5

a) Grammar:

S -> a b S | b a

With a regular expression: (ab)*ba

b) Parser:

Download

// A simple parser: parser-5.c

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>

int lookahead;

void error(void) {
    printf("Syntax error: unexpected token '%c'\n", lookahead);
    exit(EXIT_FAILURE);
}

// A simple scanner.
// Returns one of the tokens 'a', 'b' or 'c', or EOF. Other input is ignored.
int scan(void) {
    int input;
    while ((input = getchar()) != 'a' && input != 'b' && input != 'c' && input != EOF)
        ;
    return input;
}

void match(int expected) {
    if (lookahead == expected)
        lookahead = scan();
    else
        error();
}

// These are the non-terminals, calling each other recursively
void S(void);

void S(void) {
    if (lookahead == 'a') {
        match('a'); match('b'); S();
    }
    else if (lookahead == 'b') {
        match('b'); match('a');
    }
    else {
        error();
    }
}

int main(void) {
    printf("Parser 5. Enter input. End with EOF (CTRL-D on Linux).\n");
    lookahead = scan(); // To get started, so we have somthing to compare to
    S();
    if (lookahead != EOF)
        error();
    printf("Done!\n");
    return EXIT_SUCCESS;
}

Question 6

a) Some examples of valid input:

b) Some examples of invalid input:

c) Parser:

Download

// A parser for a subset of C: parser-6.c

#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>

// Single-character tokens are reprseted by their character code.
// These are the other token codes.
enum token_codes {
    VARIABLE = 1000,
    INTEGER_LITERAL = 1001,
    IF = 1002
};

int lookahead;

void error(void) {
    printf("[Syntax error: unexpected token '%c' (code %d)]\n",
           (isprint(lookahead) ? lookahead : '?'), lookahead);
    exit(EXIT_FAILURE);
}

// A scanner for a subset of C.
// Returns one of the tokens '(', ')', ';', '<' and ';',
// or one of the tokens VARIABLE, INTEGER_LITERAL, or EOF.
// Whitespace is ignored.
// Any other  character is an error.
int scan(void) {
    while(1) {
        int c = getchar();
        if (c == EOF)
            return EOF;
        else if (isspace(c))
            ; // Just skip whitespace
        else if (isdigit(c)) {
            // This is the start of an integer literal
            // Read it, but ignore the value
            while (isdigit(c)) {
                c = getchar();
            }
            if (c != EOF)
                ungetc(c, stdin);
            return INTEGER_LITERAL;
        }
        else if (isalpha(c)) {
            // This is the start of a keyword or variable name
            #define MAX_ID_LENGTH 100
            char buffer[MAX_ID_LENGTH + 1];
            int n = 0;
            while (isalpha(c)) {
                buffer[n++] = c;
                if (n > MAX_ID_LENGTH) {
                    printf("[Error: Identifier too long]\n");
                    exit(EXIT_FAILURE);
                }
                c = getchar();
            }
            buffer[n] = '\0';
            if (c != EOF)
                ungetc(c, stdin);
            if (strcmp(buffer, "if") == 0)
                return IF;
            else
                return VARIABLE;
        }
        else if (strchr("();<=", c) != NULL)
            return c;
        else {
            printf("[Error: Unknown character: '%c']\n", c);
            exit(EXIT_FAILURE);
        }
    } // while
} // scan

void match(int expected) {
    if (lookahead == expected)
        lookahead = scan();
    else
        error();
}

// These are the non-terminals, calling each other recursively
void statement_list(void), statement(void), condition(void);

void statement_list(void) {
    if (lookahead == VARIABLE || lookahead == IF) {
        statement(); statement_list();
    }
    else {
        // Anything is ok here, since an empty sequence of tokens will fit anywhere
    }
} // statement_list

void statement(void) {
    if (lookahead == VARIABLE) {
        match(VARIABLE); match('='); match(INTEGER_LITERAL); match(';');
    }
    else if (lookahead == IF) {
        match(IF); match('('); condition(); match(')'); statement();
    }
    else {
        error();
    }
} // statement

void condition(void) {
    match(VARIABLE); match('<'); match(VARIABLE);
}

int main(void) {
    printf("Parser 6. Enter input. End with EOF (CTRL-D on Linux).\n");
    lookahead = scan(); // To get started, so we have somthing to compare to
    statement_list();
    if (lookahead != EOF)
        error();
    printf("Done!\n");
    return EXIT_SUCCESS;
}


Thomas Padron-McCarthy (thomas.padron-mccarthy@oru.se) September 14, 2023