compilation_course_compiler/parser.py

import sly
from lexer import Lexer
from helper import *
from items import *

# Notes from reading:
# - it seems no scoping is needed, as it contains only one scope per program

class Parser(sly.Parser):
    tokens = Lexer.tokens
    symbol_table = {}
    had_errors = False # Simply to know if we need to write the program in the end
    lines = [ '' ] # Start with en empty line, so len(lines) will represent the next line
    last_used_temp = 0 # cpl doesnt allow _ in ids, so use t_NUM as variables

    def next_temp(self):
        # returns the next temp variable name
        self.last_used_temp += 1
        return f't_{self.last_used_temp}'

    def error(self, p):
        self.had_errors = True

    @_('declarations stmt_block')
    def program(self, p):
        self.lines.append('HALT')
        self.lines.append('Aviv Romem')
        self.lines.pop(0) # remove the empty line
        return None

    @_('declarations declaration')
    def declarations(self, p):
        return None

    @_('')
    def declarations(self, p):
        # Empty
        return None

    # declaration errors
    @_(
     'idlist error ":" type ";"',
     'error idlist ":" type ";"',
     'idlist error idlist ":" type ";"',
     'idlist error type ";"',
     'idlist ":" error ";"',
     'idlist ":" type error',
     'error ":" type ";"'
     )
    def declaration(self, p):
        self.had_errors = True
        print_err(f"Invalid declaration in line {p.lineno}")

    @_('idlist ":" type ";"')
    def declaration(self, p):
        floats = p[2].is_float
        ids = p[0]
        for i in ids:
            if self.symbol_table.get(i.id):
                self.had_errors = True
                print_err(f'ID {i.id} defined twice, first in line {self.symbol_table[i.id].line}, second time in line {i.line}')
            else:
                self.symbol_table[i.id] = Symbol(floats, i.line)
        return None


    @_('INT', 'FLOAT')
    def type(self, p):
        return Expression('', p[0] == 'FLOAT') # return an item with the type and an empty result

    @_('idlist "," ID')
    def idlist(self, p):
        return p[0] + [ Id(p[2], p.lineno) ]

    @_('ID')
    def idlist(self, p):
        return [ Id(p[0], p.lineno) ]

    @_(
        'assignment_stmt', 'input_stmt', 'output_stmt', 'if_stmt',
        'while_stmt', 'switch_stmt', 'break_stmt', 'stmt_block'
    )
    def stmt(self, p):
        return p[0]

    @_('error ";"')
    def stmt(self, p):
        self.had_errors = True
        print_err(f'Invalid statement in line {p.lineno}')
        return Statement([])

    @_('ID "=" expression ";"')
    def assignment_stmt(self, p):
        id = p[0]
        exp = p[2]
        if self.symbol_table.get(id) is None:
            self.had_errors = True
            print_err(f'Unknown variable {id} at line {p.lineno}')
            return Statement([])
        sym = self.symbol_table.get(id)
        if not sym.is_float and exp.is_float:
            self.had_errors = True
            print_err(f'Trying to assign a float to an int at line {p.lineno}, did you forget a cast?')
        if sym.is_float and not exp.is_float: # we need to cast exp to float
            new_exp = self.next_temp()
            self.lines.append(f'ITOR {new_exp} {exp.result}')
            exp = Exception(new_exp, False)
        command = 'RASN' if sym.is_float else 'IASN'
        self.lines.append(f'{command} {id} {exp.result}')
        return Statement([])


    @_('INPUT error ";"')
    def input_stmt(self, p):
        self.had_errors = True
        print_err(f'Invalid input statement in line {p.lineno}')
        return Statement([])

    @_('INPUT "(" ID ")" ";"')
    def input_stmt(self, p):
        id = p[2]
        if self.symbol_table.get(id) is None:
            self.had_errors = True
            print_err(f'Unknown variable {id} at line {p.lineno}')
        else:
            sym = self.symbol_table.get(id)
            command = 'RINP' if sym.is_float else 'IINP'
            self.lines.append(f'{command} {id}')
        return Statement([])

    @_('OUTPUT error ";"')
    def output_stmt(self, p):
        self.had_errors = True
        print_err(f'Invalid output statement in line {p.lineno}')

    @_('OUTPUT "(" ID ")" ";"')
    def output_stmt(self, p):
        id = p[2]
        if self.symbol_table.get(id) is None:
            self.had_errors = True
            print_err(f'Unknown variable {id} at line {p.lineno}')
        else:
            sym = self.symbol_table.get(id)
            command = 'RPRT' if sym.is_float else 'IPRT'
            self.lines.append(f'{command} {id}')
        return Statement([])

    @_('IF error stmt')
    def if_stmt(self, p):
        self.had_errors = True
        print_err(f'Invalid if statement in line {p.lineno}')
        return Statement([])

    @_('IF "(" boolexpr ")" if_jump stmt if_jump ELSE stmt')
    def if_stmt(self, p):
        exp = p[2]
        jump_else = p[4]
        jump_end = p[6]
        self.lines[jump_else] = f'JMPZ {jump_end + 1} {exp.result}'
        self.lines[jump_end] = f'JUMP {len(self.lines)}'
        return Statement(p[5].breaks + p[8].breaks) # return the list of breaks from both stmt s


    @_('')
    def if_jump(self, p):
        # append an empty line as a placeholder for the jump
        line = len(self.lines)
        self.lines.append('')
        return line

    @_('WHILE error stmt')
    def while_stmt(self, p):
        self.had_errors = True
        print_err(f'Invalid while statement in line {p.lineno}')
        return Statement([])

    @_('WHILE seen_WHILE "(" boolexpr ")" while_quit stmt')
    def while_stmt(self, p):
        # return to the start
        check_line = p[1]
        self.lines.append(f'JUMP {check_line}')
        # add the check for the boolexpr
        exp = p[3]
        jump_if_fail = len(self.lines)
        jump_line = p[5] # while_quit
        exit_line = f'JMPZ {jump_if_fail} {exp.result}'
        self.lines[jump_line] = exit_line
        for break_line in p.stmt.breaks:
            self.lines[break_line] = f'JUMP {jump_if_fail}'
        return Statement([])

    @_('')
    def while_quit(self, p):
        # append an empty line as a placeholder for the jump if the while check failed
        line = len(self.lines)
        self.lines.append('')
        return line

    @_('')
    def seen_WHILE(self, p):
        # helper to get the line number of when we start the check thing for a while expr
        return len(self.lines)

    @_('SWITCH error "}"', 'SWITCH "(" expression ")" "{" caselist "}"')
    def switch_stmt(self, p):
        self.had_errors = True
        print_err(f'Invalid switch statement in line {p.lineno}')
        return Statement([])

    @_('SWITCH "(" expression ")" "{" caselist DEFAULT ":" stmtlist "}"')
    def switch_stmt(self, p):
        # The way this works is, every caselist (and case) return a list of where to put the comparison and jump
        # also, where to jump(as in, where is the next case)
        # they also add a little jump over the next case check, which would probably be
        # removed by the optimizer if there is a break (since we will have 2 jumps one after the other)
        exp = p[2]
        cases = p[5]
        if exp.is_float:
            self.had_errors = True
            print_err(f'Invalid switch statement expression at line {p.lineno}: Expected an integer expression but found float')
        cmp = self.next_temp()
        break_line = f'JUMP {len(self.lines)}'
        for c in cases:
            self.lines[c.line] = f'IEQL {cmp} {exp.result} {c.num}'
            self.lines[c.line + 1] = f'JMPZ {c.end} {cmp}'
            for b in c.breaks:
                self.lines[b] = break_line
        if len(cases) > 0:
            last_case = cases[-1]
            # modify the last jump(that skips over the next check) to not skip the default case
            self.lines[last_case.end - 1] = f'JUMP {last_case.end}' # it is stupid, but an optimizer will hae no problem fixing it
        for b in p[8].breaks: # breaks in default
            self.lines[b] = break_line
        return Statement([])


    @_('caselist CASE NUM case_check ":" stmtlist')
    def caselist(self, p):
        # Check that NUM doesnt have a dot(and indeed is an integer)
        if p[2].find('.') != -1:
            self.had_errors = True
            print_err(f'Invalid case constant at line {p.lineno}: Expected an integer but found a float')
            # continue as if nothing happend
        self.lines.append(f'JUMP {len(self.lines) + 3}') # Jump over the next comparison in the case of fallthrough

        line = p[3]
        return p[0] + [ Case(p[2], line, len(self.lines), p[5].breaks) ]


    @_('')
    def case_check(self, p):
        line = len(self.lines)
        self.lines.append('') # IEQL
        self.lines.append('') # JPMZ
        return line

    @_('')
    def caselist(self, p):
        return []

    @_('BREAK ";"')
    def break_stmt(self, p):
        line = len(self.lines)
        self.lines.append('') # Empty line for break()
        return Statement([line])

    @_('"{" stmtlist "}"')
    def stmt_block(self, p):
        return p[1]

    @_('stmtlist stmt')
    def stmtlist(self, p):
        return Statement(p[0].breaks + p[1].breaks)

    @_('')
    def stmtlist(self, p):
        return Statement([]) # Empty item

    @_('boolexpr OR boolterm')
    def boolexpr(self, p):
        res = self.next_temp()
        # add, if one is not 0(aka true), result shall be non zero
        self.lines.append(f'IADD {res} {p[0].result} {p[2].result}')
        return Expression(res, False)

    @_('boolterm')
    def boolexpr(self, p):
        return p[0]

    @_('boolterm AND boolfactor')
    def boolterm(self, p):
        res = self.next_temp()
        # multiply, as if one side is 0(aka false), it will be false
        # also, bool items are always int
        self.lines.append(f'IMLT {res} {p[0].result} {p[2].result}')
        return Statement(res, False)

    @_('boolfactor')
    def boolterm(self, p):
        return p[0]

    @_('NOT "(" boolexpr ")"')
    def boolfactor(self, p):
        # as far as i understand, all bool expressions are integers(as RELOP always returns an int)
        exp = p[2]
        res = self.next_temp()
        self.lines.append(f'IEQL {res} {exp.result} 0')
        return Expression(res, False)

    @_('expression RELOP expression')
    def boolfactor(self, p):
        float_op = p[0].is_float or p[2].is_float
        lhs = p[0].result
        rhs = p[2].result
        if float_op: # Check if we need to cast someone to float
            if not p[0].is_float:
                new_term = self.next_temp()
                self.lines.append(f'ITOR {new_term} {lhs}')
                lhs = new_term
            elif not p[2].is_float: # elif since if both are false we wont be here to begin with
                new_term = self.next_temp()
                self.lines.append(f'ITOR {new_term} {rhs}')
                rhs = new_term
        # RELOP to command map
        com_map = { '<': 'LSS', '>': 'GRT', '==': 'EQL', '!=': 'NQL', '<=': 'GRT', '>=': 'LSS' }
        # if we need to negate(since quad doesnt have a built in support for it)
        should_negate = [ '<=', '>=' ]
        command = ('R' if float_op else 'I') + com_map[p[1]]
        result = self.next_temp()
        self.lines.append(f'{command} {result} {lhs} {rhs}')
        if p[1] in should_negate:
            self.lines.append(f'IEQL {result} {result} 0')
        return Expression(result, False)

    @_('expression ADDOP term')
    def expression(self, p):
        float_op = p[0].is_float or p[2].is_float
        lhs = p[0].result
        rhs = p[2].result
        if float_op: # Check if we need to cast someone to float
            if not p[0].is_float:
                new_term = self.next_temp()
                self.lines.append(f'ITOR {new_term} {lhs}')
                lhs = new_term
            elif not p[2].is_float: # elif since if both are false we wont be here to begin with
                new_term = self.next_temp()
                self.lines.append(f'ITOR {new_term} {rhs}')
                rhs = new_term
        command = ('R' if float_op else 'I') + ('ADD' if p[1] == '+' else 'SUB')
        result = self.next_temp()
        self.lines.append(f'{command} {result} {lhs} {rhs}')
        return Expression(result, float_op)

    @_('term')
    def expression(self, p):
        return p[0]

    @_('term MULOP factor')
    def term(self, p):
        float_op = p[0].is_float or p[2].is_float
        lhs = p[0].result
        rhs = p[2].result
        if float_op: # Check if we need to cast someone to float
            if not p[0].is_float:
                new_term = self.next_temp()
                self.lines.append(f'ITOR {new_term} {lhs}')
                lhs = new_term
            elif not p[2].is_float: # elif since if both are false we wont be here to begin with
                new_term = self.next_temp()
                self.lines.append(f'ITOR {new_term} {rhs}')
                rhs = new_term
        command = ('R' if float_op else 'I') + ('MLT' if p[1] == '*' else 'DIV')
        result = self.next_temp()
        self.lines.append(f'{command} {result} {lhs} {rhs}')
        return Expression(result, float_op)

    @_('factor')
    def term(self, p):
        return p[0]

    @_('"(" expression ")"')
    def factor(self, p):
        return p[1]

    @_('CAST "(" expression ")"')
    def factor(self, p):
        cast_to_float = p[0].find('float') != -1 # if its not a cast to float, its probably a cast to int
        exp = p[2]
        if(cast_to_float != exp.is_float): # if we cast from int to float or from float to int
            casted = self.next_temp()
            command = 'ITOR' if cast_to_float else 'RTOI'
            self.lines.append(f'{command} {casted} {exp.result}')
            return Expression(casted, cast_to_float)
        return Expression(exp.result, cast_to_float)

    @_('ID')
    def factor(self, p):
        if self.symbol_table.get(p[0]) is None:
            self.had_errors = True
            print_err(f'Unknown variable {p[0]} at line {p.lineno}')
            return Expression('0', False)
        return Expression(p[0], self.symbol_table[p[0]].is_float)

    @_('NUM')
    def factor(self, p):
        return Expression(p.NUM, p[0].find('.') != -1)