compilation_course_compiler/parser.py

408 lines
14 KiB
Python

import sly
from lexer import Lexer
from helper import *
from items import *
# Notes from reading:
# - it seems no scoping is needed, as it contains only one scope per program
class Parser(sly.Parser):
tokens = Lexer.tokens
symbol_table = {}
had_errors = False # Simply to know if we need to write the program in the end
lines = [ '' ] # Start with en empty line, so len(lines) will represent the next line
last_used_temp = 0 # cpl doesnt allow _ in ids, so use t_NUM as variables
def next_temp(self):
# returns the next temp variable name
self.last_used_temp += 1
return f't_{self.last_used_temp}'
def error(self, p):
self.had_errors = True
@_('declarations stmt_block')
def program(self, p):
self.lines.append('HALT')
self.lines.append('Aviv Romem')
self.lines.pop(0) # remove the empty line
return None
@_('declarations declaration')
def declarations(self, p):
return None
@_('')
def declarations(self, p):
# Empty
return None
# declaration errors
@_(
'idlist error ":" type ";"',
'error idlist ":" type ";"',
'idlist error idlist ":" type ";"',
'idlist error type ";"',
'idlist ":" error ";"',
'idlist ":" type error',
'error ":" type ";"'
)
def declaration(self, p):
self.had_errors = True
print_err(f"Invalid declaration in line {p.lineno}")
@_('idlist ":" type ";"')
def declaration(self, p):
floats = p[2].is_float
ids = p[0]
for i in ids:
if self.symbol_table.get(i.id):
self.had_errors = True
print_err(f'ID {i.id} defined twice, first in line {self.symbol_table[i.id].line}, second time in line {i.line}')
else:
self.symbol_table[i.id] = Symbol(floats, i.line)
return None
@_('INT', 'FLOAT')
def type(self, p):
return Expression('', p[0] == 'FLOAT') # return an item with the type and an empty result
@_('idlist "," ID')
def idlist(self, p):
return p[0] + [ Id(p[2], p.lineno) ]
@_('ID')
def idlist(self, p):
return [ Id(p[0], p.lineno) ]
@_(
'assignment_stmt', 'input_stmt', 'output_stmt', 'if_stmt',
'while_stmt', 'switch_stmt', 'break_stmt', 'stmt_block'
)
def stmt(self, p):
return p[0]
@_('error ";"')
def stmt(self, p):
self.had_errors = True
print_err(f'Invalid statement in line {p.lineno}')
return Statement([])
@_('ID "=" expression ";"')
def assignment_stmt(self, p):
id = p[0]
exp = p[2]
if self.symbol_table.get(id) is None:
self.had_errors = True
print_err(f'Unknown variable {id} at line {p.lineno}')
return Statement([])
sym = self.symbol_table.get(id)
if not sym.is_float and exp.is_float:
self.had_errors = True
print_err(f'Trying to assign a float to an int at line {p.lineno}, did you forget a cast?')
if sym.is_float and not exp.is_float: # we need to cast exp to float
new_exp = self.next_temp()
self.lines.append(f'ITOR {new_exp} {exp.result}')
exp = Exception(new_exp, False)
command = 'RASN' if sym.is_float else 'IASN'
self.lines.append(f'{command} {id} {exp.result}')
return Statement([])
@_('INPUT error ";"')
def input_stmt(self, p):
self.had_errors = True
print_err(f'Invalid input statement in line {p.lineno}')
return Statement([])
@_('INPUT "(" ID ")" ";"')
def input_stmt(self, p):
id = p[2]
if self.symbol_table.get(id) is None:
self.had_errors = True
print_err(f'Unknown variable {id} at line {p.lineno}')
else:
sym = self.symbol_table.get(id)
command = 'RINP' if sym.is_float else 'IINP'
self.lines.append(f'{command} {id}')
return Statement([])
@_('OUTPUT error ";"')
def output_stmt(self, p):
self.had_errors = True
print_err(f'Invalid output statement in line {p.lineno}')
@_('OUTPUT "(" ID ")" ";"')
def output_stmt(self, p):
id = p[2]
if self.symbol_table.get(id) is None:
self.had_errors = True
print_err(f'Unknown variable {id} at line {p.lineno}')
else:
sym = self.symbol_table.get(id)
command = 'RPRT' if sym.is_float else 'IPRT'
self.lines.append(f'{command} {id}')
return Statement([])
@_('IF error stmt')
def if_stmt(self, p):
self.had_errors = True
print_err(f'Invalid if statement in line {p.lineno}')
return Statement([])
@_('IF "(" boolexpr ")" if_jump stmt if_jump ELSE stmt')
def if_stmt(self, p):
exp = p[2]
jump_else = p[4]
jump_end = p[6]
self.lines[jump_else] = f'JMPZ {jump_end + 1} {exp.result}'
self.lines[jump_end] = f'JUMP {len(self.lines)}'
return Statement(p[5].breaks + p[8].breaks) # return the list of breaks from both stmt s
@_('')
def if_jump(self, p):
# append an empty line as a placeholder for the jump
line = len(self.lines)
self.lines.append('')
return line
@_('WHILE error stmt')
def while_stmt(self, p):
self.had_errors = True
print_err(f'Invalid while statement in line {p.lineno}')
return Statement([])
@_('WHILE seen_WHILE "(" boolexpr ")" while_quit stmt')
def while_stmt(self, p):
# return to the start
check_line = p[1]
self.lines.append(f'JUMP {check_line}')
# add the check for the boolexpr
exp = p[3]
jump_if_fail = len(self.lines)
jump_line = p[5] # while_quit
exit_line = f'JMPZ {jump_if_fail} {exp.result}'
self.lines[jump_line] = exit_line
for break_line in p.stmt.breaks:
self.lines[break_line] = f'JUMP {jump_if_fail}'
return Statement([])
@_('')
def while_quit(self, p):
# append an empty line as a placeholder for the jump if the while check failed
line = len(self.lines)
self.lines.append('')
return line
@_('')
def seen_WHILE(self, p):
# helper to get the line number of when we start the check thing for a while expr
return len(self.lines)
@_('SWITCH error "}"', 'SWITCH "(" expression ")" "{" caselist "}"')
def switch_stmt(self, p):
self.had_errors = True
print_err(f'Invalid switch statement in line {p.lineno}')
return Statement([])
@_('SWITCH "(" expression ")" "{" caselist DEFAULT ":" stmtlist "}"')
def switch_stmt(self, p):
# The way this works is, every caselist (and case) return a list of where to put the comparison and jump
# also, where to jump(as in, where is the next case)
# they also add a little jump over the next case check, which would probably be
# removed by the optimizer if there is a break (since we will have 2 jumps one after the other)
exp = p[2]
cases = p[5]
if exp.is_float:
self.had_errors = True
print_err(f'Invalid switch statement expression at line {p.lineno}: Expected an integer expression but found float')
cmp = self.next_temp()
break_line = f'JUMP {len(self.lines)}'
for c in cases:
self.lines[c.line] = f'IEQL {cmp} {exp.result} {c.num}'
self.lines[c.line + 1] = f'JMPZ {c.end} {cmp}'
for b in c.breaks:
self.lines[b] = break_line
if len(cases) > 0:
last_case = cases[-1]
# modify the last jump(that skips over the next check) to not skip the default case
self.lines[last_case.end - 1] = f'JUMP {last_case.end}' # it is stupid, but an optimizer will hae no problem fixing it
for b in p[8].breaks: # breaks in default
self.lines[b] = break_line
return Statement([])
@_('caselist CASE NUM case_check ":" stmtlist')
def caselist(self, p):
# Check that NUM doesnt have a dot(and indeed is an integer)
if p[2].find('.') != -1:
self.had_errors = True
print_err(f'Invalid case constant at line {p.lineno}: Expected an integer but found a float')
# continue as if nothing happend
self.lines.append(f'JUMP {len(self.lines) + 3}') # Jump over the next comparison in the case of fallthrough
line = p[3]
return p[0] + [ Case(p[2], line, len(self.lines), p[5].breaks) ]
@_('')
def case_check(self, p):
line = len(self.lines)
self.lines.append('') # IEQL
self.lines.append('') # JPMZ
return line
@_('')
def caselist(self, p):
return []
@_('BREAK ";"')
def break_stmt(self, p):
line = len(self.lines)
self.lines.append('') # Empty line for break()
return Statement([line])
@_('"{" stmtlist "}"')
def stmt_block(self, p):
return p[1]
@_('stmtlist stmt')
def stmtlist(self, p):
return Statement(p[0].breaks + p[1].breaks)
@_('')
def stmtlist(self, p):
return Statement([]) # Empty item
@_('boolexpr OR boolterm')
def boolexpr(self, p):
res = self.next_temp()
# add, if one is not 0(aka true), result shall be non zero
self.lines.append(f'IADD {res} {p[0].result} {p[2].result}')
return Expression(res, False)
@_('boolterm')
def boolexpr(self, p):
return p[0]
@_('boolterm AND boolfactor')
def boolterm(self, p):
res = self.next_temp()
# multiply, as if one side is 0(aka false), it will be false
# also, bool items are always int
self.lines.append(f'IMLT {res} {p[0].result} {p[2].result}')
return Statement(res, False)
@_('boolfactor')
def boolterm(self, p):
return p[0]
@_('NOT "(" boolexpr ")"')
def boolfactor(self, p):
# as far as i understand, all bool expressions are integers(as RELOP always returns an int)
exp = p[2]
res = self.next_temp()
self.lines.append(f'IEQL {res} {exp.result} 0')
return Expression(res, False)
@_('expression RELOP expression')
def boolfactor(self, p):
float_op = p[0].is_float or p[2].is_float
lhs = p[0].result
rhs = p[2].result
if float_op: # Check if we need to cast someone to float
if not p[0].is_float:
new_term = self.next_temp()
self.lines.append(f'ITOR {new_term} {lhs}')
lhs = new_term
elif not p[2].is_float: # elif since if both are false we wont be here to begin with
new_term = self.next_temp()
self.lines.append(f'ITOR {new_term} {rhs}')
rhs = new_term
# RELOP to command map
com_map = { '<': 'LSS', '>': 'GRT', '==': 'EQL', '!=': 'NQL', '<=': 'GRT', '>=': 'LSS' }
# if we need to negate(since quad doesnt have a built in support for it)
should_negate = [ '<=', '>=' ]
command = ('R' if float_op else 'I') + com_map[p[1]]
result = self.next_temp()
self.lines.append(f'{command} {result} {lhs} {rhs}')
if p[1] in should_negate:
self.lines.append(f'IEQL {result} {result} 0')
return Expression(result, False)
@_('expression ADDOP term')
def expression(self, p):
float_op = p[0].is_float or p[2].is_float
lhs = p[0].result
rhs = p[2].result
if float_op: # Check if we need to cast someone to float
if not p[0].is_float:
new_term = self.next_temp()
self.lines.append(f'ITOR {new_term} {lhs}')
lhs = new_term
elif not p[2].is_float: # elif since if both are false we wont be here to begin with
new_term = self.next_temp()
self.lines.append(f'ITOR {new_term} {rhs}')
rhs = new_term
command = ('R' if float_op else 'I') + ('ADD' if p[1] == '+' else 'SUB')
result = self.next_temp()
self.lines.append(f'{command} {result} {lhs} {rhs}')
return Expression(result, float_op)
@_('term')
def expression(self, p):
return p[0]
@_('term MULOP factor')
def term(self, p):
float_op = p[0].is_float or p[2].is_float
lhs = p[0].result
rhs = p[2].result
if float_op: # Check if we need to cast someone to float
if not p[0].is_float:
new_term = self.next_temp()
self.lines.append(f'ITOR {new_term} {lhs}')
lhs = new_term
elif not p[2].is_float: # elif since if both are false we wont be here to begin with
new_term = self.next_temp()
self.lines.append(f'ITOR {new_term} {rhs}')
rhs = new_term
command = ('R' if float_op else 'I') + ('MLT' if p[1] == '*' else 'DIV')
result = self.next_temp()
self.lines.append(f'{command} {result} {lhs} {rhs}')
return Expression(result, float_op)
@_('factor')
def term(self, p):
return p[0]
@_('"(" expression ")"')
def factor(self, p):
return p[1]
@_('CAST "(" expression ")"')
def factor(self, p):
cast_to_float = p[0].find('float') != -1 # if its not a cast to float, its probably a cast to int
exp = p[2]
if(cast_to_float != exp.is_float): # if we cast from int to float or from float to int
casted = self.next_temp()
command = 'ITOR' if cast_to_float else 'RTOI'
self.lines.append(f'{command} {casted} {exp.result}')
return Expression(casted, cast_to_float)
return Expression(exp.result, cast_to_float)
@_('ID')
def factor(self, p):
if self.symbol_table.get(p[0]) is None:
self.had_errors = True
print_err(f'Unknown variable {p[0]} at line {p.lineno}')
return Expression('0', False)
return Expression(p[0], self.symbol_table[p[0]].is_float)
@_('NUM')
def factor(self, p):
return Expression(p.NUM, p[0].find('.') != -1)