commit b17f895f0a9d457a6ea5887cb2683dda598ee4fa Author: Rusty Striker Date: Tue Mar 26 21:02:54 2024 +0200 gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c18dd8d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..722127a --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# Todo + +- [ ] Error handling(so it wont just print sly: unexpected token or whatever it says) +- [ ] Handle comparisons properly(==,!=,<=,>=,<,>) +- [ ] Proper readme about files \ No newline at end of file diff --git a/cpq.py b/cpq.py new file mode 100644 index 0000000..0e7d962 --- /dev/null +++ b/cpq.py @@ -0,0 +1,31 @@ +import sly +# for ease of reading, the compiler is broken down to multiple files +# parser.py for the parser +# lexer.py for the lexer +# and helper.py for helper functions(such as print_err) +from lexer import Lexer +from parser import Parser +from helper import print_err + + + + +print_err('Aviv Romem') +lexer = Lexer() +parser = Parser() +text = ''' +a: int; +{ + while(a < 10) { + a = a + 1; + if(a == 5) + break; + else + a = a + 0; + } + +} +''' +parser.parse(lexer.tokenize(text)) +for l, t in enumerate(parser.lines): + print(l,':',t) diff --git a/helper.py b/helper.py new file mode 100644 index 0000000..2098dac --- /dev/null +++ b/helper.py @@ -0,0 +1,4 @@ +import sys + +def print_err(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) \ No newline at end of file diff --git a/items.py b/items.py new file mode 100644 index 0000000..67d6028 --- /dev/null +++ b/items.py @@ -0,0 +1,41 @@ +# Item return type: +# { lines, result, is_float } +# - result: end result variable name(or the value itself in case of number literal, also if applicable) +# - is_float: true if the results are of type float(assuming there are results) +# idlist return type shall be a list of all ids and lines( { id, line } ) +class Expression(): + def __init__(self, result: str, is_float: bool): + self.result = result + self.is_float = is_float + +# Statement type: +# - breaks: a list of all lines on which a brake occured, to be handled by relevant rules +class Statement(): + def __init__(self, breaks: list): + self.breaks = breaks + +class Id(): + def __init__(self, id: str, line: int): + self.id = id + self.line = line + +# Case type: +# - num: NUM token lexeme as string +# - line: line at which the comparison check is to be made +# - end: next line after the end of the case block +# - breaks: the list of breaks from the case stmtlist +class Case(): + def __init__(self, num: str, line: int, end: int, breaks: list): + self.num = num + self.line = line + self.end = end + self.breaks = breaks + +# Symbol table type: +# { is_float, line } +# - is_float: true if the type is float(we have only 2 types, it makes it easier to check) +# - line: line defined at, for error handling +class Symbol(): + def __init__(self, is_float, line): + self.is_float = is_float + self.line = line \ No newline at end of file diff --git a/lexer.py b/lexer.py new file mode 100644 index 0000000..e8d9a47 --- /dev/null +++ b/lexer.py @@ -0,0 +1,48 @@ +import sly + +class Lexer(sly.Lexer): + # define token types + tokens = { + # Keyworkds + BREAK, CASE, DEFAULT, ELSE, FLOAT, + IF, INPUT, INT, OUTPUT, SWITCH, WHILE, + # Operators + RELOP, ADDOP, MULOP, OR, AND, NOT, CAST, + # misc + ID, NUM + } + ignore = ' \t' + literals = { '(', ')', '{', '}', ',', ':', ';', '=' } + + # define each token + RELOP = r'(==|!=|<|>|>=|<=)' + ADDOP = r'(\+|-)' + MULOP = r'(\*|/)' + OR = '\|\|' + AND = '&&' + NOT = '!' + CAST = r'static_cast<(int|float)>' + + ID = r'[a-zA-Z][a-zA-Z0-9]*' + NUM = r'[0-9]+(\.[0-9]*)?' + + # define keywords on ID + ID['break'] = BREAK + ID['case'] = CASE + ID['default'] = DEFAULT + ID['else'] = ELSE + ID['float'] = FLOAT + ID['if'] = IF + ID['input'] = INPUT + ID['int'] = INT + ID['output'] = OUTPUT + ID['switch'] = SWITCH + ID['while'] = WHILE + + @_(r'\n+') + def newline(self, t): + self.lineno += t.value.count('\n') + + def error(self, t): + print(f"Error at line {self.lineno}: unexpect character {t.value[0]}") + self.index += 1 \ No newline at end of file diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..f2a570f --- /dev/null +++ b/parser.py @@ -0,0 +1,340 @@ +import sly +from lexer import Lexer +from helper import * +from items import * + +# Notes from reading: +# - it seems no scoping is needed, as it contains only one scope per program + + + + +class Parser(sly.Parser): + tokens = Lexer.tokens + symbol_table = {} + had_errors = False # Simply to know if we need to write the program in the end + lines = [ ] + last_used_temp = 0 # cpl doesnt allow _ in ids, so use t_NUM as variables + + def next_temp(self): + self.last_used_temp += 1 + return f't_{self.last_used_temp}' + + @_('declarations stmt_block') + def program(self, p): + self.lines.append('HALT') + self.lines.append('Aviv Romem') + return None + + @_('declarations declaration') + def declarations(self, p): + return None + + @_('') + def declarations(self, p): + # Empty + return None + + @_('idlist ":" type ";"') + def declaration(self, p): + floats = p[2].is_float + ids = p[0] + for i in ids: + if self.symbol_table.get(i.id): + self.had_errors = True + print_err(f'ID {i.id} defined twice, first in line {self.symbol_table[i.id].line}, second time in line {i.line}') + else: + self.symbol_table[i.id] = Symbol(floats, i.line) + return None + + + @_('INT', 'FLOAT') + def type(self, p): + return Expression('', p[0] == 'FLOAT') # return an item with the type and an empty result + + @_('idlist "," ID') + def idlist(self, p): + return p[0] + [ Id(p[2], p.lineno) ] + + @_('ID') + def idlist(self, p): + return [ Id(p[0], p.lineno) ] + + @_( + 'assignment_stmt', 'input_stmt', 'output_stmt', 'if_stmt', + 'while_stmt', 'switch_stmt', 'break_stmt', 'stmt_block' + ) + def stmt(self, p): + return p[0] + + @_('ID "=" expression ";"') + def assignment_stmt(self, p): + id = p[0] + exp = p[2] + if self.symbol_table.get(id) is None: + self.had_errors = True + print_err(f'Unknown variable {id} at line {p.lineno}') + return Statement([]) + sym = self.symbol_table.get(id) + if not sym.is_float and exp.is_float: + self.had_errors = True + print_err(f'Trying to assign a float to an int at line {p.lineno}, did you forget a cast?') + if sym.is_float and not exp.is_float: # we need to cast exp to float + new_exp = self.next_temp() + self.lines.append(f'ITOR {new_exp} {exp.result}') + exp = Exception(new_exp, False) + command = 'RASN' if sym.is_float else 'IASN' + self.lines.append(f'{command} {id} {exp.result}') + return Statement([]) + + @_('INPUT "(" ID ")" ";"') + def input_stmt(self, p): + id = p[2] + if self.symbol_table.get(id) is None: + self.had_errors = True + print_err(f'Unknown variable {id} at line {p.lineno}') + else: + sym = self.symbol_table.get(id) + command = 'RINP' if sym.is_float else 'IINP' + self.lines.append(f'{command} {id}') + return Statement([]) + + @_('OUTPUT "(" ID ")" ";"') + def output_stmt(self, p): + id = p[2] + if self.symbol_table.get(id) is None: + self.had_errors = True + print_err(f'Unknown variable {id} at line {p.lineno}') + else: + sym = self.symbol_table.get(id) + command = 'RPRT' if sym.is_float else 'IPRT' + self.lines.append(f'{command} {id}') + return Statement([]) + + @_('IF "(" boolexpr ")" if_jump stmt if_jump ELSE stmt') + def if_stmt(self, p): + exp = p[2] + jump_else = p[4] + jump_end = p[6] + self.lines[jump_else] = f'JMPZ {jump_end + 1} {exp.result}' + self.lines[jump_end] = f'JUMP {len(self.lines)}' + return Statement(p[5].breaks + p[8].breaks) # return the list of breaks from both stmt s + + + @_('') + def if_jump(self, p): + # append an empty line as a placeholder for the jump + line = len(self.lines) + self.lines.append('') + return line + + + @_('WHILE seen_WHILE "(" boolexpr ")" while_quit stmt') + def while_stmt(self, p): + # return to the start + check_line = p[1] + self.lines.append(f'JUMP {check_line}') + # add the check for the boolexpr + exp = p[3] + jump_if_fail = len(self.lines) + jump_line = p[5] # while_quit + exit_line = f'JMPZ {jump_if_fail} {exp.result}' + self.lines[jump_line] = exit_line + for break_line in p.stmt.breaks: + self.lines[break_line] = f'JUMP {jump_if_fail}' + return Statement([]) + + @_('') + def while_quit(self, p): + # append an empty line as a placeholder for the jump if the while check failed + line = len(self.lines) + self.lines.append('') + return line + + @_('') + def seen_WHILE(self, p): + # helper to get the line number of when we start the check thing for a while expr + return len(self.lines) + + @_('SWITCH "(" expression ")" "{" caselist DEFAULT ":" stmtlist "}"') + def switch_stmt(self, p): + exp = p[2] + cases = p[5] + if exp.is_float: + self.had_errors = True + print_err(f'Invalid switch statement expression at line {p.lineno}: Expected an integer expression but found float') + cmp = self.next_temp() + break_line = f'JUMP {len(self.lines)}' + for c in cases: + self.lines[c.line] = f'IEQL {cmp} {exp.result} {c.num}' + self.lines[c.line + 1] = f'JMPZ {c.end} {cmp}' + for b in c.breaks: + self.lines[b] = break_line + for b in p[8].breaks: # breaks in default + self.lines[b] = break_line + + + @_('caselist CASE NUM case_check ":" stmtlist') + def caselist(self, p): + # Check that NUM doesnt have a dot(and indeed is an integer) + if p[2].find('.') != -1: + self.had_errors = True + print_err(f'Invalid case constant at line {p.lineno}: Expected an integer but found a float') + # continue as if nothing happend + self.lines.append(f'JUMP {len(self.lines) + 2}') # Jump over the next comparison in the case of fallthrough + line = p[3] + return p[0] + [ Case(p[2], line, len(self.lines), p[5].breaks) ] + + + @_('') + def case_check(self, p): + line = len(self.lines) + self.lines.append('') # IEQL + self.lines.append('') # JPMZ + return line + + @_('') + def caselist(self, p): + return [] + + @_('BREAK ";"') + def break_stmt(self, p): + line = len(self.lines) + self.lines.append('') # Empty line for break() + return Statement([line]) + + @_('"{" stmtlist "}"') + def stmt_block(self, p): + return p[1] + + @_('stmtlist stmt') + def stmtlist(self, p): + return Statement(p[0].breaks + p[1].breaks) + + @_('') + def stmtlist(self, p): + return Statement([]) # Empty item + + @_('boolexpr OR boolterm') + def boolexpr(self, p): + res = self.next_temp() + # add, if one is not 0(aka true), result shall be non zero + self.lines.append(f'IADD {res} {p[0].result} {p[2].result}') + return Expression(res, False) + + @_('boolterm') + def boolexpr(self, p): + return p[0] + + @_('boolterm AND boolfactor') + def boolterm(self, p): + res = self.next_temp() + # multiply, as if one side is 0(aka false), it will be false + # also, bool items are always int + self.lines.append(f'IMLT {res} {p[0].result} {p[2].result}') + return Statement(res, False) + + @_('boolfactor') + def boolterm(self, p): + return p[0] + + @_('NOT "(" boolexpr ")"') + def boolfactor(self, p): + # as far as i understand, all bool expressions are integers(as RELOP always returns an int) + exp = p[2] + res = self.next_temp() + self.lines.append(f'IEQL {res} {exp.result} 0') + return Expression(res, False) + + @_('expression RELOP expression') + def boolfactor(self, p): + float_op = p[0].is_float or p[2].is_float + lhs = p[0].result + rhs = p[2].result + if float_op: # Check if we need to cast someone to float + if not p[0].is_float: + new_term = self.next_temp() + self.lines.append(f'ITOR {new_term} {lhs}') + lhs = new_term + elif not p[2].is_float: # elif since if both are false we wont be here to begin with + new_term = self.next_temp() + self.lines.append(f'ITOR {new_term} {rhs}') + rhs = new_term + # TODO fix this and make it take care of all < > <= >= == != please yes thank you + command = ('R' if float_op else 'I') + ('ADD' if p[1] == '+' else 'SUB') + result = self.next_temp() + self.lines.append(f'{command} {result} {lhs} {rhs}') + return Expression(result, False) + + @_('expression ADDOP term') + def expression(self, p): + float_op = p[0].is_float or p[2].is_float + lhs = p[0].result + rhs = p[2].result + if float_op: # Check if we need to cast someone to float + if not p[0].is_float: + new_term = self.next_temp() + self.lines.append(f'ITOR {new_term} {lhs}') + lhs = new_term + elif not p[2].is_float: # elif since if both are false we wont be here to begin with + new_term = self.next_temp() + self.lines.append(f'ITOR {new_term} {rhs}') + rhs = new_term + command = ('R' if float_op else 'I') + ('ADD' if p[1] == '+' else 'SUB') + result = self.next_temp() + self.lines.append(f'{command} {result} {lhs} {rhs}') + return Expression(result, float_op) + + @_('term') + def expression(self, p): + return p[0] + + @_('term MULOP factor') + def term(self, p): + float_op = p[0].is_float or p[2].is_float + lhs = p[0].result + rhs = p[2].result + if float_op: # Check if we need to cast someone to float + if not p[0].is_float: + new_term = self.next_temp() + self.lines.append(f'ITOR {new_term} {lhs}') + lhs = new_term + elif not p[2].is_float: # elif since if both are false we wont be here to begin with + new_term = self.next_temp() + self.lines.append(f'ITOR {new_term} {rhs}') + rhs = new_term + command = ('R' if float_op else 'I') + ('MLT' if p[1] == '*' else 'DIV') + result = self.next_temp() + self.lines.append(f'{command} {result} {lhs} {rhs}') + return Expression(result, float_op) + + @_('factor') + def term(self, p): + return p[0] + + @_('"(" expression ")"') + def factor(self, p): + return p[1] + + @_('CAST "(" expression ")"') + def factor(self, p): + cast_to_float = p[0].find('float') != -1 # if its not a cast to float, its probably a cast to int + exp = p[2] + if(cast_to_float != exp.is_float): # if we cast from int to float or from float to int + casted = self.next_temp() + command = 'ITOR' if cast_to_float else 'RTOI' + self.lines.append(f'{command} {casted} {exp.result}') + return Expression(casted, cast_to_float) + return Expression(exp.result, cast_to_float) + + @_('ID') + def factor(self, p): + if self.symbol_table.get(p[0]) is None: + self.had_errors = True + print_err(f'Unknown variable {p[0]} at line {p.lineno}') + return Expression('0', False) + return Expression(p[0], self.symbol_table[p[0]].is_float) + + @_('NUM') + def factor(self, p): + return Expression(p.NUM, p[0].find('.') != -1)