gitignore
This commit is contained in:
commit
b17f895f0a
7 changed files with 470 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
__pycache__/
|
5
README.md
Normal file
5
README.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# Todo
|
||||||
|
|
||||||
|
- [ ] Error handling (so it won't just print `sly: unexpected token` or whatever it says)
|
||||||
|
- [ ] Handle comparisons properly (==, !=, <=, >=, <, >)
|
||||||
|
- [ ] Proper README describing the files
|
31
cpq.py
Normal file
31
cpq.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
import sly
|
||||||
|
# for ease of reading, the compiler is broken down to multiple files
|
||||||
|
# parser.py for the parser
|
||||||
|
# lexer.py for the lexer
|
||||||
|
# and helper.py for helper functions(such as print_err)
|
||||||
|
from lexer import Lexer
|
||||||
|
from parser import Parser
|
||||||
|
from helper import print_err
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Author signature goes to stderr so it does not mix with the generated code.
print_err('Aviv Romem')

# Hard-coded CPL sample program used to exercise the lexer and parser.
text = '''
a: int;
{
while(a < 10) {
a = a + 1;
if(a == 5)
break;
else
a = a + 0;
}

}
'''

lexer = Lexer()
parser = Parser()
parser.parse(lexer.tokenize(text))

# Dump every generated line together with its index in parser.lines.
for index, quad in enumerate(parser.lines):
    print(index, ':', quad)
|
4
helper.py
Normal file
4
helper.py
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def print_err(*args, **kwargs):
    """Print to standard error.

    Accepts the same positional and keyword arguments as the built-in
    ``print``, except ``file``, which is always ``sys.stderr``.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
|
41
items.py
Normal file
41
items.py
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
# Item return type:
|
||||||
|
# { lines, result, is_float }
|
||||||
|
# - result: end result variable name(or the value itself in case of number literal, also if applicable)
|
||||||
|
# - is_float: true if the results are of type float(assuming there are results)
|
||||||
|
# idlist return type shall be a list of all ids and lines( { id, line } )
|
||||||
|
class Expression:
    """Value produced by an expression rule.

    Attributes:
        result: name of the variable holding the value, or the literal
            lexeme itself for a number literal.
        is_float: True when the value is of type float.
    """

    def __init__(self, result: str, is_float: bool):
        self.result, self.is_float = result, is_float
|
||||||
|
|
||||||
|
# Statement type:
|
||||||
|
# - breaks: a list of all lines on which a break occurred, to be handled by relevant rules
|
||||||
|
class Statement:
    """Value produced by a statement rule.

    Attributes:
        breaks: indices of the lines holding still-unresolved ``break``
            jumps, to be patched by an enclosing rule.
    """

    def __init__(self, breaks: list):
        # The list is stored as given; no copy is made.
        self.breaks = breaks
|
||||||
|
|
||||||
|
class Id:
    """An identifier paired with the line it was seen on.

    Attributes:
        id: the identifier's name.
        line: the line number associated with the identifier.
    """

    def __init__(self, id: str, line: int):
        self.id, self.line = id, line
|
||||||
|
|
||||||
|
# Case type:
|
||||||
|
# - num: NUM token lexeme as string
|
||||||
|
# - line: line at which the comparison check is to be made
|
||||||
|
# - end: next line after the end of the case block
|
||||||
|
# - breaks: the list of breaks from the case stmtlist
|
||||||
|
class Case:
    """One ``case`` arm of a switch statement.

    Attributes:
        num: NUM token lexeme, as a string.
        line: line at which the comparison check is to be made.
        end: next line after the end of the case block.
        breaks: the list of breaks from the case's statement list.
    """

    def __init__(self, num: str, line: int, end: int, breaks: list):
        self.num, self.line = num, line
        self.end, self.breaks = end, breaks
|
||||||
|
|
||||||
|
# Symbol table type:
|
||||||
|
# { is_float, line }
|
||||||
|
# - is_float: true if the type is float(we have only 2 types, it makes it easier to check)
|
||||||
|
# - line: line defined at, for error handling
|
||||||
|
class Symbol:
    """Symbol-table entry for a declared variable.

    Attributes:
        is_float: True if the variable is a float (the only other type
            is int).
        line: line the variable was defined at, used in error messages.
    """

    def __init__(self, is_float, line):
        self.is_float, self.line = is_float, line
|
48
lexer.py
Normal file
48
lexer.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
import sly
|
||||||
|
|
||||||
|
class Lexer(sly.Lexer):
    """Tokenizer for CPL source text, built on sly.

    Token patterns are tried in definition order, so multi-character
    operators are listed before their single-character prefixes.
    """

    # define token types
    tokens = {
        # Keywords
        BREAK, CASE, DEFAULT, ELSE, FLOAT,
        IF, INPUT, INT, OUTPUT, SWITCH, WHILE,
        # Operators
        RELOP, ADDOP, MULOP, OR, AND, NOT, CAST,
        # misc
        ID, NUM
    }
    ignore = ' \t'
    literals = {'(', ')', '{', '}', ',', ':', ';', '='}

    # define each token
    # Regex alternation is leftmost-first, so '<=' and '>=' must precede
    # '<' and '>' or they would be split into '<' followed by '='.
    RELOP = r'(==|!=|<=|>=|<|>)'
    ADDOP = r'(\+|-)'
    MULOP = r'(\*|/)'
    OR = r'\|\|'
    AND = r'&&'
    # Defined after RELOP so that '!=' is not lexed as NOT followed by '='.
    NOT = r'!'
    CAST = r'static_cast<(int|float)>'

    ID = r'[a-zA-Z][a-zA-Z0-9]*'
    NUM = r'[0-9]+(\.[0-9]*)?'

    # define keywords on ID
    ID['break'] = BREAK
    ID['case'] = CASE
    ID['default'] = DEFAULT
    ID['else'] = ELSE
    ID['float'] = FLOAT
    ID['if'] = IF
    ID['input'] = INPUT
    ID['int'] = INT
    ID['output'] = OUTPUT
    ID['switch'] = SWITCH
    ID['while'] = WHILE

    @_(r'\n+')
    def newline(self, t):
        """Advance the line counter; returns nothing, so no token is emitted."""
        self.lineno += t.value.count('\n')

    def error(self, t):
        """Report an unrecognised character on stderr and skip past it."""
        import sys  # local import: the file's import block only brings in sly

        print(
            f"Error at line {self.lineno}: unexpected character {t.value[0]}",
            file=sys.stderr,
        )
        self.index += 1
|
340
parser.py
Normal file
340
parser.py
Normal file
|
@ -0,0 +1,340 @@
|
||||||
|
import sly
|
||||||
|
from lexer import Lexer
|
||||||
|
from helper import *
|
||||||
|
from items import *
|
||||||
|
|
||||||
|
# Notes from reading:
|
||||||
|
# - it seems no scoping is needed, as it contains only one scope per program
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Parser(sly.Parser):
    """Single-pass CPL translator built on sly.

    Grammar actions append output instructions to ``self.lines`` while the
    input is being reduced. Jump targets are indices into ``self.lines``.
    An instruction whose target is not known yet is first appended as an
    empty placeholder string and patched once the enclosing construct has
    been fully reduced.

    Attributes:
        symbol_table: maps a declared identifier to its ``Symbol``.
        lines: the generated instructions, in order.
        had_errors: True once any semantic error has been reported.
        last_used_temp: counter behind the ``t_N`` temporary names.
    """

    tokens = Lexer.tokens
    had_errors = False  # Simply to know if we need to write the program in the end
    last_used_temp = 0  # cpl doesnt allow _ in ids, so use t_NUM as variables

    # RELOP lexeme -> (comparison mnemonic without I/R prefix, negate result).
    # '<=' and '>=' are produced by negating '>' and '<' respectively.
    # NOTE(review): mnemonics follow the IEQL naming already used in this
    # file; confirm NQL/LSS/GRT against the Quad specification.
    _relop_table = {
        '==': ('EQL', False),
        '!=': ('NQL', False),
        '<': ('LSS', False),
        '>': ('GRT', False),
        '<=': ('GRT', True),
        '>=': ('LSS', True),
    }

    def __init__(self):
        # Mutable state is per instance so that two parsers never share a
        # symbol table or an output buffer.
        self.symbol_table = {}
        self.lines = []

    def next_temp(self):
        """Return a fresh temporary variable name of the form ``t_N``."""
        self.last_used_temp += 1
        return f't_{self.last_used_temp}'

    def _promote(self, left, right):
        """Make both operands the same type, emitting ITOR where needed.

        Returns ``(lhs, rhs, float_op)`` where ``lhs``/``rhs`` are the operand
        names to use and ``float_op`` is True if either operand is a float.
        """
        float_op = left.is_float or right.is_float
        lhs, rhs = left.result, right.result
        if float_op:
            # At most one side can be an int here, hence if/elif.
            if not left.is_float:
                converted = self.next_temp()
                self.lines.append(f'ITOR {converted} {lhs}')
                lhs = converted
            elif not right.is_float:
                converted = self.next_temp()
                self.lines.append(f'ITOR {converted} {rhs}')
                rhs = converted
        return lhs, rhs, float_op

    def _binary_op(self, mnemonic, left, right):
        """Emit ``I<mnemonic>``/``R<mnemonic>`` into a new temp and return it."""
        lhs, rhs, float_op = self._promote(left, right)
        prefix = 'R' if float_op else 'I'
        result = self.next_temp()
        self.lines.append(f'{prefix}{mnemonic} {result} {lhs} {rhs}')
        return Expression(result, float_op)

    @_('declarations stmt_block')
    def program(self, p):
        """Finish the program: append HALT and the author signature line."""
        if p[1].breaks:
            # A break that reached the top level was never patched by a
            # while or switch, so its placeholder line is still empty.
            self.had_errors = True
            print_err('break statement found outside of a while or switch statement')
        self.lines.append('HALT')
        self.lines.append('Aviv Romem')
        return None

    @_('declarations declaration')
    def declarations(self, p):
        return None

    @_('')
    def declarations(self, p):
        # Empty
        return None

    @_('idlist ":" type ";"')
    def declaration(self, p):
        """Register every identifier of the list, reporting duplicates."""
        is_float = p[2].is_float
        for ident in p[0]:
            previous = self.symbol_table.get(ident.id)
            if previous is not None:
                self.had_errors = True
                print_err(f'ID {ident.id} defined twice, first in line {previous.line}, second time in line {ident.line}')
            else:
                self.symbol_table[ident.id] = Symbol(is_float, ident.line)
        return None

    @_('INT', 'FLOAT')
    def type(self, p):
        """Return an Expression with an empty result, used only for its type."""
        # p[0] is the matched lexeme ('int' or 'float'), not the token name.
        return Expression('', p[0] == 'float')

    @_('idlist "," ID')
    def idlist(self, p):
        return p[0] + [Id(p[2], p.lineno)]

    @_('ID')
    def idlist(self, p):
        return [Id(p[0], p.lineno)]

    @_(
        'assignment_stmt', 'input_stmt', 'output_stmt', 'if_stmt',
        'while_stmt', 'switch_stmt', 'break_stmt', 'stmt_block'
    )
    def stmt(self, p):
        return p[0]

    @_('ID "=" expression ";"')
    def assignment_stmt(self, p):
        """Emit IASN/RASN, converting an int value assigned to a float."""
        name = p[0]
        exp = p[2]
        sym = self.symbol_table.get(name)
        if sym is None:
            self.had_errors = True
            print_err(f'Unknown variable {name} at line {p.lineno}')
            return Statement([])
        if not sym.is_float and exp.is_float:
            self.had_errors = True
            print_err(f'Trying to assign a float to an int at line {p.lineno}, did you forget a cast?')
        if sym.is_float and not exp.is_float:  # we need to cast exp to float
            converted = self.next_temp()
            self.lines.append(f'ITOR {converted} {exp.result}')
            exp = Expression(converted, True)
        command = 'RASN' if sym.is_float else 'IASN'
        self.lines.append(f'{command} {name} {exp.result}')
        return Statement([])

    @_('INPUT "(" ID ")" ";"')
    def input_stmt(self, p):
        """Emit IINP/RINP for a declared variable."""
        name = p[2]
        sym = self.symbol_table.get(name)
        if sym is None:
            self.had_errors = True
            print_err(f'Unknown variable {name} at line {p.lineno}')
        else:
            command = 'RINP' if sym.is_float else 'IINP'
            self.lines.append(f'{command} {name}')
        return Statement([])

    # NOTE(review): only a bare ID is accepted here; confirm against the CPL
    # grammar whether output should take a full expression.
    @_('OUTPUT "(" ID ")" ";"')
    def output_stmt(self, p):
        """Emit IPRT/RPRT for a declared variable."""
        name = p[2]
        sym = self.symbol_table.get(name)
        if sym is None:
            self.had_errors = True
            print_err(f'Unknown variable {name} at line {p.lineno}')
        else:
            command = 'RPRT' if sym.is_float else 'IPRT'
            self.lines.append(f'{command} {name}')
        return Statement([])

    @_('IF "(" boolexpr ")" if_jump stmt if_jump ELSE stmt')
    def if_stmt(self, p):
        """Patch the two placeholders reserved by the ``if_jump`` markers."""
        exp = p[2]
        jump_else = p[4]
        jump_end = p[6]
        # The else branch starts right after the jump that ends the then branch.
        self.lines[jump_else] = f'JMPZ {jump_end + 1} {exp.result}'
        self.lines[jump_end] = f'JUMP {len(self.lines)}'
        # Breaks of both branches are resolved by an enclosing while/switch.
        return Statement(p[5].breaks + p[8].breaks)

    @_('')
    def if_jump(self, p):
        # append an empty line as a placeholder for the jump
        line = len(self.lines)
        self.lines.append('')
        return line

    @_('WHILE seen_WHILE "(" boolexpr ")" while_quit stmt')
    def while_stmt(self, p):
        """Close the loop and patch its exit jump and all pending breaks."""
        # return to the start
        check_line = p[1]
        self.lines.append(f'JUMP {check_line}')
        # add the check for the boolexpr
        exp = p[3]
        after_loop = len(self.lines)
        jump_line = p[5]  # while_quit
        self.lines[jump_line] = f'JMPZ {after_loop} {exp.result}'
        for break_line in p.stmt.breaks:
            self.lines[break_line] = f'JUMP {after_loop}'
        return Statement([])

    @_('')
    def while_quit(self, p):
        # append an empty line as a placeholder for the jump if the while check failed
        line = len(self.lines)
        self.lines.append('')
        return line

    @_('')
    def seen_WHILE(self, p):
        # helper to get the line number of when we start the check thing for a while expr
        return len(self.lines)

    @_('SWITCH "(" expression ")" "{" caselist DEFAULT ":" stmtlist "}"')
    def switch_stmt(self, p):
        """Patch every case's comparison, exit jump and pending breaks."""
        exp = p[2]
        cases = p[5]
        if exp.is_float:
            self.had_errors = True
            print_err(f'Invalid switch statement expression at line {p.lineno}: Expected an integer expression but found float')
        cmp = self.next_temp()
        break_line = f'JUMP {len(self.lines)}'
        for c in cases:
            self.lines[c.line] = f'IEQL {cmp} {exp.result} {c.num}'
            self.lines[c.line + 1] = f'JMPZ {c.end} {cmp}'
            for b in c.breaks:
                self.lines[b] = break_line
        if cases:
            # The fall-through jump of a case (at index end - 1) targets the
            # JMPZ of the following case. The last case is followed by the
            # default block instead, so it must fall straight into it.
            last = cases[-1]
            self.lines[last.end - 1] = f'JUMP {last.end}'
        for b in p[8].breaks:  # breaks in default
            self.lines[b] = break_line
        # All breaks were resolved here, so none propagate outwards.
        return Statement([])

    @_('caselist CASE NUM case_check ":" stmtlist')
    def caselist(self, p):
        """Append one Case, followed by its fall-through jump."""
        # Check that NUM doesnt have a dot(and indeed is an integer)
        if p[2].find('.') != -1:
            self.had_errors = True
            print_err(f'Invalid case constant at line {p.lineno}: Expected an integer but found a float')
            # continue as if nothing happened
        # Jump over the next comparison in the case of fallthrough
        self.lines.append(f'JUMP {len(self.lines) + 2}')
        line = p[3]
        return p[0] + [Case(p[2], line, len(self.lines), p[5].breaks)]

    @_('')
    def case_check(self, p):
        """Reserve two placeholder lines for a case's comparison and jump."""
        line = len(self.lines)
        self.lines.append('')  # IEQL
        self.lines.append('')  # JMPZ
        return line

    @_('')
    def caselist(self, p):
        return []

    @_('BREAK ";"')
    def break_stmt(self, p):
        """Reserve a placeholder line and report it as a pending break."""
        line = len(self.lines)
        self.lines.append('')  # Empty line for break()
        return Statement([line])

    @_('"{" stmtlist "}"')
    def stmt_block(self, p):
        return p[1]

    @_('stmtlist stmt')
    def stmtlist(self, p):
        return Statement(p[0].breaks + p[1].breaks)

    @_('')
    def stmtlist(self, p):
        return Statement([])  # Empty item

    @_('boolexpr OR boolterm')
    def boolexpr(self, p):
        res = self.next_temp()
        # add, if one is not 0(aka true), result shall be non zero
        self.lines.append(f'IADD {res} {p[0].result} {p[2].result}')
        return Expression(res, False)

    @_('boolterm')
    def boolexpr(self, p):
        return p[0]

    @_('boolterm AND boolfactor')
    def boolterm(self, p):
        res = self.next_temp()
        # multiply, as if one side is 0(aka false), it will be false
        # also, bool items are always int
        self.lines.append(f'IMLT {res} {p[0].result} {p[2].result}')
        return Expression(res, False)

    @_('boolfactor')
    def boolterm(self, p):
        return p[0]

    @_('NOT "(" boolexpr ")"')
    def boolfactor(self, p):
        # as far as i understand, all bool expressions are integers(as RELOP always returns an int)
        exp = p[2]
        res = self.next_temp()
        self.lines.append(f'IEQL {res} {exp.result} 0')
        return Expression(res, False)

    @_('expression RELOP expression')
    def boolfactor(self, p):
        """Emit the comparison for any of ==, !=, <, >, <=, >=."""
        lhs, rhs, float_op = self._promote(p[0], p[2])
        mnemonic, negate = self._relop_table[p[1]]
        prefix = 'R' if float_op else 'I'
        result = self.next_temp()
        self.lines.append(f'{prefix}{mnemonic} {result} {lhs} {rhs}')
        if negate:
            # a <= b is computed as not (a > b); a >= b as not (a < b).
            negated = self.next_temp()
            self.lines.append(f'IEQL {negated} {result} 0')
            result = negated
        # The outcome of a comparison is always treated as an int.
        return Expression(result, False)

    @_('expression ADDOP term')
    def expression(self, p):
        return self._binary_op('ADD' if p[1] == '+' else 'SUB', p[0], p[2])

    @_('term')
    def expression(self, p):
        return p[0]

    @_('term MULOP factor')
    def term(self, p):
        return self._binary_op('MLT' if p[1] == '*' else 'DIV', p[0], p[2])

    @_('factor')
    def term(self, p):
        return p[0]

    @_('"(" expression ")"')
    def factor(self, p):
        return p[1]

    @_('CAST "(" expression ")"')
    def factor(self, p):
        """Emit ITOR/RTOI only when the cast actually changes the type."""
        # if its not a cast to float, its probably a cast to int
        cast_to_float = 'float' in p[0]
        exp = p[2]
        if cast_to_float != exp.is_float:
            casted = self.next_temp()
            command = 'ITOR' if cast_to_float else 'RTOI'
            self.lines.append(f'{command} {casted} {exp.result}')
            return Expression(casted, cast_to_float)
        return Expression(exp.result, cast_to_float)

    @_('ID')
    def factor(self, p):
        """Look the variable up; an unknown one is reported and replaced by 0."""
        sym = self.symbol_table.get(p[0])
        if sym is None:
            self.had_errors = True
            print_err(f'Unknown variable {p[0]} at line {p.lineno}')
            return Expression('0', False)
        return Expression(p[0], sym.is_float)

    @_('NUM')
    def factor(self, p):
        # A literal is a float exactly when its lexeme contains a dot.
        return Expression(p.NUM, '.' in p[0])
|
Loading…
Reference in a new issue