commit b17f895f0a9d457a6ea5887cb2683dda598ee4fa
Author: Rusty Striker <avivr903@tutanota.com>
Date:   Tue Mar 26 21:02:54 2024 +0200

    gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c18dd8d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..722127a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# Todo
+
+- [ ] Error handling (so it won't just print `sly: unexpected token` or whatever it says)
+- [ ] Handle comparisons properly (==, !=, <=, >=, <, >)
+- [ ] Proper readme about files
\ No newline at end of file
diff --git a/cpq.py b/cpq.py
new file mode 100644
index 0000000..0e7d962
--- /dev/null
+++ b/cpq.py
@@ -0,0 +1,31 @@
+import sly
+# for ease of reading, the compiler is broken down to multiple files
+# parser.py for the parser
+# lexer.py for the lexer
+# and helper.py for helper functions(such as print_err)
+from lexer import Lexer
+from parser import Parser
+from helper import print_err
+
+
+
+
+print_err('Aviv Romem')  # author signature goes to stderr, stdout is kept for the generated code
+lexer = Lexer()
+parser = Parser()
+text = '''
+a: int;
+{
+    while(a < 10) {
+        a = a + 1;
+        if(a == 5) 
+            break;
+        else
+            a = a + 0;
+    }
+    
+}
+'''
+parser.parse(lexer.tokenize(text))
+for index, quad in enumerate([] if parser.had_errors else parser.lines):  # print no listing once an error was reported
+    print(index,':',quad)
diff --git a/helper.py b/helper.py
new file mode 100644
index 0000000..2098dac
--- /dev/null
+++ b/helper.py
@@ -0,0 +1,4 @@
+import sys
+
+def print_err(*args, **kwargs):  # print() with the same arguments, but always written to stderr
+    print(*args, **kwargs, file=sys.stderr)
\ No newline at end of file
diff --git a/items.py b/items.py
new file mode 100644
index 0000000..67d6028
--- /dev/null
+++ b/items.py
@@ -0,0 +1,41 @@
+# Expression type (the value produced by the expression-like grammar rules):
+#   { result, is_float }
+#   - result: end result variable name(or the value itself in case of number literal, also if applicable)
+#   - is_float: true if the results are of type float(assuming there are results)
+# idlist return type shall be a list of all ids and lines( { id, line } ), see Id
+# Both fields are stored as given, no validation is done
+class Expression:
+    def __init__(self, result: str, is_float: bool):
+        self.result, self.is_float = result, is_float
+
+# Statement type:
+#   - breaks: a list of all lines on which a break occurred, to be handled by relevant rules
+class Statement:
+    def __init__(self, breaks: list) -> None:
+        self.breaks = breaks
+
+# Id type: one entry of an idlist ( id: the identifier, line: the line it was found on )
+class Id:
+    def __init__(self, id: str, line: int):
+        self.id, self.line = id, line
+
+# Case type:
+#   - num: NUM token lexeme as string
+#   - line: line at which the comparison check is to be made
+#   - end: next line after the end of the case block
+#   - breaks: the list of breaks from the case stmtlist
+# All four values are stored untouched; the switch rule reads them back
+# when it fills in the comparison and jump lines of each case
+class Case:
+    def __init__(self, num: str, line: int, end: int, breaks: list):
+        self.num, self.line = num, line
+        self.end, self.breaks = end, breaks
+
+# Symbol table type:
+#   { is_float, line }
+#   - is_float: true if the type is float(we have only 2 types, it makes it easier to check)
+#   - line: line defined at, for error handling
+# One Symbol is stored per declared id
+class Symbol:
+    def __init__(self, is_float: bool, line: int):
+        self.is_float, self.line = is_float, line
\ No newline at end of file
diff --git a/lexer.py b/lexer.py
new file mode 100644
index 0000000..e8d9a47
--- /dev/null
+++ b/lexer.py
@@ -0,0 +1,48 @@
+import sly
+
+class Lexer(sly.Lexer):
+    # CPL tokenizer: the set below names every token type handed to the parser
+    tokens = {
+        # Keywords
+        BREAK, CASE, DEFAULT, ELSE, FLOAT,
+        IF, INPUT, INT, OUTPUT, SWITCH, WHILE,
+        # Operators
+        RELOP, ADDOP, MULOP, OR, AND, NOT, CAST,
+        # misc
+        ID, NUM
+    }
+    ignore = ' \t'
+    literals = { '(', ')', '{', '}', ',', ':', ';', '=' }
+
+    # define each token; regex alternation takes the first alternative that matches, so >= and <= must precede > and <
+    RELOP = r'(==|!=|>=|<=|<|>)'
+    ADDOP = r'(\+|-)'
+    MULOP = r'(\*|/)'
+    OR = r'\|\|'
+    AND = '&&'
+    NOT = '!'
+    CAST = r'static_cast<(int|float)>'
+
+    ID = r'[a-zA-Z][a-zA-Z0-9]*'
+    NUM = r'[0-9]+(\.[0-9]*)?'
+
+    # define keywords on ID
+    ID['break'] = BREAK
+    ID['case'] = CASE
+    ID['default'] = DEFAULT
+    ID['else'] = ELSE
+    ID['float'] = FLOAT
+    ID['if'] = IF
+    ID['input'] = INPUT
+    ID['int'] = INT
+    ID['output'] = OUTPUT
+    ID['switch'] = SWITCH
+    ID['while'] = WHILE
+
+    @_(r'\n+')
+    def newline(self, t):  # count consumed newlines so lineno stays right for error messages
+        self.lineno += t.value.count('\n')
+
+    def error(self, t):  # report the offending character, then skip it so lexing continues
+        print(f"Error at line {self.lineno}: unexpected character {t.value[0]}")
+        self.index += 1
\ No newline at end of file
diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..f2a570f
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,340 @@
+import sly
+from lexer import Lexer
+from helper import *
+from items import *
+
+# Notes from reading:
+# - it seems no scoping is needed, as it contains only one scope per program
+
+
+
+
+class Parser(sly.Parser):
+    tokens = Lexer.tokens
+    symbol_table = {}  # declared id -> Symbol; NOTE: class attribute, so shared by all Parser instances
+    had_errors = False  # Simply to know if we need to write the program in the end
+    lines = []  # emitted instructions; a list index is the line number used as jump target (also class-level)
+    last_used_temp = 0  # cpl doesnt allow _ in ids, so use t_NUM as variables
+
+    def next_temp(self):  # hand out a fresh temporary name: t_1, t_2, ...
+        self.last_used_temp += 1
+        return f't_{self.last_used_temp}'
+
+    @_('declarations stmt_block')
+    def program(self, p):  # whole program parsed: close the instruction stream
+        self.lines.append('HALT')
+        self.lines.append('Aviv Romem')
+        return None
+
+    @_('declarations declaration')
+    def declarations(self, p):  # nothing to build, each declaration already filled symbol_table
+        return None
+
+    @_('')
+    def declarations(self, p):
+        # Empty
+        return None
+
+    @_('idlist ":" type ";"')
+    def declaration(self, p):  # register every id of the list under the declared type
+        floats = p[2].is_float
+        ids = p[0]
+        for entry in ids:
+            if entry.id in self.symbol_table:
+                self.had_errors = True
+                print_err(f'ID {entry.id} defined twice, first in line {self.symbol_table[entry.id].line}, second time in line {entry.line}')
+            else:
+                self.symbol_table[entry.id] = Symbol(floats, entry.line)
+        return None
+
+
+    @_('INT', 'FLOAT')
+    def type(self, p):
+        return Expression('', p[0] == 'float')  # keyword tokens keep their lexeme as value, so test for 'float'; result is unused
+
+    @_('idlist "," ID')
+    def idlist(self, p):  # extend the list built so far with one more Id
+        return p[0] + [ Id(p[2], p.lineno) ]
+
+    @_('ID')
+    def idlist(self, p):  # first id of a list
+        return [ Id(p[0], p.lineno) ]
+
+    @_(
+        'assignment_stmt', 'input_stmt', 'output_stmt', 'if_stmt',
+        'while_stmt', 'switch_stmt', 'break_stmt', 'stmt_block'
+    )
+    def stmt(self, p):  # pass the Statement of the concrete rule through unchanged
+        return p[0]
+
+    @_('ID "=" expression ";"')
+    def assignment_stmt(self, p):  # emit IASN/RASN, converting an int value first when the target is a float
+        name = p[0]
+        exp = p[2]
+        if self.symbol_table.get(name) is None:
+            self.had_errors = True
+            print_err(f'Unknown variable {name} at line {p.lineno}')
+            return Statement([])
+        sym = self.symbol_table.get(name)
+        if not sym.is_float and exp.is_float:
+            self.had_errors = True
+            print_err(f'Trying to assign a float to an int at line {p.lineno}, did you forget a cast?')
+        if sym.is_float and not exp.is_float: # we need to cast exp to float
+            new_exp = self.next_temp()
+            self.lines.append(f'ITOR {new_exp} {exp.result}')
+            exp = Expression(new_exp, True)  # the converted value lives in new_exp and is a float now
+        command = 'RASN' if sym.is_float else 'IASN'
+        self.lines.append(f'{command} {name} {exp.result}')
+        return Statement([])
+
+    @_('INPUT "(" ID ")" ";"')
+    def input_stmt(self, p):  # emit IINP/RINP according to the declared type of the variable
+        name = p[2]
+        if self.symbol_table.get(name) is None:
+            self.had_errors = True
+            print_err(f'Unknown variable {name} at line {p.lineno}')
+        else:
+            sym = self.symbol_table.get(name)
+            command = 'RINP' if sym.is_float else 'IINP'
+            self.lines.append(f'{command} {name}')
+        return Statement([])
+
+    @_('OUTPUT "(" ID ")" ";"')
+    def output_stmt(self, p):  # emit IPRT/RPRT according to the declared type of the variable
+        name = p[2]
+        if self.symbol_table.get(name) is None:
+            self.had_errors = True
+            print_err(f'Unknown variable {name} at line {p.lineno}')
+        else:
+            sym = self.symbol_table.get(name)
+            command = 'RPRT' if sym.is_float else 'IPRT'
+            self.lines.append(f'{command} {name}')
+        return Statement([])
+
+    @_('IF "(" boolexpr ")" if_jump stmt if_jump ELSE stmt')
+    def if_stmt(self, p):  # fill in the two placeholder lines reserved by if_jump
+        exp = p[2]
+        jump_else = p[4]
+        jump_end = p[6]
+        self.lines[jump_else] = f'JMPZ {jump_end + 1} {exp.result}'  # condition is 0: go to the else branch
+        self.lines[jump_end] = f'JUMP {len(self.lines)}'  # end of the first branch: skip the else branch
+        return Statement(p[5].breaks + p[8].breaks) # return the list of breaks from both stmt s
+
+
+    @_('')
+    def if_jump(self, p):
+        # append an empty line as a placeholder for the jump
+        line = len(self.lines)
+        self.lines.append('')
+        return line
+
+
+    @_('WHILE seen_WHILE "(" boolexpr ")" while_quit stmt')
+    def while_stmt(self, p):
+        # return to the start
+        check_line = p[1]
+        self.lines.append(f'JUMP {check_line}')
+        # add the check for the boolexpr
+        exp = p[3]
+        jump_if_fail = len(self.lines)  # first line after the loop
+        jump_line = p[5] # while_quit
+        exit_line = f'JMPZ {jump_if_fail} {exp.result}'
+        self.lines[jump_line] = exit_line
+        for break_line in p.stmt.breaks:  # every break in the body leaves the loop
+            self.lines[break_line] = f'JUMP {jump_if_fail}'
+        return Statement([])
+
+    @_('')
+    def while_quit(self, p):
+        # append an empty line as a placeholder for the jump if the while check failed
+        line = len(self.lines)
+        self.lines.append('')
+        return line
+
+    @_('')
+    def seen_WHILE(self, p):
+        # helper to get the line number of when we start the check thing for a while expr
+        return len(self.lines)
+
+    @_('SWITCH "(" expression ")" "{" caselist DEFAULT ":" stmtlist "}"')
+    def switch_stmt(self, p):  # fill in the comparison/jump lines of every case and resolve all breaks
+        exp = p[2]
+        cases = p[5]
+        if exp.is_float:
+            self.had_errors = True
+            print_err(f'Invalid switch statement expression at line {p.lineno}: Expected an integer expression but found float')
+        cmp = self.next_temp()
+        break_line = f'JUMP {len(self.lines)}'
+        for c in cases:
+            self.lines[c.line] = f'IEQL {cmp} {exp.result} {c.num}'
+            self.lines[c.line + 1] = f'JMPZ {c.end} {cmp}'
+        if cases:  # default has no comparison to skip, so the last case must fall straight into it
+            self.lines[cases[-1].end - 1] = f'JUMP {cases[-1].end}'
+        for b in [b for c in cases for b in c.breaks] + p[8].breaks:  # breaks in the cases and in default
+            self.lines[b] = break_line
+        return Statement([])  # all breaks were consumed here; stmtlist needs a Statement back
+
+    @_('caselist CASE NUM case_check ":" stmtlist')
+    def caselist(self, p):
+        # Check that NUM doesnt have a dot(and indeed is an integer)
+        if '.' in p[2]:
+            self.had_errors = True
+            print_err(f'Invalid case constant at line {p.lineno}: Expected an integer but found a float')
+            # continue as if nothing happened
+        self.lines.append(f'JUMP {len(self.lines) + 2}') # Jump over the next comparison in the case of fallthrough
+        line = p[3]
+        return p[0] + [ Case(p[2], line, len(self.lines), p[5].breaks) ]
+
+
+    @_('')
+    def case_check(self, p):  # reserve the two lines switch_stmt fills in for this case
+        line = len(self.lines)
+        self.lines.append('') # IEQL
+        self.lines.append('') # JMPZ
+        return line
+
+    @_('')
+    def caselist(self, p):
+        return []
+
+    @_('BREAK ";"')
+    def break_stmt(self, p):  # the enclosing while/switch rule overwrites the placeholder with a JUMP
+        line = len(self.lines)
+        self.lines.append('') # Empty line for break()
+        return Statement([line])
+
+    @_('"{" stmtlist "}"')
+    def stmt_block(self, p):
+        return p[1]
+
+    @_('stmtlist stmt')
+    def stmtlist(self, p):  # collect the pending breaks of all statements in the list
+        return Statement(p[0].breaks + p[1].breaks)
+
+    @_('')
+    def stmtlist(self, p):
+        return Statement([]) # Empty item
+
+    @_('boolexpr OR boolterm')
+    def boolexpr(self, p):
+        res = self.next_temp()
+        # add, if one is not 0(aka true), result shall be non zero
+        self.lines.append(f'IADD {res} {p[0].result} {p[2].result}')
+        return Expression(res, False)
+
+    @_('boolterm')
+    def boolexpr(self, p):
+        return p[0]
+
+    @_('boolterm AND boolfactor')
+    def boolterm(self, p):
+        res = self.next_temp()
+        # multiply, as if one side is 0(aka false), it will be false
+        # also, bool items are always int
+        self.lines.append(f'IMLT {res} {p[0].result} {p[2].result}')
+        return Expression(res, False)
+
+    @_('boolfactor')
+    def boolterm(self, p):
+        return p[0]
+
+    @_('NOT "(" boolexpr ")"')
+    def boolfactor(self, p):
+        # as far as i understand, all bool expressions are integers(as RELOP always returns an int)
+        exp = p[2]
+        res = self.next_temp()
+        self.lines.append(f'IEQL {res} {exp.result} 0')
+        return Expression(res, False)
+
+    @_('expression RELOP expression')
+    def boolfactor(self, p):  # comparison of two expressions; the result is always an int
+        float_op = p[0].is_float or p[2].is_float
+        lhs = p[0].result
+        rhs = p[2].result
+        if p[0].is_float != p[2].is_float:  # exactly one side is an int: cast that side to float
+            new_term = self.next_temp()
+            self.lines.append(f'ITOR {new_term} {rhs if p[0].is_float else lhs}')
+            lhs, rhs = (lhs, new_term) if p[0].is_float else (new_term, rhs)
+        # <= and >= are emitted as the negation of > and <; NOTE(review): NQL/LSS/GRT opcode names to be confirmed against the Quad spec
+        opcode, negate = {'==': ('EQL', False), '!=': ('NQL', False), '<': ('LSS', False),
+                          '>': ('GRT', False), '<=': ('GRT', True), '>=': ('LSS', True)}[p[1]]
+        result = self.next_temp()
+        self.lines.append(f"{'R' if float_op else 'I'}{opcode} {result} {lhs} {rhs}")
+        if negate:
+            negated = self.next_temp()
+            self.lines.append(f'IEQL {negated} {result} 0')  # same "equals 0" negation the NOT rule emits
+            result = negated
+        return Expression(result, False)
+
+    @_('expression ADDOP term')
+    def expression(self, p):  # addition/subtraction; float if either operand is float
+        float_op = p[0].is_float or p[2].is_float
+        lhs = p[0].result
+        rhs = p[2].result
+        if float_op: # Check if we need to cast someone to float
+            if not p[0].is_float:
+                new_term = self.next_temp()
+                self.lines.append(f'ITOR {new_term} {lhs}')
+                lhs = new_term
+            elif not p[2].is_float: # elif since if both are false we wont be here to begin with
+                new_term = self.next_temp()
+                self.lines.append(f'ITOR {new_term} {rhs}')
+                rhs = new_term
+        command = ('R' if float_op else 'I') + ('ADD' if p[1] == '+' else 'SUB')
+        result = self.next_temp()
+        self.lines.append(f'{command} {result} {lhs} {rhs}')
+        return Expression(result, float_op)
+
+    @_('term')
+    def expression(self, p):
+        return p[0]
+
+    @_('term MULOP factor')
+    def term(self, p):  # multiplication/division; float if either operand is float
+        float_op = p[0].is_float or p[2].is_float
+        lhs = p[0].result
+        rhs = p[2].result
+        if float_op: # Check if we need to cast someone to float
+            if not p[0].is_float:
+                new_term = self.next_temp()
+                self.lines.append(f'ITOR {new_term} {lhs}')
+                lhs = new_term
+            elif not p[2].is_float: # elif since if both are false we wont be here to begin with
+                new_term = self.next_temp()
+                self.lines.append(f'ITOR {new_term} {rhs}')
+                rhs = new_term
+        command = ('R' if float_op else 'I') + ('MLT' if p[1] == '*' else 'DIV')
+        result = self.next_temp()
+        self.lines.append(f'{command} {result} {lhs} {rhs}')
+        return Expression(result, float_op)
+
+    @_('factor')
+    def term(self, p):
+        return p[0]
+
+    @_('"(" expression ")"')
+    def factor(self, p):
+        return p[1]
+
+    @_('CAST "(" expression ")"')
+    def factor(self, p):  # static_cast<int|float>(expression)
+        cast_to_float = 'float' in p[0] # if its not a cast to float, its probably a cast to int
+        exp = p[2]
+        if cast_to_float != exp.is_float: # if we cast from int to float or from float to int
+            casted = self.next_temp()
+            command = 'ITOR' if cast_to_float else 'RTOI'
+            self.lines.append(f'{command} {casted} {exp.result}')
+            return Expression(casted, cast_to_float)
+        return Expression(exp.result, cast_to_float)  # already of the requested type, nothing to emit
+
+    @_('ID')
+    def factor(self, p):  # variable reference; its type comes from the symbol table
+        if self.symbol_table.get(p[0]) is None:
+            self.had_errors = True
+            print_err(f'Unknown variable {p[0]} at line {p.lineno}')
+            return Expression('0', False)  # stand-in value so parsing can go on after the error
+        return Expression(p[0], self.symbol_table[p[0]].is_float)
+
+    @_('NUM')
+    def factor(self, p):  # number literal; a dot in the lexeme makes it a float
+        return Expression(p.NUM, '.' in p[0])