# -*- coding: utf-8 -*- """ Parser for the Eightebed programming language. """ import logging import re from .rooibos import (Stream, RegLexer, Terminal, NonTerminal, Alternation, Sequence, Asteration, Optional, Grammar) from .context import Context logger = logging.getLogger("parser") class TypeError(RuntimeError): pass class Eightebed(object): def __init__(self, data): self.typedecls = data[0] self.vardecls = data[1] self.block = data[2] def __repr__(self): return "%s(%s, %s, %s)" % ( self.__class__.__name__, repr(self.typedecls), repr(self.vardecls), repr(self.block) ) def typecheck(self, types, vars): for typedecl in self.typedecls: typedecl.typecheck(types, vars) for vardecl in self.vardecls: vardecl.typecheck(types, vars) self.block.typecheck(types, vars) def vanalyze(self, context): self.block.vanalyze(context) def emit(self, stream, options): stream.write("""\ /* Achtung! This Source was Automatically Generated by %s! */ #include #include #include #include """ % options.pedigree) if options.trace_marking: stream.write("#define TRACE_MARKING 1\n") stream.write("""\ typedef struct _ptr { void *p; int valid; } _ptr; static void _8ebed_invalidate(_ptr *ptr) { ptr->valid = 0; } static int _8ebed_valid(_ptr ptr) { return ptr.valid; } static int _8ebed_is_alias(_ptr a, _ptr b) { return a.p == b.p; } static _ptr _8ebed_malloc(size_t size) { _ptr ptr; ptr.p = malloc(size); ptr.valid = (ptr.p != NULL); if (ptr.p != NULL) { memset(ptr.p, 0, size); } return ptr; } static void _mark__root(_ptr); static void _8ebed_free(_ptr *ptr) { if (!_8ebed_valid(*ptr)) return; _mark__root(*ptr); free(ptr->p); _8ebed_invalidate(ptr); } """) for typedecl in self.typedecls: typedecl.emit(stream, options) for vardecl in self.vardecls: vardecl.emit(stream) stream.write(r"""\ static void _mark__root(_ptr outcast) { #ifdef TRACE_MARKING fprintf(stderr, "-> BEGIN marking %s @root\n", (long)outcast.p); #endif """ % options.pointer_format) for vardecl in self.vardecls: vardecl.emit_marker(stream) stream.write(r""" #ifdef TRACE_MARKING fprintf(stderr, "-> END marking %s @root\n", (long)outcast.p); #endif } int main(int argc, char **argv) { """ % options.pointer_format) self.block.emit(stream) stream.write("}\n") class Block(object): def __init__(self, data): self.stmts = data[1] def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.stmts)) def typecheck(self, types, vars): block_types = Context(parent=types) block_vars = Context(parent=vars) for stmt in self.stmts: stmt.typecheck(block_types, block_vars) def vanalyze(self, context): for stmt in self.stmts: stmt.vanalyze(context) def emit(self, stream): for stmt in self.stmts: stmt.emit(stream) class TypeDecl(object): def __init__(self, data): self.name = data[1] self.type = data[2] def __repr__(self): return "TypeDecl(%s, %s)" % (repr(self.name), repr(self.type)) def typecheck(self, types, vars): types.declare(self.name, self.type) self.type.typecheck(types, vars) if not isinstance(self.type, TypeStruct): raise TypeError("Only structs may be named") return self.type def emit(self, stream, options): if isinstance(self.type, TypeStruct): stream.write("typedef \n") self.type.emit_forward(stream) stream.write(" %s;\n" % self.name) self.type.emit(stream) stream.write(";\n") stream.write("static void mark_%s(_ptr outcast, %s* p) {" % (self.name, self.name)) marking_text = (options.pointer_format + (" @%s " % self.name) + options.pointer_format) stream.write(r""" #ifdef TRACE_MARKING fprintf(stderr, "-> BEGIN marking %s\n", (long)outcast.p, (long)p); #endif """ % marking_text) for member in self.type.members: if isinstance(member.type, TypePtr): stream.write(""" if (_8ebed_is_alias(outcast, p->%s)) { _8ebed_invalidate(&p->%s); } else if (_8ebed_valid(p->%s)) { mark_""" % (member.name, member.name, member.name)) member.type.points_to().emit(stream) stream.write("(outcast, (") member.type.points_to().emit(stream) stream.write(" *)(p->%s.p));\n }\n" % member.name) stream.write(r""" #ifdef TRACE_MARKING fprintf(stderr, "-> END marking %s\n", (long)outcast.p, (long)p); #endif } """ % marking_text) else: stream.write("typedef \n") self.type.emit(stream) stream.write(" %s;\n" % self.name) stream.write("\n") # These classes double as AST components and as type expressions. class Type(object): def equiv(self, other): raise NotImplementedError def points_to(self): return None def resolve(self, types): return self def get_member_type(self, name): return None class TypeVoid(Type): def __init__(self, data=None): pass def equiv(self, other): return isinstance(other, TypeVoid) def emit(self, stream): stream.write("void") class TypeInt(Type): def __init__(self, data=None): pass def __repr__(self): return "%s()" % (self.__class__.__name__) def typecheck(self, types, vars): return self def equiv(self, other): return isinstance(other, TypeInt) def emit(self, stream): stream.write("int") struct_id = 0 class TypeStruct(Type): def __init__(self, data): global struct_id self.members = data[2] self.id = struct_id struct_id += 1 def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.members)) def typecheck(self, types, vars): for member in self.members: type_ = member.typecheck(types, vars) if isinstance(type_, TypeStruct): raise TypeError("Structs may not contain other structs") return self def equiv(self, other): return False def emit(self, stream): stream.write("struct s_%s {\n" % self.id) for member in self.members: member.emit(stream) stream.write("}") def emit_forward(self, stream): stream.write("struct s_%s" % self.id) def get_member_type(self, name): for decl in self.members: if decl.name == name: return decl.type return None class TypePtr(Type): def __init__(self, data): self.target = data[2] def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.target)) def typecheck(self, types, vars): self.target.typecheck(types, vars) if isinstance(self.target, TypeNamed): return self else: raise TypeError("Pointer type must point to named type") def equiv(self, other): return isinstance(other, TypePtr) and self.target.equiv(other.target) def points_to(self): return self.target def emit(self, stream): stream.write("/* ") self.target.emit(stream) stream.write("*") stream.write(" */ ") stream.write("_ptr") class TypeNamed(Type): def __init__(self, data): self.name = data def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.name)) def typecheck(self, types, vars): return True # can't look self up yet, might not exist yet def equiv(self, other): return isinstance(other, TypeNamed) and self.name == other.name def emit(self, stream): stream.write(self.name) def resolve(self, types): return types.lookup(self.name) class Decl(object): def __init__(self, data): self.type = data[0] self.name = data[1] def __repr__(self): return "%s(%r, %r)" % (self.__class__.__name__, self.name, self.type) def typecheck(self, types, vars): self.type.typecheck(types, vars) return self.type def emit(self, stream): self.type.emit(stream) stream.write(" %s;\n" % self.name) class VarDecl(object): def __init__(self, data): decl = data[1] self.type = decl.type self.name = decl.name def __repr__(self): return "%s(%r, %r)" % (self.__class__.__name__, self.name, self.type) def typecheck(self, types, vars): self.type.typecheck(types, vars) vars.declare(self.name, self.type) return self.type def emit(self, stream): self.type.emit(stream) stream.write(" %s;\n" % self.name) def emit_marker(self, stream): if isinstance(self.type, TypePtr): stream.write(""" if (_8ebed_is_alias(outcast, %s)) { _8ebed_invalidate(&%s); } else if (_8ebed_valid(%s)) { mark_""" % (self.name, self.name, self.name)) self.type.points_to().emit(stream) stream.write("(outcast, (") self.type.points_to().emit(stream) stream.write(" *)%s.p);\n }\n" % self.name) class WhileStmt(object): def __init__(self, data): self.expr = data[1] self.block = data[2] def __repr__(self): return "%s(%r, %r)" % (self.__class__.__name__, self.expr, self.block) def typecheck(self, types, vars): self.expr.typecheck(types, vars) self.block.typecheck(types, vars) return TypeVoid() def vanalyze(self, context): self.expr.vanalyze(context) self.block.vanalyze(context) def emit(self, stream): stream.write("while(") self.expr.emit(stream) stream.write(") {\n") self.block.emit(stream) stream.write("}\n") class IfStmt(object): def __init__(self, data): self.expr = data[1] self.then = data[2] elsepart = data[3] if elsepart: self.else_ = elsepart[0][1] else: self.else_ = Block(['{', [], '}']) def __repr__(self): return "%s(%r, %r, %r)" % (self.__class__.__name__, self.expr, self.then, self.else_) def typecheck(self, types, vars): self.expr.typecheck(types, vars) self.then.typecheck(types, vars) self.else_.typecheck(types, vars) return TypeVoid() def vanalyze(self, context): self.expr.vanalyze(context) # If the test expr is exactly "valid x", put x into context, # asserting that it is valid hereafter in this block subcontext = Context(parent=context) if isinstance(self.expr, ValidExpr): if isinstance(self.expr.expr, VarRef): subcontext[self.expr.expr.name] = True self.then.vanalyze(subcontext) self.else_.vanalyze(context) def emit(self, stream): stream.write("if(") self.expr.emit(stream) stream.write(") {\n") self.then.emit(stream) stream.write("} else {\n") self.else_.emit(stream) stream.write("}\n") class FreeStmt(object): def __init__(self, data): self.ref = data[1] def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.ref)) def typecheck(self, types, vars): ref_type = self.ref.typecheck(types, vars) if ref_type.points_to() is None: raise TypeError("%r is not a pointer type" % ref_type) return TypeVoid() def vanalyze(self, context): self.ref.vanalyze(context) # End safe area -- remove all assertions of validity hereafter. context.empty() def emit(self, stream): stream.write("_8ebed_free(&") self.ref.emit(stream) stream.write(");\n") class PrintStmt(object): def __init__(self, data): self.expr = data[1] def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.expr)) def typecheck(self, types, vars): expr_type = self.expr.typecheck(types, vars) if not expr_type.equiv(TypeInt()): raise TypeError("%r is not an int" % expr_type) return TypeVoid() def vanalyze(self, context): self.expr.vanalyze(context) def emit(self, stream): stream.write("printf(\"%d \", ") self.expr.emit(stream) stream.write(");\n") class AssignStmt(object): def __init__(self, data): self.ref = data[0] self.expr = data[2] def __repr__(self): return "%s(%r, %r)" % (self.__class__.__name__, self.ref, self.expr) def typecheck(self, types, vars): tlhs = self.ref.typecheck(types, vars) trhs = self.expr.typecheck(types, vars) if trhs.equiv(tlhs): return TypeVoid() else: raise TypeError("%r (%r) not equivalent to %r (%r) for vars %s" % (tlhs, self.ref, trhs, self.expr, vars)) def vanalyze(self, context): self.ref.vanalyze(context) self.expr.vanalyze(context) # End safe area -- remove all assertions of validity hereafter. context.empty() def emit(self, stream): self.ref.emit(stream) stream.write(" = ") self.expr.emit(stream) stream.write(";\n") class BinOpExpr(object): map = { '+': '+', '-': '-', '*': '*', '/': '/', '=': '==', '>': '>', '&': '&&', '|': '||', } def __init__(self, data): self.lhs = data[1] self.op = data[2] self.rhs = data[3] def __repr__(self): return "%s(%r, %r, %r)" % (self.__class__.__name__, self.lhs, self.op, self.rhs) def typecheck(self, types, vars): trhs = self.lhs.typecheck(types, vars) tlhs = self.rhs.typecheck(types, vars) if not tlhs.equiv(TypeInt()): raise TypeError("lhs %r is not an int" % tlhs) if not trhs.equiv(TypeInt()): raise TypeError("rhs %r is not an int" % trhs) return TypeInt() def vanalyze(self, context): self.lhs.vanalyze(context) self.rhs.vanalyze(context) def emit(self, stream): stream.write("(") self.lhs.emit(stream) stream.write(" %s " % self.map[self.op]) self.rhs.emit(stream) stream.write(")") class MallocExpr(object): def __init__(self, data): self.type = data[1] def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.type)) def typecheck(self, types, vars): return TypePtr(['', '', self.type]) def vanalyze(self, context): pass def emit(self, stream): stream.write("_8ebed_malloc(sizeof(") self.type.emit(stream) stream.write("))") class ValidExpr(object): def __init__(self, data): self.expr = data[1] def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.expr)) def typecheck(self, types, vars): expr_type = self.expr.typecheck(types, vars) if expr_type.points_to() is None: raise TypeError("%r is not a pointer type" % expr_type) return TypeInt() def vanalyze(self, context): self.expr.vanalyze(context) def emit(self, stream): stream.write("_8ebed_valid(") self.expr.emit(stream) stream.write(")") class DottedRef(object): def __init__(self, data): self.source = data[1] self.member_name = data[4] def __repr__(self): return "%s(%r, %r)" % (self.__class__.__name__, self.source, self.member_name) def typecheck(self, types, vars): source_type = self.source.typecheck(types, vars) source_type = source_type.resolve(types) member_type = source_type.get_member_type(self.member_name) if member_type is None: raise TypeError("%r does not have member %s" % (source_type, self.member_name)) return member_type def vanalyze(self, context, deref=False): self.source.vanalyze(context, deref=deref) def emit(self, stream): self.source.emit(stream) stream.write(".%s" % self.member_name) class DeRef(object): def __init__(self, data): self.source = data[1] self._dest_type = None def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.source)) def typecheck(self, types, vars): source_type = self.source.typecheck(types, vars) dest_type = source_type.points_to() if dest_type is None: raise TypeError("%r is not a pointer type" % source_type) self._dest_type = dest_type return dest_type def vanalyze(self, context, deref=False): self.source.vanalyze(context, deref=True) def emit(self, stream): stream.write("(*(") self._dest_type.emit(stream) stream.write(" *)") self.source.emit(stream) stream.write(".p)") class VarRef(object): def __init__(self, data): self.name = data def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.name)) def typecheck(self, types, vars): #if self.name == 'i': # raise NotImplementedError, vars.lookup(self.name) return vars.lookup(self.name) def vanalyze(self, context, deref=False): if deref: if not context.lookup(self.name, default=False): raise TypeError("Attempt to dereference %s " "in non-safe context" % self.name) def emit(self, stream): stream.write(self.name) class IntConst(object): def __init__(self, data): self.value = int(data) def __repr__(self): return "%s(%s)" % (self.__class__.__name__, repr(self.value)) def typecheck(self, types, vars): return TypeInt() def vanalyze(self, context, deref=False): pass def emit(self, stream): stream.write(str(self.value)) g = Grammar() g['Eightebed'] = Sequence(Asteration(NonTerminal('TypeDecl')), Asteration(NonTerminal('VarDecl')), NonTerminal('Block')).construct(Eightebed) g['Block'] = Sequence(Terminal('{'), Asteration(NonTerminal('Stmt')), Terminal('}')).construct(Block) g['TypeDecl'] = Sequence(Terminal('type'), NonTerminal('TypeName'), NonTerminal('Type'), Terminal(';')).construct(TypeDecl) g['Type'] = Alternation(Terminal('int').construct(TypeInt), Sequence(Terminal('struct'), Terminal('{'), Asteration(NonTerminal('Decl')), Terminal('}')).construct(TypeStruct), Sequence(Terminal('ptr'), Terminal('to'), NonTerminal('Type')).construct(TypePtr), NonTerminal('TypeName').construct(TypeNamed)) g['Decl'] = Sequence(NonTerminal('Type'), NonTerminal('VarName'), Terminal(';')).construct(Decl) g['VarDecl'] = Sequence(Terminal('var'), NonTerminal('Decl')).construct(VarDecl) g['Stmt'] = Alternation(Sequence(Terminal('while'), NonTerminal('Expr'), NonTerminal('Block')).construct(WhileStmt), Sequence(Terminal('if'), NonTerminal('Expr'), NonTerminal('Block'), Optional(Sequence(Terminal('else'), NonTerminal('Block'))) ).construct(IfStmt), Sequence(Terminal('free'), NonTerminal('Ref'), Terminal(';')).construct(FreeStmt), Sequence(Terminal('print'), NonTerminal('Expr'), Terminal(';')).construct(PrintStmt), Sequence(NonTerminal('Ref'), Terminal('='), NonTerminal('Expr'), Terminal(';') ).construct(AssignStmt)) g['Ref'] = Alternation(Sequence(Terminal('['), NonTerminal('Ref'), Terminal(']'), Terminal('.'), NonTerminal('VarName')).construct(DottedRef), Sequence(Terminal('@'), NonTerminal('Ref')).construct(DeRef), NonTerminal('VarName').construct(VarRef)) g['Expr'] = Alternation(Sequence(Terminal('('), NonTerminal('Expr'), NonTerminal('BinOp'), NonTerminal('Expr'), Terminal(')')).construct(BinOpExpr), Sequence(Terminal('malloc'), NonTerminal('Type')).construct(MallocExpr), Sequence(Terminal('valid'), NonTerminal('Expr')).construct(ValidExpr), NonTerminal('IntLit').construct(IntConst), NonTerminal('Ref')) g['BinOp'] = Alternation(Terminal('+'), Terminal('-'), Terminal('*'), Terminal('/'), Terminal('='), Terminal('>'), Terminal('&'), Terminal('|')) g['TypeName'] = Terminal(lambda x: re.match('^[a-zA-Z]\w*$', x)) g['VarName'] = Terminal(lambda x: re.match('^[a-zA-Z]\w*$', x)) g['IntLit'] = Terminal(lambda x: re.match('^\d+$', x)) def parse(text): r = RegLexer() r.ignore(r'\s+') r.register(r'(\d+)') r.register(r'(\(|\)|\[|\]|\;|\{|\}|\=|\+|\-|\*|\/|\,|\@|\.|\>|\&|\|)') r.register(r'([a-zA-Z]\w*)') s = Stream(r(text)) return g.parse('Eightebed', s) def parse_file(filename): f = open(filename, "r") contents = f.read() f.close() return parse(contents) if __name__ == "__main__": import doctest doctest.testmod()