一晚上糊出一個語言「前端」

接著昨天一個晚上糊出語言「後端」的代碼繼續寫,今晚我們來寫Lexer和Parser

首先,這裡是昨天的完整代碼的代碼清單

from enum import Enumdef zfuck(description) : print("Error: ", description) input("press any key to continue . . . ") quit(-1)class ZFuncImpl : def __init__(self, paramList, funcBody) : self.paramList = paramList self.funcBody = funcBody def isBuiltin(self) : return Falseclass ZBuiltinFuncImpl : def __init__(self, func) : self.func = func def isBuiltin(self) : return Trueclass ZObjectKind(Enum) : Number = 1 Id = 2 List = 3 Func = 4 Eval = 5class ZObject : def __init__(self, kind, value) : self.kind = kind self.value = value def isNumber(self) : return self.kind == ZObjectKind.Number def isIdent(self) : return self.kind == ZObjectKind.Id def isList(self) : return self.kind == ZObjectKind.List def isFunc(self) : return self.kind == ZObjectKind.Func def isEval(self) : return self.kind == ZObjectKind.Evalclass ZSymbolTable : def __init__(self) : self.symbols = dict() def set(self, name, theObject) : self.symbols[name] = theObject def query(self, name) : return self.symbols.get(name)globalSymbolTable = ZSymbolTable()class ZEvaluator : def __init__(self) : pass def evaluate(self, theObject) : if theObject.isNumber() : return theObject elif theObject.isList() : return theObject elif theObject.isIdent() : found = globalSymbolTable.query(theObject.value) if found is None : zfuck("undefined reference to " + theObject.value) return found elif theObject.isEval() : maybeList = self.evaluate(theObject.value) if not maybeList.isList() : zfuck("evaluaing unknown thing") theList = maybeList.value if len(theList) == 0 : zfuck("evaluating empty list") maybeFunc = self.evaluate(theList[0]) if not maybeFunc.isFunc() : zfuck("not a function apply") funcImpl = maybeFunc.value if funcImpl.isBuiltin() : return funcImpl.func(theList[1:]) else : return self.simpleCall(funcImpl, theList[1:]) def simpleCall(self, funcImpl, argList) : if len(funcImpl.paramList) != len(argList) : zfuck("arguments not appropriate") for i in range(0, len(argList)) : globalSymbolTable.set(funcImpl.paramList[i], self.evaluate(argList[i])) return self.evaluate(funcImpl.funcBody);evaluator = ZEvaluator()

經大佬提醒,我所謂的「evaluate a list」實際上就是函數調用,更加類似於apply;不過這裡因為時guo間yu關lan系duo就不改啦~

此外我把zfuck函數稍稍修改了一下,讓它在退出之前先暫停。這樣我們就可以愉快地做錯誤處理辣!

class ZParser : def __init__(self) : pass

首先我們來寫Lexer,這個很簡單,從左到右掃描一遍輸入串,把掃描到的東西塞進一個list里就OK辣

def prelex(self, source) : tokens = list() i = 0 while i < len(source) : ch = source[i] if ch in self.IdChars : s = "" while (i < len(source)) and (ch in self.IdChars) : s = s + ch i = i + 1 if (i < len(source)) : ch = source[i] tokens.append(s) elif ch in self.NumChars : s = "" while (i < len(source)) and (ch in self.NumChars) : s += ch i = i + 1 if (i < len(source)) : ch = source[i] tokens.append(s) elif ch == ( or ch == ) or ch == $ : tokens.append(ch) i = i + 1 elif ch in self.BlankChars : i = i + 1 else : zfuck("ill character " + ch + "") return tokens

其中用到的常量們定義如下:

IdChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+-*/%?><=" NumChars = "0123456789" BlankChars = " vf
"

接下來就是遞歸向下的parse了。我們令parse function都返回一個元組,元組的第一項是生成的AST,第二項是接下來還要處理的tokens

def parse(self, tokens) : if len(tokens) == 0 : zfuck("empty") if tokens[0] == "$" : tokens = tokens[1:len(tokens)] (evaluated, tokens) = self.parse(tokens) return (ZObject(ZObjectKind.Eval, evaluated), tokens) elif tokens[0] == "(" : return self.parseList(tokens) elif tokens[0].isnumeric() : return (ZObject(ZObjectKind.Number, int(tokens[0])), tokens[1:len(tokens)]) else : # identifier return (ZObject(ZObjectKind.Id, tokens[0]), tokens[1:len(tokens)]) def parseList(self, tokens) : if len(tokens) == 0 or tokens[0] != "(" : zfuck("illformed list") tokens = tokens[1:len(tokens)] concreteList = list() while (len(tokens) != 0) and (tokens[0] != ")") : (newElement, tokens) = self.parse(tokens) concreteList.append(newElement) if len(tokens) == 0 : zfuck("unclosed list") tokens = tokens[1:len(tokens)] return (ZObject(ZObjectKind.List, concreteList), tokens)

最後我們來寫一個驅動函數,用來解析一段源代碼並且返回AST

def parseSource(self, source) : return self.parse(self.prelex(source))[0]

Parser已經完成了,接下來創建一個REPL。首先,我們創建一個ZParser的實例

parser = ZParser()

然後給這個語言加上一點標準庫:

def stdlibQuitFunc(argList) : input("press any key to continue . . . ") quit(0)def stdlibAddFunc(argList) : theSum = 0 for arg in argList : evaluatedArg = evaluator.evaluate(arg) if not evaluatedArg.isNumber() : zfuck("not a number") theSum += evaluatedArg.value return ZObject(ZObjectKind.Number, theSum)def stdlibSubFunc(argList) : if len(argList) != 2 : zfuck("Incorrect arguments for -") evaluatedLhs = evaluator.evaluate(argList[0]) evaluatedRhs = evaluator.evaluate(argList[1]) if (not evaluatedLhs.isNumber()) or (not evaluatedRhs.isNumber()) : zfuck("Incorrect arguments for -") theDiff = evaluatedLhs.value - evaluatedRhs.value return ZObject(ZObjectKind.Number, theDiff)def stdlibLtFunc(argList) : if len(argList) != 2 : zfuck("Incorrect arguments for <") evaluatedLhs = evaluator.evaluate(argList[0]) evaluatedRhs = evaluator.evaluate(argList[1]) if (not evaluatedLhs.isNumber()) or (not evaluatedRhs.isNumber()) : zfuck("Incorrect arguments for <") return ZObject(ZObjectKind.Number, evaluatedLhs.value < evaluatedRhs.value)def stdlibIfFunc(argList) : if len(argList) != 3 : zfuck("Incorrect arguments for if") condResult = evaluator.evaluate(argList[0]).value if bool(condResult) : return evaluator.evaluate(argList[1]) else : return evaluator.evaluate(argList[2])def stdlibDefun(argList) : if len(argList) != 3 : zfuck("Incorrect arguments for defun") if argList[0].kind != ZObjectKind.Id or argList[1].kind != ZObjectKind.List : zfuck("Incorrect arguments for defun") paramList = list() for arg in argList[1].value : if arg.kind != ZObjectKind.Id : zfuck("Incorrect arguments for defun") paramList.append(arg.value) funcObject = ZObject(ZObjectKind.Func, ZFuncImpl(paramList, argList[2])) globalSymbolTable.set(argList[0].value, funcObject) return funcObjectdef stdlibPrintFunc(argList) : for arg in argList : evaluatedArg = evaluator.evaluate(arg) print(evaluatedArg.value, end = " ") print("
"); return ZObject(ZObjectKind.List, argList)globalSymbolTable.set("+", ZObject(ZObjectKind.Func, ZBuiltinFuncImpl(stdlibAddFunc)))globalSymbolTable.set("-", ZObject(ZObjectKind.Func, ZBuiltinFuncImpl(stdlibSubFunc)))globalSymbolTable.set("<", ZObject(ZObjectKind.Func, ZBuiltinFuncImpl(stdlibLtFunc)))globalSymbolTable.set("?", ZObject(ZObjectKind.Func, ZBuiltinFuncImpl(stdlibIfFunc)))globalSymbolTable.set("->", ZObject(ZObjectKind.Func, ZBuiltinFuncImpl(stdlibDefun)))globalSymbolTable.set("print", ZObject(ZObjectKind.Func, ZBuiltinFuncImpl(stdlibPrintFunc)))globalSymbolTable.set("quit", ZObject(ZObjectKind.Func, ZBuiltinFuncImpl(stdlibQuitFunc)))

沒錯,if,quit和defun也是函數,語言內建的函數~

最後加上repl代碼

nth = 0while True: src = input("In[" + str(nth) + "] ~ ") AST = parser.parseSource(src) print("Out[" + str(nth) + "] => " + str(evaluator.evaluate(AST).value)) nth = nth + 1

推薦閱讀:

關於Vert.x的冷知識
React Native開源項目如何運行?
大數據時代對編程有什麼影響?
求余和取模
C語言基礎:函數的聲明與定義

TAG:編程語言 | 解釋器 |