正在看极客时间宫文学老师的《编译原理之美》,用 Swift Playground 写了一个第二课 “int age >= 45” 的词法解析 DEMO。
为了与原课程代码的结构保持一致,DEMO 采用了顺序结构,看起来有点散乱😂,后面我再抽时间优化一下。
// Recognizes: "int age >= 45"
import Foundation

/// States of the finite automaton (DFA) that drives the lexer.
enum DfaState: String {
    case Initial = "Initial"
    case Id = "Id"
    case IntLiteral = "IntLiteral"
    case GT = "GT"
    case GE = "GE"
}

/// Kinds of token the lexer can produce.
enum TokenType: String {
    case Identifier = "Identifier"
    case IntLiteral = "IntLiteral"
    case GT = "GT"
    case GE = "GE"
}

/// Returns `true` when `ch` is a letter (`Character.isLetter`).
func isAlpha(_ ch: Character) -> Bool {
    return ch.isLetter
}

/// Returns `true` when `ch` is a number (`Character.isNumber`).
///
/// NOTE(review): `Character.isNumber` also matches non-decimal numerics such
/// as "½"; confirm whether integer literals should be limited to "0"..."9".
func isDigit(_ ch: Character) -> Bool {
    return ch.isNumber
}

/// A lexed token: its kind (`nil` until a kind has been recognized) and its text.
class Token {
    var type: TokenType?
    var tokenText: String = ""
}

/// Current state of the automaton.
var newState = DfaState.Initial
/// Token currently under construction.
var token = Token()
/// Text accumulated so far for the token under construction.
var tokenText = token.tokenText
/// Every recognized token emitted so far, in source order.
var tokens: [Token] = []

// FIXME: assign the statement to be recognized here
let str = "int age >= 45"

/// Discards the token under construction and puts the automaton back in `.Initial`.
///
/// The state is reset here (not only returned) so that callers that ignore the
/// return value cannot leave a stale state behind for the next token.
/// - Returns: Always `.Initial`.
@discardableResult
func initToken() -> DfaState {
    token = Token()
    tokenText = token.tokenText
    newState = .Initial
    return newState
}

/// Prints the token under construction, records it in `tokens` when it has a
/// recognized type, and then starts a fresh token.
func emitToken() {
    // Keep the Token object in sync with the global text buffer before saving it.
    token.tokenText = tokenText
    if token.type != nil {
        tokens.append(token)
    }
    print("token is: \n" + (tokenText) + "\ntype is:\n" + ((token.type)?.rawValue ?? "undefined"))
    initToken()
}

/// Splits `string` on spaces and lexes every non-empty fragment, printing each token.
/// - Parameter string: The source text to tokenize.
func read(_ string: String) {
    let fragments = string.components(separatedBy: CharacterSet(charactersIn: " "))
    // Consecutive spaces produce empty fragments; they carry no token.
    for fragment in fragments where !fragment.isEmpty {
        readToken(fragment)
        // Flush whatever is still pending at the end of the fragment.
        emitToken()
    }
}

/// Feeds one whitespace-free fragment through the automaton.
///
/// Tokens that end inside the fragment are emitted as they complete; the last
/// one is left pending in `token` / `tokenText` for the caller to emit.
/// - Parameter string: The fragment to lex.
func readToken(_ string: String) {
    checkType(string.first)
    for ch in string.dropFirst() {
        readLeft(ch)
    }
}

/// Starts a new token from its first character and selects the matching state.
///
/// Characters that start no known token are skipped and leave the automaton
/// in `.Initial`.
/// - Parameter ch: First character of the token, or `nil` for empty input.
func checkType(_ ch: Character?) {
    guard let ch = ch else { return }

    if isAlpha(ch) {
        // First character is a letter: enter the identifier state.
        newState = .Id
        token.type = .Identifier
    } else if isDigit(ch) {
        // First character is a digit: enter the integer-literal state.
        newState = .IntLiteral
        token.type = .IntLiteral
    } else if ch == ">" {
        // First character is ">": enter the GT state.
        newState = .GT
        token.type = .GT
    } else {
        newState = .Initial
        return
    }
    tokenText.append(ch)
}

/// Consumes one non-leading character according to the current state.
///
/// When `ch` cannot extend the current token, that token is emitted and `ch`
/// is used to start the next one.
/// - Parameter ch: The character to consume.
func readLeft(_ ch: Character) {
    switch newState {
    case .Initial:
        // No token in progress: determine the next state from this character.
        initToken()
        checkType(ch)
    case .Id:
        if isAlpha(ch) || isDigit(ch) {
            tokenText.append(ch) // stay in the identifier state
        } else {
            finishToken(restartingWith: ch) // leave the identifier state
        }
    case .GT:
        if ch == "=" {
            token.type = .GE // ">" followed by "=" becomes GE
            newState = .GE
            tokenText.append(ch)
        } else {
            finishToken(restartingWith: ch) // leave the GT state
        }
    case .GE:
        finishToken(restartingWith: ch) // GE is complete; nothing can extend it
    case .IntLiteral:
        if isDigit(ch) {
            tokenText.append(ch) // stay in the integer-literal state
        } else {
            finishToken(restartingWith: ch) // leave the integer-literal state
        }
    }
}

/// Emits the finished token and starts the next one from `ch`.
func finishToken(restartingWith ch: Character) {
    emitToken()
    checkType(ch)
}

read(str)
输出结果
token is: 
int
type is:
Identifier
token is: 
age
type is:
Identifier
token is: 
>=
type is:
GE
token is: 
45
type is:
IntLiteral
END
来源:https://www.cnblogs.com/rockbean/p/12017036.html