笔记 - C#从头开始构建编译器 - 3
程序员文章站
2022-03-20 12:42:10
视频与PR:https://github.com/terrajobst/minsk/blob/master/docs/episode-03.md 作者是 Immo Landwerth(https://twitter.com/terrajobst),微软 .NET 团队的项目经理。 这一集前半段主要是 ......
视频与 PR:https://github.com/terrajobst/minsk/blob/master/docs/episode-03.md
作者是 Immo Landwerth(https://twitter.com/terrajobst),微软 .NET 团队的项目经理。
这一集前半段主要是重构代码,后半段的主要内容:
1. 变量与赋值表达式
2. 加强诊断信息
Parser(语法分析器)非常清晰:
using System.Collections.Generic;

namespace Minsk.CodeAnalysis.Syntax
{
    /// <summary>
    /// Turns source text into a <see cref="SyntaxTree"/>: the constructor lexes the
    /// whole input up front, and <see cref="Parse"/> builds an expression tree from
    /// the buffered tokens.
    /// </summary>
    internal sealed class Parser
    {
        private readonly SyntaxToken[] _tokens;
        private int _position;
        private readonly DiagnosticBag _diagnostics = new DiagnosticBag();

        /// <summary>
        /// Lexes <paramref name="text"/> until the end-of-file token is produced and
        /// buffers the resulting tokens.
        /// </summary>
        /// <param name="text">The source text to parse.</param>
        public Parser(string text)
        {
            var tokens = new List<SyntaxToken>();
            var lexer = new Lexer(text);
            SyntaxToken token;
            do
            {
                token = lexer.Lex();

                // Whitespace and bad tokens are dropped here, so the parsing
                // methods below never see them.
                if (token.Kind != SyntaxKind.WhitespaceToken &&
                    token.Kind != SyntaxKind.BadToken)
                {
                    tokens.Add(token);
                }
            } while (token.Kind != SyntaxKind.EndOfFileToken);

            _tokens = tokens.ToArray();

            // Carry the lexer's diagnostics over so they are reported together
            // with the parser's own.
            _diagnostics.AddRange(lexer.Diagnostics);
        }

        /// <summary>Diagnostics collected while lexing and parsing.</summary>
        public DiagnosticBag Diagnostics => _diagnostics;

        /// <summary>
        /// Returns the token <paramref name="offset"/> positions ahead of the current
        /// one without consuming anything.
        /// </summary>
        private SyntaxToken Peek(int offset)
        {
            var index = _position + offset;

            // Looking past the end yields the last buffered token. The constructor's
            // loop only stops after adding the end-of-file token, so that is the
            // token returned here.
            if (index >= _tokens.Length)
                return _tokens[_tokens.Length - 1];

            return _tokens[index];
        }

        /// <summary>The token at the current position.</summary>
        private SyntaxToken Current => Peek(0);

        /// <summary>Returns the current token and advances past it.</summary>
        private SyntaxToken NextToken()
        {
            var token = Current;
            _position++;
            return token;
        }

        /// <summary>
        /// Consumes and returns the current token if it is of the given
        /// <paramref name="kind"/>. Otherwise reports an unexpected-token diagnostic
        /// and returns a fabricated token of the expected kind.
        /// </summary>
        private SyntaxToken MatchToken(SyntaxKind kind)
        {
            if (Current.Kind == kind)
                return NextToken();

            _diagnostics.ReportUnexpectedToken(Current.Span, Current.Kind, kind);

            // The position is NOT advanced on a mismatch; the stand-in token has
            // null text and a null value.
            return new SyntaxToken(kind, Current.Position, null, null);
        }

        /// <summary>
        /// Parses one expression followed by the end-of-file token and wraps the
        /// result, together with all diagnostics, in a <see cref="SyntaxTree"/>.
        /// </summary>
        public SyntaxTree Parse()
        {
            var expression = ParseExpression();
            var endOfFileToken = MatchToken(SyntaxKind.EndOfFileToken);
            return new SyntaxTree(_diagnostics, expression, endOfFileToken);
        }

        /// <summary>Entry point for parsing any expression.</summary>
        private ExpressionSyntax ParseExpression()
        {
            return ParseAssignmentExpression();
        }

        /// <summary>
        /// Parses <c>identifier = expression</c> when the next two tokens are an
        /// identifier followed by an equals token; otherwise falls through to
        /// binary-expression parsing.
        /// </summary>
        private ExpressionSyntax ParseAssignmentExpression()
        {
            if (Peek(0).Kind == SyntaxKind.IdentifierToken &&
                Peek(1).Kind == SyntaxKind.EqualsToken)
            {
                var identifierToken = NextToken();
                var equalsToken = NextToken();

                // Recursing for the right-hand side makes chained assignments
                // nest to the right: a = b = 1 parses as a = (b = 1).
                var right = ParseAssignmentExpression();
                return new AssignmentExpressionSyntax(identifierToken, equalsToken, right);
            }

            return ParseBinaryExpression();
        }

        /// <summary>
        /// Parses a unary or primary expression and then folds in any following binary
        /// operators whose precedence is strictly greater than
        /// <paramref name="parentPrecedence"/>.
        /// </summary>
        /// <param name="parentPrecedence">
        /// Precedence of the operator whose operand is being parsed; 0 at the top level.
        /// </param>
        private ExpressionSyntax ParseBinaryExpression(int parentPrecedence = 0)
        {
            ExpressionSyntax left;

            // A precedence of 0 is treated here as "current token is not a unary operator".
            var unaryOperatorPrecedence = Current.Kind.GetUnaryOperatorPrecedence();
            if (unaryOperatorPrecedence != 0 && unaryOperatorPrecedence >= parentPrecedence)
            {
                var operatorToken = NextToken();
                var operand = ParseBinaryExpression(unaryOperatorPrecedence);
                left = new UnaryExpressionSyntax(operatorToken, operand);
            }
            else
            {
                left = ParsePrimaryExpression();
            }

            while (true)
            {
                // Stop on a non-operator (0) or on an operator that does not bind
                // tighter than the parent. Because the comparison is "<=", operators
                // of equal precedence are combined left to right by this loop.
                var precedence = Current.Kind.GetBinaryOperatorPrecedence();
                if (precedence == 0 || precedence <= parentPrecedence)
                    break;

                var operatorToken = NextToken();
                var right = ParseBinaryExpression(precedence);
                left = new BinaryExpressionSyntax(left, operatorToken, right);
            }

            return left;
        }

        /// <summary>
        /// Parses a parenthesized expression, a <c>true</c>/<c>false</c> literal, a
        /// name, or (by default) a number literal.
        /// </summary>
        private ExpressionSyntax ParsePrimaryExpression()
        {
            switch (Current.Kind)
            {
                case SyntaxKind.OpenParenthesisToken:
                {
                    var left = NextToken();
                    var expression = ParseExpression();
                    var right = MatchToken(SyntaxKind.CloseParenthesisToken);
                    return new ParenthesizedExpressionSyntax(left, expression, right);
                }

                case SyntaxKind.TrueKeyword:
                case SyntaxKind.FalseKeyword:
                {
                    var keywordToken = NextToken();
                    var value = keywordToken.Kind == SyntaxKind.TrueKeyword;
                    return new LiteralExpressionSyntax(keywordToken, value);
                }

                case SyntaxKind.IdentifierToken:
                {
                    var identifierToken = NextToken();
                    return new NameExpressionSyntax(identifierToken);
                }

                default:
                {
                    // Anything else must be a number; MatchToken reports a
                    // diagnostic and supplies a stand-in token if it is not.
                    var numberToken = MatchToken(SyntaxKind.NumberToken);
                    return new LiteralExpressionSyntax(numberToken);
                }
            }
        }
    }
}
负责语义分析的 Binder 也非常清晰:
using System;
using System.Collections.Generic;
using System.Linq;
using Minsk.CodeAnalysis.Syntax;

namespace Minsk.CodeAnalysis.Binding
{
    /// <summary>
    /// Converts syntax nodes into bound expressions, resolving variable names against
    /// a caller-supplied variable table and collecting diagnostics along the way.
    /// </summary>
    internal sealed class Binder
    {
        private readonly DiagnosticBag _diagnostics = new DiagnosticBag();
        private readonly Dictionary<VariableSymbol, object> _variables;

        /// <summary>Creates a binder that reads and updates the given variable table.</summary>
        /// <param name="variables">
        /// Table of known variables. The dictionary is stored by reference and is
        /// mutated when assignments are bound.
        /// </param>
        public Binder(Dictionary<VariableSymbol, object> variables)
        {
            _variables = variables;
        }

        /// <summary>Diagnostics reported while binding.</summary>
        public DiagnosticBag Diagnostics => _diagnostics;

        /// <summary>
        /// Dispatches on <c>syntax.Kind</c> to the matching bind method.
        /// </summary>
        /// <param name="syntax">The expression syntax to bind.</param>
        /// <returns>The bound form of <paramref name="syntax"/>.</returns>
        /// <exception cref="Exception">The syntax kind is not one handled here.</exception>
        public BoundExpression BindExpression(ExpressionSyntax syntax)
        {
            switch (syntax.Kind)
            {
                case SyntaxKind.ParenthesizedExpression:
                    return BindParenthesizedExpression((ParenthesizedExpressionSyntax)syntax);
                case SyntaxKind.LiteralExpression:
                    return BindLiteralExpression((LiteralExpressionSyntax)syntax);
                case SyntaxKind.NameExpression:
                    return BindNameExpression((NameExpressionSyntax)syntax);
                case SyntaxKind.AssignmentExpression:
                    return BindAssignmentExpression((AssignmentExpressionSyntax)syntax);
                case SyntaxKind.UnaryExpression:
                    return BindUnaryExpression((UnaryExpressionSyntax)syntax);
                case SyntaxKind.BinaryExpression:
                    return BindBinaryExpression((BinaryExpressionSyntax)syntax);
                default:
                    throw new Exception($"unexpected syntax {syntax.Kind}");
            }
        }

        /// <summary>
        /// Binds the inner expression; the parentheses get no bound node of their own.
        /// </summary>
        private BoundExpression BindParenthesizedExpression(ParenthesizedExpressionSyntax syntax)
        {
            return BindExpression(syntax.Expression);
        }

        /// <summary>Binds a literal, substituting 0 when the syntax node carries no value.</summary>
        private BoundExpression BindLiteralExpression(LiteralExpressionSyntax syntax)
        {
            // NOTE(review): a null value presumably comes from a token the parser
            // fabricated after a syntax error - confirm against LiteralExpressionSyntax.
            var value = syntax.Value ?? 0;
            return new BoundLiteralExpression(value);
        }

        /// <summary>
        /// Resolves a name to an existing variable. An unknown name is reported and
        /// bound as the literal 0.
        /// </summary>
        private BoundExpression BindNameExpression(NameExpressionSyntax syntax)
        {
            var name = syntax.IdentifierToken.Text;

            // The table is keyed by symbol, not by name, so the name lookup is a
            // scan over the keys using the predicate overload of FirstOrDefault.
            var variable = _variables.Keys.FirstOrDefault(v => v.Name == name);
            if (variable == null)
            {
                _diagnostics.ReportUndefinedName(syntax.IdentifierToken.Span, name);
                return new BoundLiteralExpression(0);
            }

            return new BoundVariableExpression(variable);
        }

        /// <summary>
        /// Binds an assignment: binds the right-hand side, then (re)declares the
        /// variable with the type of the bound expression.
        /// </summary>
        private BoundExpression BindAssignmentExpression(AssignmentExpressionSyntax syntax)
        {
            var name = syntax.IdentifierToken.Text;
            var boundExpression = BindExpression(syntax.Expression);

            // Any existing variable with the same name is removed first, so an
            // assignment may change the variable's type.
            var existingVariable = _variables.Keys.FirstOrDefault(v => v.Name == name);
            if (existingVariable != null)
                _variables.Remove(existingVariable);

            var variable = new VariableSymbol(name, boundExpression.Type);

            // Only the declaration is recorded here; the stored value stays null.
            _variables[variable] = null;

            return new BoundAssignmentExpression(variable, boundExpression);
        }

        /// <summary>
        /// Binds a unary expression. If no operator is defined for the operand type,
        /// a diagnostic is reported and the bound operand is returned as-is.
        /// </summary>
        private BoundExpression BindUnaryExpression(UnaryExpressionSyntax syntax)
        {
            var boundOperand = BindExpression(syntax.Operand);
            var boundOperator = BoundUnaryOperator.Bind(syntax.OperatorToken.Kind, boundOperand.Type);
            if (boundOperator == null)
            {
                _diagnostics.ReportUndefinedUnaryOperator(
                    syntax.OperatorToken.Span,
                    syntax.OperatorToken.Text,
                    boundOperand.Type);
                return boundOperand;
            }

            return new BoundUnaryExpression(boundOperator, boundOperand);
        }

        /// <summary>
        /// Binds a binary expression. If no operator is defined for the operand types,
        /// a diagnostic is reported and the bound left operand is returned as-is.
        /// </summary>
        private BoundExpression BindBinaryExpression(BinaryExpressionSyntax syntax)
        {
            var boundLeft = BindExpression(syntax.Left);
            var boundRight = BindExpression(syntax.Right);
            var boundOperator = BoundBinaryOperator.Bind(
                syntax.OperatorToken.Kind,
                boundLeft.Type,
                boundRight.Type);
            if (boundOperator == null)
            {
                _diagnostics.ReportUndefinedBinaryOperator(
                    syntax.OperatorToken.Span,
                    syntax.OperatorToken.Text,
                    boundLeft.Type,
                    boundRight.Type);
                return boundLeft;
            }

            return new BoundBinaryExpression(boundLeft, boundOperator, boundRight);
        }
    }
}
C# 语言点:
// Signature-only excerpt (no method body) of the LINQ extension method used by the Binder.
public static class Enumerable
{
    // Overload that takes a predicate; the Binder passes it a lambda (v => v.Name == name).
    public static TSource FirstOrDefault<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate);
}
FirstOrDefault 可以使用谓词作为判断条件,Binder 的第 55 行使用了 lambda 表达式。