欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页  >  IT编程

笔记 - C#从头开始构建编译器 - 3

程序员文章站 2022-03-20 12:42:10
视频与PR:https://github.com/terrajobst/minsk/blob/master/docs/episode-03.md 作者是 Immo Landwerth(https://twitter.com/terrajobst),微软 .NET 团队的项目经理。 这一集前半段主要是 ......

视频与 PR:https://github.com/terrajobst/minsk/blob/master/docs/episode-03.md

作者是 Immo Landwerth(https://twitter.com/terrajobst),微软 .NET 团队的项目经理。

 

这一集前半段主要是重构代码,后半段的主要内容:

1. 变量与赋值表达式

2. 加强诊断信息

 

Parser 的实现非常清晰:

using System.Collections.Generic;

namespace Minsk.CodeAnalysis.Syntax
{
    /// <summary>
    /// Parses source text into a <see cref="SyntaxTree"/> consisting of a single
    /// expression followed by an end-of-file token, collecting diagnostics along the way.
    /// </summary>
    internal sealed class Parser
    {
        private readonly SyntaxToken[] _tokens;
        private int _position;
        private readonly DiagnosticBag _diagnostics = new DiagnosticBag();

        /// <summary>
        /// Lexes <paramref name="text"/> completely and stores the resulting tokens.
        /// </summary>
        /// <param name="text">The source text to parse.</param>
        public Parser(string text)
        {
            var tokens = new List<SyntaxToken>();

            var lexer = new Lexer(text);
            SyntaxToken token;
            do
            {
                token = lexer.Lex();

                // Whitespace and bad tokens are dropped here, so the parsing
                // methods below never see them.
                if (token.Kind != SyntaxKind.WhitespaceToken && token.Kind != SyntaxKind.BadToken)
                {
                    tokens.Add(token);
                }
            } while (token.Kind != SyntaxKind.EndOfFileToken);

            // The loop only ends after the end-of-file token was added, so the
            // array always ends with that token.
            _tokens = tokens.ToArray();
            _diagnostics.AddRange(lexer.Diagnostics);
        }

        /// <summary>
        /// Diagnostics reported by the lexer and by this parser so far.
        /// </summary>
        public DiagnosticBag Diagnostics => _diagnostics;

        /// <summary>
        /// Returns the token <paramref name="offset"/> positions ahead of the current one
        /// without consuming anything. Looking past the end yields the last token.
        /// </summary>
        /// <param name="offset">Number of tokens to look ahead; 0 is the current token.</param>
        private SyntaxToken Peek(int offset)
        {
            var index = _position + offset;
            if (index >= _tokens.Length)
            {
                return _tokens[_tokens.Length - 1];
            }

            return _tokens[index];
        }

        /// <summary>
        /// The token at the current position.
        /// </summary>
        private SyntaxToken Current => Peek(0);

        /// <summary>
        /// Returns the current token and advances to the next one.
        /// </summary>
        private SyntaxToken NextToken()
        {
            var token = Current;
            _position++;
            return token;
        }

        /// <summary>
        /// Consumes the current token if it has the expected <paramref name="kind"/>.
        /// Otherwise reports an "unexpected token" diagnostic and returns a fabricated
        /// token of the expected kind (null text and value) without advancing.
        /// </summary>
        /// <param name="kind">The token kind required at the current position.</param>
        private SyntaxToken MatchToken(SyntaxKind kind)
        {
            if (Current.Kind == kind)
            {
                return NextToken();
            }

            _diagnostics.ReportUnexpectedToken(Current.Span, Current.Kind, kind);
            return new SyntaxToken(kind, Current.Position, null, null);
        }

        /// <summary>
        /// Parses one expression followed by the end-of-file token.
        /// </summary>
        /// <returns>
        /// A <see cref="SyntaxTree"/> holding the diagnostics, the expression and the
        /// end-of-file token.
        /// </returns>
        public SyntaxTree Parse()
        {
            var expression = ParseExpression();
            var endOfFileToken = MatchToken(SyntaxKind.EndOfFileToken);
            return new SyntaxTree(_diagnostics, expression, endOfFileToken);
        }

        /// <summary>
        /// Entry point for parsing any expression; delegates to assignment parsing.
        /// </summary>
        private ExpressionSyntax ParseExpression()
        {
            return ParseAssignmentExpression();
        }

        /// <summary>
        /// Parses <c>identifier = expression</c> when the next two tokens are an identifier
        /// and an equals token; otherwise falls through to binary expression parsing.
        /// </summary>
        private ExpressionSyntax ParseAssignmentExpression()
        {
            if (Peek(0).Kind == SyntaxKind.IdentifierToken && Peek(1).Kind == SyntaxKind.EqualsToken)
            {
                var identifierToken = NextToken();
                var equalsToken = NextToken();

                // Recursing for the right-hand side makes chained assignments
                // such as "a = b = 5" group to the right.
                var right = ParseAssignmentExpression();
                return new AssignmentExpressionSyntax(identifierToken, equalsToken, right);
            }

            return ParseBinaryExpression();
        }

        /// <summary>
        /// Parses unary and binary operator expressions, using operator precedence to
        /// decide how operands group.
        /// </summary>
        /// <param name="parentPrecedence">
        /// Precedence of the operator whose operand is being parsed; 0 at the top level.
        /// </param>
        private ExpressionSyntax ParseBinaryExpression(int parentPrecedence = 0)
        {
            ExpressionSyntax left;
            var unaryOperatorPrecedence = Current.Kind.GetUnaryOperatorPrecedence();
            if (unaryOperatorPrecedence != 0 && unaryOperatorPrecedence >= parentPrecedence)
            {
                // A precedence of 0 means "not a unary operator".
                var operatorToken = NextToken();
                var operand = ParseBinaryExpression(unaryOperatorPrecedence);
                left = new UnaryExpressionSyntax(operatorToken, operand);
            }
            else
            {
                left = ParsePrimaryExpression();
            }

            while (true)
            {
                // Stop at a non-operator (0) or at an operator that does not bind more
                // tightly than the parent; breaking on equal precedence makes operators
                // of the same precedence group to the left.
                var precedence = Current.Kind.GetBinaryOperatorPrecedence();
                if (precedence == 0 || precedence <= parentPrecedence)
                {
                    break;
                }

                var operatorToken = NextToken();
                var right = ParseBinaryExpression(precedence);
                left = new BinaryExpressionSyntax(left, operatorToken, right);
            }

            return left;
        }

        /// <summary>
        /// Parses a parenthesized expression, a <c>true</c>/<c>false</c> literal, a name,
        /// or (as the fallback) a number literal.
        /// </summary>
        private ExpressionSyntax ParsePrimaryExpression()
        {
            switch (Current.Kind)
            {
                case SyntaxKind.OpenParenthesisToken:
                {
                    var left = NextToken();
                    var expression = ParseExpression();
                    var right = MatchToken(SyntaxKind.CloseParenthesisToken);
                    return new ParenthesizedExpressionSyntax(left, expression, right);
                }

                case SyntaxKind.TrueKeyword:
                case SyntaxKind.FalseKeyword:
                {
                    var keywordToken = NextToken();
                    var value = keywordToken.Kind == SyntaxKind.TrueKeyword;
                    return new LiteralExpressionSyntax(keywordToken, value);
                }

                case SyntaxKind.IdentifierToken:
                {
                    var identifierToken = NextToken();
                    return new NameExpressionSyntax(identifierToken);
                }

                default:
                {
                    // Anything else must be a number; MatchToken reports a diagnostic
                    // and fabricates a number token when it is not.
                    var numberToken = MatchToken(SyntaxKind.NumberToken);
                    return new LiteralExpressionSyntax(numberToken);
                }
            }
        }
    }
}

负责语义分析的 Binder 也非常清晰:

using System;
using System.Collections.Generic;
using System.Linq;
using Minsk.CodeAnalysis.Syntax;

namespace Minsk.CodeAnalysis.Binding
{
    /// <summary>
    /// Converts syntax expressions into bound expressions, resolving names against a
    /// caller-supplied variable table and reporting diagnostics for undefined names
    /// and operators.
    /// </summary>
    internal sealed class Binder
    {
        private readonly DiagnosticBag _diagnostics = new DiagnosticBag();
        private readonly Dictionary<VariableSymbol, object> _variables;

        /// <summary>
        /// Creates a binder that reads and updates the given variable table.
        /// </summary>
        /// <param name="variables">
        /// Table of known variables. The binder keeps the reference and mutates it
        /// when binding assignments.
        /// </param>
        public Binder(Dictionary<VariableSymbol, object> variables)
        {
            _variables = variables;
        }

        /// <summary>
        /// Diagnostics reported while binding.
        /// </summary>
        public DiagnosticBag Diagnostics => _diagnostics;

        /// <summary>
        /// Binds <paramref name="syntax"/> by dispatching on its syntax kind.
        /// </summary>
        /// <param name="syntax">The expression to bind.</param>
        /// <returns>The bound form of the expression.</returns>
        /// <exception cref="Exception">
        /// The syntax kind is not one of the expression kinds handled here.
        /// </exception>
        public BoundExpression BindExpression(ExpressionSyntax syntax)
        {
            switch (syntax.Kind)
            {
                case SyntaxKind.ParenthesizedExpression:
                    return BindParenthesizedExpression((ParenthesizedExpressionSyntax)syntax);
                case SyntaxKind.LiteralExpression:
                    return BindLiteralExpression((LiteralExpressionSyntax)syntax);
                case SyntaxKind.NameExpression:
                    return BindNameExpression((NameExpressionSyntax)syntax);
                case SyntaxKind.AssignmentExpression:
                    return BindAssignmentExpression((AssignmentExpressionSyntax)syntax);
                case SyntaxKind.UnaryExpression:
                    return BindUnaryExpression((UnaryExpressionSyntax)syntax);
                case SyntaxKind.BinaryExpression:
                    return BindBinaryExpression((BinaryExpressionSyntax)syntax);
                default:
                    throw new Exception($"unexpected syntax {syntax.Kind}");
            }
        }

        /// <summary>
        /// Binds the inner expression; the parentheses themselves produce no bound node.
        /// </summary>
        private BoundExpression BindParenthesizedExpression(ParenthesizedExpressionSyntax syntax)
        {
            return BindExpression(syntax.Expression);
        }

        /// <summary>
        /// Binds a literal, substituting 0 when the syntax node carries no value.
        /// </summary>
        private BoundExpression BindLiteralExpression(LiteralExpressionSyntax syntax)
        {
            // NOTE(review): a null value presumably comes from a token the parser
            // fabricated after a syntax error - confirm against LiteralExpressionSyntax.
            var value = syntax.Value ?? 0;
            return new BoundLiteralExpression(value);
        }

        /// <summary>
        /// Resolves an identifier to a known variable. An unknown name is reported and
        /// bound as the literal 0 so that binding can continue.
        /// </summary>
        private BoundExpression BindNameExpression(NameExpressionSyntax syntax)
        {
            var name = syntax.IdentifierToken.Text;

            // The table is keyed by symbol, so a lookup by name scans the keys.
            var variable = _variables.Keys.FirstOrDefault(v => v.Name == name);
            if (variable == null)
            {
                _diagnostics.ReportUndefinedName(syntax.IdentifierToken.Span, name);
                return new BoundLiteralExpression(0);
            }

            return new BoundVariableExpression(variable);
        }

        /// <summary>
        /// Binds an assignment and (re)declares the target variable with the type of the
        /// bound right-hand side.
        /// </summary>
        private BoundExpression BindAssignmentExpression(AssignmentExpressionSyntax syntax)
        {
            var name = syntax.IdentifierToken.Text;

            // The right-hand side is bound first, so a reference to the same name in it
            // still resolves to the previously declared symbol.
            var boundExpression = BindExpression(syntax.Expression);

            // Replace any symbol with the same name so the variable takes the type of
            // the newly assigned expression.
            var existingVariable = _variables.Keys.FirstOrDefault(v => v.Name == name);
            if (existingVariable != null)
            {
                _variables.Remove(existingVariable);
            }

            var variable = new VariableSymbol(name, boundExpression.Type);

            // Only the declaration is recorded here; the stored value stays null.
            _variables[variable] = null;

            return new BoundAssignmentExpression(variable, boundExpression);
        }

        /// <summary>
        /// Binds a unary expression. When no operator exists for the operand type, a
        /// diagnostic is reported and the bound operand is returned on its own.
        /// </summary>
        private BoundExpression BindUnaryExpression(UnaryExpressionSyntax syntax)
        {
            var boundOperand = BindExpression(syntax.Operand);
            var boundOperator = BoundUnaryOperator.Bind(syntax.OperatorToken.Kind, boundOperand.Type);
            if (boundOperator == null)
            {
                _diagnostics.ReportUndefinedUnaryOperator(syntax.OperatorToken.Span, syntax.OperatorToken.Text, boundOperand.Type);
                return boundOperand;
            }

            return new BoundUnaryExpression(boundOperator, boundOperand);
        }

        /// <summary>
        /// Binds a binary expression. When no operator exists for the operand types, a
        /// diagnostic is reported and the bound left operand is returned on its own.
        /// </summary>
        private BoundExpression BindBinaryExpression(BinaryExpressionSyntax syntax)
        {
            var boundLeft = BindExpression(syntax.Left);
            var boundRight = BindExpression(syntax.Right);
            var boundOperator = BoundBinaryOperator.Bind(syntax.OperatorToken.Kind, boundLeft.Type, boundRight.Type);
            if (boundOperator == null)
            {
                _diagnostics.ReportUndefinedBinaryOperator(syntax.OperatorToken.Span, syntax.OperatorToken.Text, boundLeft.Type, boundRight.Type);
                return boundLeft;
            }

            return new BoundBinaryExpression(boundLeft, boundOperator, boundRight);
        }
    }
}

 

C# 语言点:

// Declaration-only excerpt of System.Linq.Enumerable (signature shown without a body).
public static class Enumerable
{
    /// <summary>
    /// Returns the first element of <paramref name="source"/> for which
    /// <paramref name="predicate"/> returns true, or the default value of
    /// <typeparamref name="TSource"/> when no element matches.
    /// </summary>
    public static TSource FirstOrDefault<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate);
}

FirstOrDefault 可以使用谓词作为判断条件;Binder(原 Binder.cs 第 55 行,即 BindNameExpression 中按名称查找变量处)使用了 lambda 表达式作为谓词。