Parsing if-else if statement algorithm

后端 未结 3 863
渐次进展
渐次进展 2021-02-09 08:56

I am trying to create a very simple parser for an if-else type structure that will build and execute a SQL statement.

Rather than testing conditions for executing statem

3条回答
  •  日久生厌
    2021-02-09 09:31

    I wrote a simple parser, which I tested against the example you provided. If you want to know more about parsing I suggest you to read Compiler Construction from Niklaus Wirth.

    The first step is always to write down the syntax of your language in an appropriate way. I have chosen EBNF, which is very simple to understand.

    | separates alternatives.

    [ and ] enclose options.

    { and } denote repetitions (zero, one or more).

    ( and ) group expressions (not used here).

    This description is not complete but the link I provided describes it in more detail.

    The EBNF syntax

    LineSequence = { TextLine | IfStatement }.
    TextLine     = .
    IfStatement  = IfLine LineSequence { ElseIfLine LineSequence } [ ElseLine LineSequence ] EndLine.
    IfLine       = "#if" "(" Condition ")".
    ElseLine     = "#else".
    ElseIfLine   = "#else" "if" "(" Condition ")".
    EndLine      = "#end".
    Condition    = Identifier "=" Identifier.
    Identifier   =  {  |  }.
    

    The parser follows closely the syntax, i.e. a repetition is translated into a loop, an alternative into an if-else statement, and so on.

    using System;
    using System.Collections.Generic;
    using System.Text.RegularExpressions;
    using System.Windows.Forms;
    
    namespace Example.SqlPreprocessor
    {
        class Parser
        {
            enum Symbol
            {
                None,
                LPar,
                RPar,
                Equals,
                Text,
                NumberIf,
                If,
                NumberElse,
                NumberEnd,
                Identifier
            }
    
            List _input; // Raw SQL with preprocessor directives.
            int _currentLineIndex = 0;
    
            // Simulates variables used in conditions
            Dictionary _variableValues = new Dictionary { 
                { "VariableA", "Case1" },
                { "VariableB", "CaseX" }
            };
    
            Symbol _sy; // Current symbol.
            string _string; // Identifier or text line;
            Queue _textQueue = new Queue(); // Buffered text parts of a single line.
            int _lineNo; // Current line number for error messages.
            string _line; // Current line for error messages.
    
            /// 
            /// Get the next line from the input.
            /// 
            /// Input line or null if no more lines are available.
            string GetLine()
            {
                if (_currentLineIndex >= _input.Count) {
                    return null;
                }
                _line = _input[_currentLineIndex++];
                _lineNo = _currentLineIndex;
                return _line;
            }
    
            /// 
            /// Get the next symbol from the input stream and stores it in _sy.
            /// 
            void GetSy()
            {
                string s;
                if (_textQueue.Count > 0) { // Buffered text parts available, use one from these.
                    s = _textQueue.Dequeue();
                    switch (s.ToLower()) {
                        case "(":
                            _sy = Symbol.LPar;
                            break;
                        case ")":
                            _sy = Symbol.RPar;
                            break;
                        case "=":
                            _sy = Symbol.Equals;
                            break;
                        case "if":
                            _sy = Symbol.If;
                            break;
                        default:
                            _sy = Symbol.Identifier;
                            _string = s;
                            break;
                    }
                    return;
                }
    
                // Get next line from input.
                s = GetLine();
                if (s == null) {
                    _sy = Symbol.None;
                    return;
                }
    
                s = s.Trim(' ', '\t');
                if (s[0] == '#') { // We have a preprocessor directive.
                    // Split the line in order to be able get its symbols.
                    string[] parts = Regex.Split(s, @"\b|[^#_a-zA-Z0-9()=]");
                    // parts[0] = #
                    // parts[1] = if, else, end
                    switch (parts[1].ToLower()) {
                        case "if":
                            _sy = Symbol.NumberIf;
                            break;
                        case "else":
                            _sy = Symbol.NumberElse;
                            break;
                        case "end":
                            _sy = Symbol.NumberEnd;
                            break;
                        default:
                            Error("Invalid symbol #{0}", parts[1]);
                            break;
                    }
    
                    // Store the remaining parts for later.
                    for (int i = 2; i < parts.Length; i++) {
                        string part = parts[i].Trim(' ', '\t');
                        if (part != "") {
                            _textQueue.Enqueue(part);
                        }
                    }
                } else { // We have an ordinary SQL text line.
                    _sy = Symbol.Text;
                    _string = s;
                }
            }
    
            void Error(string message, params  object[] args)
            {
                // Make sure parsing stops here
                _sy = Symbol.None;
                _textQueue.Clear();
                _input.Clear();
    
                message = String.Format(message, args) +
                          String.Format(" in line {0}\r\n\r\n{1}", _lineNo, _line);
                Output("------");
                Output(message);
                MessageBox.Show(message, "Error");
            }
    
            /// 
            /// Writes the processed line to a (simulated) output stream.
            /// 
            /// Line to be written to output
            void Output(string line)
            {
                Console.WriteLine(line);
            }
    
            /// 
            /// Starts the parsing process.
            /// 
            public void Parse()
            {
                // Simulate an input stream.
                _input = new List {
                    "select column1",
                    "from",
                    "#if(VariableA = Case1)",
                    "    #if(VariableB = Case3)",
                    "        table3",
                    "    #else",
                    "        table4",
                    "    #end",
                    "#else if(VariableA = Case2)",
                    "    table2",
                    "#else",
                    "    defaultTable",
                    "#end"
                };
    
                // Clear previous parsing
                _textQueue.Clear();
                _currentLineIndex = 0;
    
                // Get first symbol and start parsing
                GetSy();
                if (LineSequence(true)) { // Finished parsing successfully.
                    //TODO: Do something with the generated SQL
                } else { // Error encountered.
                    Output("*** ABORTED ***");
                }
            }
    
            // The following methods parse according the the EBNF syntax.
    
            bool LineSequence(bool writeOutput)
            {   
                // EBNF:  LineSequence = { TextLine | IfStatement }.
                while (_sy == Symbol.Text || _sy == Symbol.NumberIf) {
                    if (_sy == Symbol.Text) {
                        if (!TextLine(writeOutput)) {
                            return false;
                        }
                    } else { // _sy == Symbol.NumberIf
                        if (!IfStatement(writeOutput)) {
                            return false;
                        }
                    }
                }
                return true;
            }
    
            bool TextLine(bool writeOutput)
            {
                // EBNF:  TextLine = .
                if (writeOutput) {
                    Output(_string);
                }
                GetSy();
                return true;
            }
    
            bool IfStatement(bool writeOutput)
            {
                // EBNF:  IfStatement = IfLine LineSequence { ElseIfLine LineSequence } [ ElseLine LineSequence ] EndLine.
                bool result;
                if (IfLine(out result) && LineSequence(writeOutput && result)) {
                    writeOutput &= !result; // Only one section can produce an output.
                    while (_sy == Symbol.NumberElse) {
                        GetSy();
                        if (_sy == Symbol.If) { // We have an #else if
                            if (!ElseIfLine(out result)) {
                                return false;
                            }
                            if (!LineSequence(writeOutput && result)) {
                                return false;
                            }
                            writeOutput &= !result; // Only one section can produce an output.
                        } else { // We have a simple #else
                            if (!LineSequence(writeOutput)) {
                                return false;
                            }
                            break; // We can have only one #else statement.
                        }
                    }
                    if (_sy != Symbol.NumberEnd) {
                        Error("'#end' expected");
                        return false;
                    }
                    GetSy();
                    return true;
                }
                return false;
            }
    
            bool IfLine(out bool result)
            {
                // EBNF:  IfLine = "#if" "(" Condition ")".
                result = false;
                GetSy();
                if (_sy != Symbol.LPar) {
                    Error("'(' expected");
                    return false;
                }
                GetSy();
                if (!Condition(out result)) {
                    return false;
                }
                if (_sy != Symbol.RPar) {
                    Error("')' expected");
                    return false;
                }
                GetSy();
                return true;
            }
    
            private bool Condition(out bool result)
            {
                // EBNF:  Condition = Identifier "=" Identifier.
                string variable;
                string expectedValue;
                string variableValue;
    
                result = false;
                // Identifier "=" Identifier
                if (_sy != Symbol.Identifier) {
                    Error("Identifier expected");
                    return false;
                }
                variable = _string; // The first identifier is a variable.
                GetSy();
                if (_sy != Symbol.Equals) {
                    Error("'=' expected");
                    return false;
                }
                GetSy();
                if (_sy != Symbol.Identifier) {
                    Error("Value expected");
                    return false;
                }
                expectedValue = _string;  // The second identifier is a value.
    
                // Search the variable
                if (_variableValues.TryGetValue(variable, out variableValue)) {
                    result = variableValue == expectedValue; // Perform the comparison.
                } else {
                    Error("Variable '{0}' not found", variable);
                    return false;
                }
    
                GetSy();
                return true;
            }
    
            bool ElseIfLine(out bool result)
            {
                // EBNF:  ElseIfLine = "#else" "if" "(" Condition ")".
                result = false;
                GetSy(); // "#else" already processed here, we are only called if the symbol is "if"
                if (_sy != Symbol.LPar) {
                    Error("'(' expected");
                    return false;
                }
                GetSy();
                if (!Condition(out result)) {
                    return false;
                }
                if (_sy != Symbol.RPar) {
                    Error("')' expected");
                    return false;
                }
                GetSy();
                return true;
            }
        }
    }
    

    Note that the nested if-statements are processed automatically in a quite natural way. First, the grammar is expressed recursively. A LineSequence can contain IfStatments and IfStatments contain LineSequences. Second, this results in syntax processing methods that call each other in a recursive manner. The nesting of syntax elements is therefore translated into recursive method calls.

提交回复
热议问题