问题
This is a follow-up question of my previous one: Better Class-Structure for logical expression parsing and evaluation
Brief introduction:
- rules as strings
- combinations of logical-and, logical-or, logical-negation and grouping by parenthesis of identifiers (ID's)
Example: "{100} AND (({101} OR {102}) OR ({103} AND {104})) AND NOT ({105} OR {106})"
This gets currently evaluated into a binary-tree of nodes, that looks like this:
Code taken from here: How to parse a boolean expression and load it into a class?
My implementation (Online Compiler):
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
namespace Rextester
{
public class Program
{
public static List<int> Rules = new List<int> { 103 , 106 };
public static void Main( string[] args )
{
var ruleStr = CleanupRuleString( "{100} AND (({101} OR {102}) OR ({103} AND {104})) AND NOT ({105} OR {106})" );
var inputRules = new List<int> { 103 , 106 };
var tree = GetTree( ruleStr );
var resTrue = Evaluate( tree , new List<int> { 100 , 101 } );
var resFalse = Evaluate( tree , new List<int> { 100 , 103 } );
Console.WriteLine( "resTrue: {0}" , resTrue );
Console.WriteLine( "resFalse: {0}" , resFalse );
}
public class Expression
{
public TokenTypes TokenType = TokenTypes.None;
public List<Expression> SubExpressions = new List<Expression>();
public string Literal = null;
}
public static Expression GetTree( string ruleStr )
{
var tokens = new List<Token>();
var reader = new StringReader( ruleStr );
for( var token = new Token( reader ) ; token.TokenType != TokenTypes.None ; token = new Token( reader ) )
{
tokens.Add( token );
}
tokens = TransformToPolishNotation( tokens );
var enumerator = tokens.GetEnumerator();
enumerator.MoveNext();
return CreateExpressionTree( ref enumerator );
}
public static string CleanupRuleString( string ruleStr )
{
foreach( var translation in TranslationMap )
{
var query = SyntaxMap.Where( x => x.Key == translation.Value ).Select( x => x.Key );
if( query.Any() )
ruleStr = ruleStr.Replace( translation.Key , query.Single().ToString() );
}
return new string( ruleStr.ToCharArray().Where( c => !char.IsWhiteSpace( c ) && c != '{' && c != '}' ).ToArray() );
}
public static bool Evaluate( Expression expr , List<int> rules )
{
if( expr.TokenType == TokenTypes.None )
{
return rules.Contains( Convert.ToInt32( expr.Literal ) );
}
else if( expr.TokenType == TokenTypes.Not )
{
return !Evaluate( expr.SubExpressions.Single() , rules );
}
else // binary op
{
if( expr.TokenType == TokenTypes.Or )
return Evaluate( expr.SubExpressions[ 0 ] , rules ) || Evaluate( expr.SubExpressions[ 1 ] , rules );
else if( expr.TokenType == TokenTypes.And )
return Evaluate( expr.SubExpressions[ 0 ] , rules ) && Evaluate( expr.SubExpressions[ 1 ] , rules );
}
throw new ArgumentException();
}
public static List<Token> TransformToPolishNotation( List<Token> infixTokenList )
{
var outputQueue = new Queue<Token>();
var stack = new Stack<Token>();
foreach( var token in infixTokenList )
{
switch( token.TokenType )
{
case TokenTypes.Literal:
{
outputQueue.Enqueue( token );
}
break;
case TokenTypes.Not:
case TokenTypes.And:
case TokenTypes.Or:
case TokenTypes.OpenParen:
{
stack.Push( token );
}
break;
case TokenTypes.CloseParen:
{
while( stack.Peek().TokenType != TokenTypes.OpenParen )
{
outputQueue.Enqueue( stack.Pop() );
}
stack.Pop();
if( stack.Count > 0 && stack.Peek().TokenType == TokenTypes.Not )
outputQueue.Enqueue( stack.Pop() );
}
break;
default:
break;
}
}
while( stack.Count > 0 )
{
outputQueue.Enqueue( stack.Pop() );
}
return outputQueue.Reverse().ToList();
}
public static Expression CreateExpressionTree( ref List<Token>.Enumerator tokenEnumerator )
{
var expression = new Expression();
if( tokenEnumerator.Current.TokenType == TokenTypes.Literal )
{
expression.Literal = tokenEnumerator.Current.Value;
tokenEnumerator.MoveNext();
return expression;
}
else if( tokenEnumerator.Current.TokenType != TokenTypes.None )
{
expression.TokenType = tokenEnumerator.Current.TokenType;
tokenEnumerator.MoveNext();
if( expression.TokenType == TokenTypes.Not )
{
expression.SubExpressions.Add( CreateExpressionTree( ref tokenEnumerator ) );
}
else if( expression.TokenType == TokenTypes.And || expression.TokenType == TokenTypes.Or )
{
expression.SubExpressions.Add( CreateExpressionTree( ref tokenEnumerator ) );
expression.SubExpressions.Add( CreateExpressionTree( ref tokenEnumerator ) );
}
}
return expression;
}
public static Dictionary<string,char> TranslationMap = new Dictionary<string,char> {
{ "NOT" , '!' } ,
{ "AND" , '&' } ,
{ "OR" , '|' } ,
};
public static Dictionary<char,TokenTypes> SyntaxMap = new Dictionary<char,TokenTypes>() {
{ '(' , TokenTypes.OpenParen } ,
{ ')' , TokenTypes.CloseParen } ,
{ '!' , TokenTypes.Not } ,
{ '&' , TokenTypes.And } ,
{ '|' , TokenTypes.Or } ,
};
public enum TokenTypes
{
None = -1,
OpenParen,
CloseParen,
And,
Or,
Not,
Literal,
}
public class Token
{
public TokenTypes TokenType;
public string Value;
public Token( StringReader s )
{
var charValue = s.Read();
if( charValue == -1 )
{
this.TokenType = TokenTypes.None;
this.Value = string.Empty;
return;
}
var ch = (char)charValue;
if( SyntaxMap.ContainsKey( ch ) )
{
this.TokenType = SyntaxMap[ ch ];
this.Value = string.Empty;
}
else // read literal
{
var sb = new StringBuilder();
sb.Append( ch );
while( s.Peek() != -1 && !SyntaxMap.ContainsKey( (char)s.Peek() ) )
{
sb.Append( (char)s.Read() );
}
this.TokenType = TokenTypes.Literal;
this.Value = sb.ToString();
}
}
}
}
}
Now I need to check by a given input of ID's which of them need to be included and excluded so that the current codepath results in TRUE:
input:
[
103 , 106
]
output:
[
{
inclusions: [ 100 , 101 ] ,
exclusions: [ 106 ]
} ,
{
inclusions: [ 100 , 102 ] ,
exclusions: [ 106 ]
} ,
{
inclusions: [ 100 , 103 , 104 ] ,
exclusions: [ 106 ]
} ,
]
My questions would be:
1. How do I traverse the tree so that I get all possible code-paths?
2. How do I keep track which ID's need to be included/excluded?
I'm looking for possible algorithms or already written implementations, but I would prefer to write it rather myself than using any kind of expression library
Note: speed isn't a concern, It just needs to work in and the code should be reasonable and easy understandable
回答1:
So you want to know which IDs you should add and/or take away from a given input so that an expression returns true for that input?
It's probably useful to look at this from a slightly different angle: what are the minimal inputs that can make this expression return true? Once that question is answered, your first input can be compared against these inputs and the differences are the answer to your first question.
A recursive approach would be sensible given the tree-like structure of your expressions. For each expression, getting the valid inputs for its child expressions (if it has any) should give you sufficient information to determine its own valid inputs:
ID expressions
ID expressions always have a single valid input: one that includes their ID.
1 --> {includes: [1]}
OR expressions
Every single valid input for a child expression of an OR expression is also a valid input for that OR expression itself. In other words, child inputs can be concatenated into a single list of valid inputs.
1 OR 2 OR 3:
{includes: [1]} OR {includes: [2]} OR {includes: [3]} --> {includes: [1]}
{includes: [2]}
{includes: [3]}
// Each child expression has a single valid input. Together, that's 3 valid inputs.
AND expressions
A valid input for an AND expression must satisfy at least one valid input of each child expressions, so the results are a combination of the valid input of all child expressions.
1 AND 2:
{includes: [1]} AND {includes: [2]} --> {includes: [1, 2]}
(1 OR 2) AND (3 OR 4):
{includes: [1]} AND {includes: [3]} --> {includes: [1, 3]}
{includes: [2]} {includes: [4]} {includes: [1, 4]}
{includes: [2, 3]}
{includes: [2, 4]}
// Each child expression has two valid inputs,
// which can be combined in 4 different ways.
NOT expressions
A valid input for a NOT expression must violate each valid input for the child expression. Leaving out just a single required ID, or including a single unwanted ID, is sufficient to violate an input, so there are many possible combinations.
NOT 1:
NOT {includes: [1]} --> {excludes: [1]}
NOT (1 AND 2):
NOT {includes: [1, 2]} --> {excludes: [1]}
{excludes: [2]}
// There are two minimal inputs that violate the single valid AND input.
NOT (1 OR 2):
NOT {includes: [1]} --> {excludes: [1, 2]}
{includes: [2]}
// There are two inputs, but only one way to violate each,
// so there's only one possible combination.
NOT ((1 OR 2) AND (3 OR 4)):
NOT {include: [1, 3]} --> {exclude: [1, 1, 2, 3]} --> {exclude: [1, 2, 3]}
{include: [1, 4]} {exclude: [1, 1, 2, 4]} {exclude: [1, 2, 4]}
{include: [2, 3]} {exclude: [1, 1, 3, 3]} {exclude: [1, 3]}
{include: [3, 4]} {exclude: [1, 1, 3, 4]} {exclude: [1, 3, 4]}
{exclude: [1, 4, 2, 3]} {exclude: [1, 2, 3, 4]}
{exclude: [1, 4, 2, 4]} {exclude: [2, 3, 4]}
{exclude: [1, 4, 3, 3]} {exclude: [3, 4]}
{exclude: [1, 4, 3, 4]}
{exclude: [3, 1, 2, 3]} |
{exclude: [3, 1, 2, 4]} v
{exclude: [3, 1, 3, 3]}
{exclude: [3, 1, 3, 4]} {exclude: [1, 2, 4]}
{exclude: [3, 4, 2, 3]} {exclude: [1, 3]}
{exclude: [3, 4, 2, 4]} {exclude: [3, 4]}
{exclude: [3, 4, 3, 3]}
{exclude: [3, 4, 3, 4]}
// Each of the 4 AND inputs can be violated in 2 ways, resulting in 2^4 = 16 combinations.
// Removing duplicate IDs and duplicate inputs leaves only 7.
// Furthermore, removing supersets leaves only 3 minimal inputs.
Other notes
As shown above, you'll want to eliminate duplicate inputs and remove inputs that are supersets of others (for example, {include: [1, 2]}
already covers {include: [1, 2, 3]}
), so that each method only returns the minimum set of valid inputs.
Contradictory inputs should also be removed:
1 AND (NOT 1):
{include: [1], exclude: [1]} --> (nothing)
// This expression has no valid inputs.
If the results for an expression contain two opposite inputs (one includes certain IDs, the other excludes the same IDs) then that expression is always valid. This could be represented by a single input object that specifies no include/exclude IDs.
Always-valid (and never-valid) expressions need to be taken into account by parent expressions. AND, OR and NOT each need to handle that edge-case in a different way:
- AND is always invalid if one of its children is invalid, but it can ignore an always-valid child.
- OR is always valid if one of its children is always valid, but it can ignore never-valid children.
- NOT simply changes always-valid to never-valid and vice versa.
Summary
- Use a recursive approach, with different logic for different expression types.
- The valid inputs for an expression's child expression(s) should be enough to determine the valid inputs for an expression itself. See 1.
来源:https://stackoverflow.com/questions/38426175/extract-all-possible-paths-from-expression-tree-and-evaluate-them-to-hold-true