问题:实现一个解析器,可以将文本解析成表达式树;例如:"-3 + 4 * (2 / -log( 1 - 5 ))"
表达式数据结构:操作符 + - * / ()、常数等,都应被定义为一种类。特别说明,将括号定义为一种表达式是很有必要的。对于负号的处理,可以在表达式类中加入一个成员变量做标记。
解析器:上面的树状结构是结果,获得这个树,就需要对字符进行解析,来创建树,基本逻辑如下。
1,从左到右读字符串,如果是空格,忽略;如果是常数,创建一个常数表达式;如果是括号,将括号里的字符串提取出来,递归,括号里的字符串会被解析为一个表达式;如果是函数,将函数括号里的字符串提取出来,递归,得到一个表达式作为函数表达式的操作数;如果是操作符,提取操作符,按字符串保存;最终结果,得到一个表达式对象和字符串的混合数组;
2,处理负号,将负号并入表达式里;
3,按操作符操作优先级建立操作符表达式;
代码:示例代码,不完整。
表达式类
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Numerics;
using System.Text;

namespace Program
{
    /// <summary>
    /// Kind of value an <see cref="Equation"/> produced the last time it was evaluated:
    /// a boxed <c>double</c> (<c>rtConst</c>) or a <c>Complex[]</c> (<c>rtComplexArry</c>).
    /// </summary>
    public enum EquResultDataType
    {
        rtConst,
        rtComplexArry
    }

    /// <summary>Error raised while building or evaluating an expression tree.</summary>
    public class EquationException : Exception
    {
        public EquationException(string msg) : base(msg) { }

        public EquationException(string msg, Exception innerExp) : base(msg, innerExp) { }
    }

    /// <summary>
    /// Base class of every node of the expression tree. A node carries a sign flag
    /// (+1 or -1) so that a leading unary minus can be folded into the node itself.
    /// </summary>
    public abstract class Equation
    {
        /// <summary>Error text used whenever a child returns a value that does not match its declared type.</summary>
        protected const string ChildErrorMessage = "Child equation evaluate error!";

        // +1.0 normally, -1.0 if the equation starts with a '-'.
        private double _sign = 1.0;

        /// <summary>Evaluates the node; the result is a boxed <c>double</c> or a <c>Complex[]</c>.</summary>
        public virtual object Evaluate() { throw new NotImplementedException(); }

        /// <summary>Type of the value returned by the most recent <see cref="Evaluate"/> call.</summary>
        public EquResultDataType ResultDataType { get; protected set; }

        /// <summary>Current sign factor of the node (+1.0 or -1.0).</summary>
        protected double Sign { get { return _sign; } }

        /// <summary>Negates the node (folds a unary minus into it).</summary>
        public virtual void MultiplyMinusOne() { _sign *= -1.0; }

        /// <summary>Renders the node back to expression text.</summary>
        public virtual string ToEquationText() { throw new NotImplementedException(); }

        /// <summary>
        /// Applies this node's sign to an already computed value without touching the
        /// value's owner: a negative sign yields a negated double or a NEW negated array.
        /// </summary>
        /// <param name="value">A boxed <c>double</c> or a <c>Complex[]</c>.</param>
        /// <returns>The value itself when the sign is positive, otherwise its negation.</returns>
        /// <exception cref="EquationException">The value is neither a double nor a Complex array.</exception>
        protected object ApplySign(object value)
        {
            if (value is double)
            {
                double number = (double)value;
                return Sign < 0 ? -number : number;
            }

            Complex[] source = value as Complex[];
            if (source == null)
                throw new EquationException(ChildErrorMessage);

            if (Sign >= 0)
                return source;

            Complex[] negated = new Complex[source.Length];
            for (int i = 0; i < source.Length; ++i)
                negated[i] = -source[i];
            return negated;
        }
    }

    /// <summary>Node with two operands; provides the shared element-wise evaluation logic.</summary>
    public class BinaryEquation : Equation
    {
        public Equation LeftOperand { get; set; }

        public Equation RightOperand { get; set; }

        /// <summary>
        /// Evaluates both operands and combines them. Two constants are combined with
        /// <paramref name="realOp"/>; if at least one operand is a Complex array the
        /// combination is done element-wise with <paramref name="complexOp"/>, a constant
        /// operand being applied to every element. Sets <see cref="Equation.ResultDataType"/>
        /// so that a parent node can consume the result.
        /// </summary>
        /// <exception cref="EquationException">
        /// An operand is missing, two arrays differ in length, or a child returned a value
        /// that does not match its declared result type.
        /// </exception>
        protected object EvaluateBinary(Func<double, double, double> realOp, Func<Complex, Complex, Complex> complexOp)
        {
            if (LeftOperand == null || RightOperand == null)
                throw new EquationException("Binary equation is missing an operand.");

            object leftResult = LeftOperand.Evaluate();
            object rightResult = RightOperand.Evaluate();

            Complex[] leftArray;
            Complex leftScalar;
            Unpack(LeftOperand, leftResult, out leftArray, out leftScalar);

            Complex[] rightArray;
            Complex rightScalar;
            Unpack(RightOperand, rightResult, out rightArray, out rightScalar);

            // constant (op) constant
            if (leftArray == null && rightArray == null)
            {
                double value = realOp((double)leftResult, (double)rightResult);
                ResultDataType = EquResultDataType.rtConst;
                return Sign < 0 ? -value : value;
            }

            if (leftArray != null && rightArray != null && leftArray.Length != rightArray.Length)
            {
                throw new EquationException(string.Format("{0}, {1}: data length are not equal.",
                    LeftOperand.ToEquationText(), RightOperand.ToEquationText()));
            }

            int length = leftArray != null ? leftArray.Length : rightArray.Length;
            Complex[] result = new Complex[length];
            for (int i = 0; i < length; ++i)
            {
                Complex l = leftArray != null ? leftArray[i] : leftScalar;
                Complex r = rightArray != null ? rightArray[i] : rightScalar;
                Complex combined = complexOp(l, r);
                result[i] = Sign < 0 ? -combined : combined;
            }

            ResultDataType = EquResultDataType.rtComplexArry;
            return result;
        }

        /// <summary>
        /// Renders "left op right"; a negated node is wrapped as "-( left op right )"
        /// so that the text agrees with the evaluated value.
        /// </summary>
        protected string FormatBinary(string op)
        {
            string txt = LeftOperand.ToEquationText() + " " + op + " " + RightOperand.ToEquationText();
            if (Sign < 0)
                txt = "-( " + txt + " )";
            return txt;
        }

        // Splits an operand result into "array" or "scalar" form and validates it
        // against the type the operand declared.
        private static void Unpack(Equation operand, object value, out Complex[] array, out Complex scalar)
        {
            array = null;
            scalar = Complex.Zero;

            switch (operand.ResultDataType)
            {
                case EquResultDataType.rtComplexArry:
                    array = value as Complex[];
                    if (array == null)
                        throw new EquationException(ChildErrorMessage);
                    break;

                case EquResultDataType.rtConst:
                    if (!(value is double))
                        throw new EquationException(ChildErrorMessage);
                    scalar = (double)value; // implicit double -> Complex
                    break;

                default:
                    throw new EquationException(ChildErrorMessage);
            }
        }
    }

    /// <summary>Node with a single operand.</summary>
    public class UnaryEquaton : Equation
    {
        public Equation Operand { get; set; }

        /// <summary>Returns the operand or throws if none has been assigned.</summary>
        protected Equation RequireOperand()
        {
            if (Operand == null)
                throw new EquationException("Unary equation is missing its operand.");
            return Operand;
        }
    }

    /// <summary>left + right</summary>
    public class EquAddition : BinaryEquation
    {
        public override object Evaluate()
        {
            return EvaluateBinary((a, b) => a + b, (a, b) => a + b);
        }

        public override string ToEquationText() { return FormatBinary("+"); }
    }

    /// <summary>left - right</summary>
    public class EquSubtraction : BinaryEquation
    {
        public override object Evaluate()
        {
            return EvaluateBinary((a, b) => a - b, (a, b) => a - b);
        }

        public override string ToEquationText() { return FormatBinary("-"); }
    }

    /// <summary>left * right (element-wise for arrays)</summary>
    public class EquMultiplication : BinaryEquation
    {
        public override object Evaluate()
        {
            return EvaluateBinary((a, b) => a * b, (a, b) => a * b);
        }

        public override string ToEquationText() { return FormatBinary("*"); }
    }

    /// <summary>left / right (element-wise for arrays)</summary>
    public class EquDivision : BinaryEquation
    {
        public override object Evaluate()
        {
            return EvaluateBinary((a, b) => a / b, (a, b) => a / b);
        }

        public override string ToEquationText() { return FormatBinary("/"); }
    }

    /// <summary>A parenthesised sub-expression, optionally negated: "( x )" or "-( x )".</summary>
    public class EquParenthesis : UnaryEquaton
    {
        public override object Evaluate()
        {
            Equation inner = RequireOperand();
            object result = inner.Evaluate();

            // The sign is applied to the RESULT, never to the operand: mutating the
            // operand would flip the sign again on every subsequent evaluation.
            ResultDataType = inner.ResultDataType;
            return ApplySign(result);
        }

        public override string ToEquationText()
        {
            string txt = "( " + RequireOperand().ToEquationText() + " )";
            if (Sign < 0)
                txt = "-" + txt;
            return txt;
        }
    }

    /// <summary>A numeric literal. A unary minus is folded directly into the stored value.</summary>
    public class EquConstant : UnaryEquaton
    {
        double _const = 0.0;

        public EquConstant(double c)
        {
            _const = c;
            ResultDataType = EquResultDataType.rtConst;
        }

        public override void MultiplyMinusOne() { _const *= -1.0; }

        public override object Evaluate() { return _const; }

        // Invariant culture keeps '.' as the decimal separator, which is the only
        // separator the expression syntax uses.
        public override string ToEquationText() { return _const.ToString(CultureInfo.InvariantCulture); }
    }

    /// <summary>The natural logarithm function, optionally negated: "Log(x)" or "-Log(x)".</summary>
    public class EquFunction : UnaryEquaton
    {
        public override object Evaluate()
        {
            Equation argument = RequireOperand();
            object val = argument.Evaluate();

            switch (argument.ResultDataType)
            {
                case EquResultDataType.rtConst:
                    {
                        if (!(val is double))
                            throw new EquationException(ChildErrorMessage);

                        double result = Math.Log((double)val);
                        ResultDataType = EquResultDataType.rtConst;
                        return Sign < 0 ? -result : result;
                    }

                case EquResultDataType.rtComplexArry:
                    {
                        Complex[] source = val as Complex[];
                        if (source == null)
                            throw new EquationException(ChildErrorMessage);

                        // Write into a fresh array so the operand's data is left untouched.
                        Complex[] result = new Complex[source.Length];
                        for (int i = 0; i < source.Length; ++i)
                        {
                            Complex log = Complex.Log(source[i]);
                            result[i] = Sign < 0 ? -log : log;
                        }

                        ResultDataType = EquResultDataType.rtComplexArry;
                        return result;
                    }

                default:
                    throw new EquationException(ChildErrorMessage);
            }
        }

        public override string ToEquationText()
        {
            string txt = "Log(" + RequireOperand().ToEquationText() + ")";
            if (Sign < 0)
                txt = "-" + txt;
            return txt;
        }
    }
}
解析器
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace Program
{
    /// <summary>
    /// Parses expression text such as <c>-3 + 4 * (2 / -log( 1 - 5 ))</c> into an
    /// <see cref="Equation"/> tree. Parsing runs in three passes: split the text into
    /// top-level tokens (operand equations and operator strings, with parenthesised
    /// and function arguments parsed recursively), fold unary minus signs into the
    /// operand that follows them, then combine operands by operator precedence.
    /// </summary>
    public class EquationBuilder
    {
        // Unsigned decimal literal: "12", "12.5", "12." or ".5". The alternative that
        // allows a trailing dot comes first because alternation takes the first branch
        // that matches; ASCII digits only, so the match is always parseable.
        private static readonly Regex ConstantPattern = new Regex(@"^([0-9]+\.[0-9]*|[0-9]*\.[0-9]+|[0-9]+)");

        // Top-level tokens of the text being built: Equation (operand) or string (operator).
        List<object> tokens = new List<object>();

        /// <summary>
        /// Reads a balanced "( ... )" group starting at <paramref name="startpos"/>
        /// (leading whitespace is skipped) and returns the text between the outer
        /// parentheses. On return <paramref name="startpos"/> is just past the closing ')'.
        /// </summary>
        /// <returns>The inner text, or an empty string if no '(' is found at the position.</returns>
        /// <exception cref="EquationException">The opening parenthesis is never closed.</exception>
        protected string TokenizeParenthesis(string txt, ref int startpos)
        {
            while (startpos < txt.Length && char.IsWhiteSpace(txt[startpos]))
                ++startpos;

            if (startpos >= txt.Length || txt[startpos] != '(')
                return string.Empty;

            int open = startpos;
            int depth = 0;
            do
            {
                if (startpos >= txt.Length)
                    throw new EquationException("Parenthesis mismatch");

                char c = txt[startpos++];
                if (c == '(')
                    ++depth;
                else if (c == ')')
                    --depth;
            }
            while (depth > 0);

            // Drop the enclosing '(' and ')'.
            return txt.Substring(open + 1, startpos - open - 2);
        }

        /// <summary>
        /// Reads one top-level token at <paramref name="startpos"/> and advances the position.
        /// </summary>
        /// <returns>
        /// <c>null</c> for whitespace, a one-character string for an operator, or an
        /// <see cref="Equation"/> for a parenthesised group, a function call or a constant.
        /// </returns>
        /// <exception cref="EquationException">The text at the position is not recognised.</exception>
        protected object Tokenize(string txt, ref int startpos)
        {
            char c = txt[startpos];

            if (char.IsWhiteSpace(c))
            {
                ++startpos;
                return null;
            }

            switch (c)
            {
                case '(':
                    {
                        string inner = TokenizeParenthesis(txt, ref startpos);
                        Equation operand = new EquationBuilder().Build(inner); // recursive
                        return new EquParenthesis() { Operand = operand };
                    }

                case '+':
                case '-':
                case '*':
                case '/':
                    ++startpos;
                    return c.ToString();
            }

            // functions
            if (startpos + 3 < txt.Length)
            {
                string name = txt.Substring(startpos, 3);
                if (string.Equals(name, "LOG", StringComparison.OrdinalIgnoreCase))
                {
                    startpos += 3;
                    string argument = TokenizeParenthesis(txt, ref startpos);
                    if (string.IsNullOrWhiteSpace(argument))
                        throw new EquationException(string.Format("Invalide argurment of function '{0}'", name));

                    Equation operand = new EquationBuilder().Build(argument); // recursive
                    return new EquFunction() { Operand = operand };
                }
            }

            // constants
            string rest = txt.Substring(startpos);
            Match mt = ConstantPattern.Match(rest);
            if (mt.Success)
            {
                startpos += mt.Value.Length;
                // The syntax always uses '.', so parse independently of the current culture.
                return new EquConstant(Convert.ToDouble(mt.Value, CultureInfo.InvariantCulture));
            }

            throw new EquationException(string.Format("Unrecognized string '{0}'", rest));
        }

        /// <summary>
        /// Compares two operators: negative if <paramref name="op1"/> binds weaker than
        /// <paramref name="op2"/>, zero if equal, positive if stronger.
        /// </summary>
        static int OperatorPrecedence(string op1, string op2)
        {
            return PrecedenceLevel(op1) - PrecedenceLevel(op2);
        }

        // 0 for additive, 1 for multiplicative operators.
        private static int PrecedenceLevel(string op)
        {
            switch (op)
            {
                case "+":
                case "-":
                    return 0;
                case "*":
                case "/":
                    return 1;
                default:
                    throw new EquationException("Unrecognized operator " + op);
            }
        }

        /// <summary>Creates the binary node for <paramref name="op"/>.</summary>
        static Equation Build(Equation left, Equation right, string op)
        {
            switch (op)
            {
                case "+": return new EquAddition() { LeftOperand = left, RightOperand = right };
                case "-": return new EquSubtraction() { LeftOperand = left, RightOperand = right };
                case "*": return new EquMultiplication() { LeftOperand = left, RightOperand = right };
                case "/": return new EquDivision() { LeftOperand = left, RightOperand = right };
                default: throw new EquationException("Unrecognized operator " + op);
            }
        }

        /// <summary>Parses <paramref name="txt"/> and returns the root of its expression tree.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="txt"/> is null.</exception>
        /// <exception cref="EquationException">The text is empty or is not a valid expression.</exception>
        public Equation Build(string txt)
        {
            if (txt == null)
                throw new ArgumentNullException("txt");

            txt = txt.Trim();

            // Start from a clean slate so one builder can be used for several texts.
            tokens.Clear();

            // An enclosing "( ... )" is NOT stripped here: a text like "(1+2)*(3+4)"
            // also starts with '(' and ends with ')'. Tokenize handles every group.

            // example: txt = "-3 + 4 * (2 / -log( 1 - 5 ))"
            // tokenize it to top level segments -> {-, 3, +, 4, *, equ}
            int pos = 0;
            while (pos < txt.Length)
            {
                object obj = Tokenize(txt, ref pos);
                if (obj != null)
                    tokens.Add(obj);
            }

            if (tokens.Count == 0)
                throw new EquationException("Empty expression");

            FoldUnaryMinus();       // {-, 3, +, 4, *, equ} -> {-3, +, 4, *, equ}
            CheckTokenAlternation(); // must be {equ, op, equ, ..., op, equ}
            return CombineByPrecedence();
        }

        // Folds every '-' that directly follows another operator (or starts the text)
        // into the operand after it. Runs right to left so that removing a token
        // never shifts a position that is still to be visited.
        private void FoldUnaryMinus()
        {
            for (int i = tokens.Count - 1; i >= 0; --i)
            {
                Equation equ = tokens[i] as Equation;
                if (equ == null)
                    continue;

                if (i >= 2 && (tokens[i - 1] is string) && (tokens[i - 2] is string))
                {
                    string outer = (string)tokens[i - 2];
                    string inner = (string)tokens[i - 1];
                    if (inner != "-")
                        throw new EquationException(string.Format("Lack of operand between operators '{0}{1}'", outer, inner));

                    equ.MultiplyMinusOne();
                    tokens.RemoveAt(i - 1);
                }
                else if (i == 1 && (tokens[0] is string))
                {
                    string lead = (string)tokens[0];
                    if (lead != "-")
                        throw new EquationException(string.Format("Lack of operand before '{0}{1}'", lead, equ.ToEquationText()));

                    equ.MultiplyMinusOne();
                    tokens.RemoveAt(0);
                }
            }
        }

        // Verifies that operands and operators strictly alternate, starting and
        // ending with an operand.
        private void CheckTokenAlternation()
        {
            bool expectEquation = true;
            for (int i = 0; i < tokens.Count; ++i)
            {
                if (expectEquation != (tokens[i] is Equation))
                {
                    string before = i - 1 >= 0 ? TokenText(tokens[i - 1]) : string.Empty;
                    string current = TokenText(tokens[i]);
                    string after = i + 1 < tokens.Count ? TokenText(tokens[i + 1]) : string.Empty;
                    throw new EquationException(string.Format("Lack of operands/operators '{0}{1}{2}'", before, current, after));
                }
                expectEquation = !expectEquation;
            }

            if (tokens[tokens.Count - 1] is string)
                throw new EquationException("Lack of operand at the end");
        }

        private static string TokenText(object token)
        {
            Equation equ = token as Equation;
            return equ != null ? equ.ToEquationText() : token as string;
        }

        // Builds the tree with an operator stack and an operand stack.
        private Equation CombineByPrecedence()
        {
            Stack<string> opStack = new Stack<string>();
            Stack<Equation> eqStack = new Stack<Equation>();

            foreach (object token in tokens)
            {
                Equation equ = token as Equation;
                if (equ != null)
                {
                    eqStack.Push(equ);
                    continue;
                }

                string op = (string)token;

                // Reduce while the operator on the stack binds at least as strongly.
                // Including EQUAL precedence is what makes - and / left-associative:
                // "1 - 2 - 3" must become (1 - 2) - 3, not 1 - (2 - 3).
                while (opStack.Count > 0 && OperatorPrecedence(op, opStack.Peek()) <= 0)
                    Reduce(opStack, eqStack);

                opStack.Push(op);
            }

            while (opStack.Count > 0)
                Reduce(opStack, eqStack);

            Debug.Assert(eqStack.Count == 1);
            return eqStack.Pop();
        }

        // Pops one operator and its two operands and pushes the combined node.
        private static void Reduce(Stack<string> opStack, Stack<Equation> eqStack)
        {
            Equation right = eqStack.Pop();
            Equation left = eqStack.Pop();
            eqStack.Push(Build(left, right, opStack.Pop()));
        }
    }
}
例子:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Program
{
    /// <summary>
    /// Console demo: parses a sample expression, prints the text rendered from the
    /// resulting tree, then waits for input before exiting.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // A simpler alternative input to try: "3 + 4"
            string expression = "-3 + 4 * (2 / -log( 1 - 5 ))";

            Equation tree = new EquationBuilder().Build(expression);
            string rendered = tree.ToEquationText();

            Console.WriteLine(rendered);
            Console.Read();
        }
    }
}