C#正则Groups高级使用方法
正则表达式号称开发者的瑞士军刀,用好正则表达式尤为重要。
拆分多个正则:
/// <summary>
/// Splits <paramref name="text"/> into consecutive segments using several patterns that must
/// all occur in the given order. Element 0 is the text before the first pattern; element i
/// (i &gt;= 1) starts where the i-th pattern matched and runs up to the next pattern (or the end).
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexStrings">Regex patterns marking the start of each segment, in order.</param>
/// <returns>
/// An array of <c>subRegexStrings.Length + 1</c> segments, or a single-element array holding
/// <paramref name="text"/> when the patterns do not all match in order (or an argument is null/empty).
/// </returns>
public static string[] SplitByManyRegex(string text, string[] subRegexStrings)
{
    if (text == null || subRegexStrings == null || subRegexStrings.Length == 0)
    {
        return new string[] { text };
    }

    string allRegexString = "^(?<mySubGroup0>.*?)";
    for (int i = 0; i < subRegexStrings.Length; i++)
    {
        // Wrap the caller's pattern in a non-capturing group so that a top-level alternation
        // ("a|b") stays one unit instead of being parsed as "a" OR "b.*?".
        allRegexString += "(?<mySubGroup" + (i + 1) + ">(?:" + subRegexStrings[i] + ").*?)";
    }
    allRegexString += "$";

    Regex subRegex = new Regex(allRegexString, RegexOptions.Singleline | RegexOptions.IgnoreCase);
    Match match = subRegex.Match(text);
    if (!match.Success)
    {
        return new string[] { text };
    }

    // Every named group is mandatory in the combined pattern, so each one has a valid index.
    string[] result = new string[subRegexStrings.Length + 1];
    for (int i = 0; i < result.Length; i++)
    {
        int start = match.Groups["mySubGroup" + i].Index;
        int end = (i < result.Length - 1)
            ? match.Groups["mySubGroup" + (i + 1)].Index
            : text.Length;
        result[i] = text.Substring(start, end - start);
    }

    return result;
}
调用:
// Example delimiter patterns. Presumably passed as the subRegexStrings argument,
// e.g. SplitByManyRegex(text, tags) — the actual call line is not shown here; confirm.
string[] tags = { "【答案】", "【解析】" };
拆分单个正则:
/// <summary>
/// Cuts <paramref name="text"/> at the start of every match of <paramref name="subRegexString"/>
/// (case-insensitive, single-line mode). Each match stays at the beginning of its segment.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexString">Regex pattern whose matches mark segment starts.</param>
/// <returns>
/// The text before the first match (possibly empty) followed by one segment per match;
/// a single-element array holding <paramref name="text"/> when nothing matches.
/// </returns>
public static string[] SplitByRegex(string text, string subRegexString)
{
    Regex splitter = new Regex(subRegexString, RegexOptions.Singleline | RegexOptions.IgnoreCase);
    MatchCollection hits = splitter.Matches(text);
    if (hits.Count == 0)
    {
        return new string[] { text };
    }

    List<string> pieces = new List<string>();
    int cursor = 0;
    foreach (Match hit in hits)
    {
        // Close the segment that ends where this match begins.
        pieces.Add(text.Substring(cursor, hit.Index - cursor));
        cursor = hit.Index;
    }

    // The last segment runs from the final match to the end of the text.
    pieces.Add(text.Substring(cursor));
    return pieces.ToArray();
}
不返回第一条:
/// <summary>
/// Same as SplitByRegex, but without the leading element (the text before the first match).
/// When the pattern never matches, SplitByRegex yields only the whole text, so the result is empty.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexString">Regex pattern whose matches mark segment starts.</param>
/// <returns>The segments returned by SplitByRegex, minus the first one.</returns>
public static string[] SplitByRegexNoFirtPart(string text, string subRegexString)
{
    return TrimFirstElementOfArray(SplitByRegex(text, subRegexString));
}

/// <summary>
/// Returns a copy of <paramref name="ary"/> without its first element.
/// A null or empty input produces an empty array.
/// </summary>
private static string[] TrimFirstElementOfArray(string[] ary)
{
    if (ary != null && ary.Length > 0)
    {
        string[] tail = new string[ary.Length - 1];
        for (int dst = 0; dst < tail.Length; dst++)
        {
            tail[dst] = ary[dst + 1];
        }
        return tail;
    }

    return new string[0];
}
拆分如:(A(B(C?)?)?)
/// <summary>
/// Splits <paramref name="text"/> by an ordered list of patterns where trailing patterns may be
/// absent, i.e. the combined expression has the shape <c>(A(B(C?)?)?)</c>. A later pattern is only
/// considered when the patterns before it matched.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexStrings">Regex patterns marking the start of each segment, in order.</param>
/// <returns>
/// The leading text followed by one segment per pattern that actually matched. If the first
/// pattern is not found (or an argument is null/empty) the result is a single-element array
/// holding <paramref name="text"/>.
/// </returns>
public static string[] SplitByManyRegex_MayLess(string text, string[] subRegexStrings)
{
    if (text == null || subRegexStrings == null || subRegexStrings.Length == 0)
    {
        return new string[] { text };
    }

    string allRegexString = "^(?<mySubGroup0>.*?)";
    for (int i = 0; i < subRegexStrings.Length; i++)
    {
        // The non-capturing wrapper keeps a top-level alternation in the caller's pattern
        // ("a|b") together; without it the pattern would parse as "a" OR "b.*?".
        allRegexString += "((?<mySubGroup" + (i + 1) + ">(?:" + subRegexStrings[i] + ").*?)";
    }
    // Close each wrapper opened above, making the nested tail optional.
    for (int i = subRegexStrings.Length - 1; i >= 0; i--)
    {
        allRegexString += "?)";
    }
    allRegexString += "$";

    Regex subRegex = new Regex(allRegexString, RegexOptions.Singleline | RegexOptions.IgnoreCase);
    Match match = subRegex.Match(text);
    if (!match.Success)
    {
        return new string[] { text };
    }

    // Only groups that participated in the match contribute a cut position.
    List<int> positions = new List<int>();
    for (int m = 0; m < subRegexStrings.Length + 1; m++)
    {
        Group group = match.Groups["mySubGroup" + m];
        if (group.Success)
        {
            positions.Add(group.Index);
        }
    }

    List<string> result = new List<string>();
    for (int i = 0; i < positions.Count; i++)
    {
        int nextPos = (i < positions.Count - 1) ? positions[i + 1] : text.Length;
        result.Add(text.Substring(positions[i], nextPos - positions[i]));
    }

    return result.ToArray();
}
可以任意顺序,任意个数:
/// <summary>
/// Splits <paramref name="text"/> at every occurrence of any of the given patterns, which may
/// appear in any order and any number of times.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexStrings">Regex patterns marking segment starts.</param>
/// <param name="resultChangeOrder">
/// When false, the segments are returned in text order exactly as SplitByRegex produced them.
/// When true, the result has <c>subRegexStrings.Length + 1</c> slots: slot 0 is the leading text and
/// slot k+1 holds the segment that starts with pattern k (null if that pattern never occurred;
/// if it occurred more than once, the last occurrence wins).
/// </param>
/// <returns>The segments, or a single-element array holding <paramref name="text"/> for null/empty input.</returns>
public static string[] SplitByManyRegex_AnyOrder(string text, string[] subRegexStrings, bool resultChangeOrder = true)
{
    if (string.IsNullOrEmpty(text) || subRegexStrings == null || subRegexStrings.Length == 0)
    {
        return new string[] { text };
    }

    string allReg = "(" + string.Join("|", subRegexStrings) + ")";
    string[] result = SplitByRegex(text, allReg);
    if (!resultChangeOrder)
    {
        return result;
    }

    // Each segment begins with the delimiter that produced it, so identify the pattern by an
    // anchored match at the segment start. The options mirror the ones SplitByRegex splits with;
    // otherwise a delimiter found case-insensitively would not be recognized here.
    Regex[] starters = new Regex[subRegexStrings.Length];
    for (int k = 0; k < subRegexStrings.Length; k++)
    {
        starters[k] = new Regex("^(?:" + subRegexStrings[k] + ")", RegexOptions.Singleline | RegexOptions.IgnoreCase);
    }

    string[] ordered = new string[subRegexStrings.Length + 1];
    ordered[0] = result[0];
    for (int i = 1; i < result.Length; i++)
    {
        // Place the segment into the slot of the first pattern that starts it, matching the
        // left-to-right priority of the alternation used for splitting.
        for (int k = 0; k < starters.Length; k++)
        {
            if (starters[k].IsMatch(result[i]))
            {
                ordered[k + 1] = result[i];
                break;
            }
        }
        // Slots whose pattern was never found stay null.
    }

    return ordered;
}
用正则表达式替换文本中的内容:
/// <summary>
/// Replaces every <c>&lt;handanswer&gt;...&lt;/handanswer&gt;</c> element in <paramref name="html"/>
/// (case-insensitive, may span lines) with the text produced by the match evaluator.
/// </summary>
/// <param name="html">The markup to transform.</param>
/// <returns>The transformed markup; null or empty input is returned unchanged.</returns>
public static string TranformHandAnswer(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    string strReg = "(?<hand>(<handanswer>(.*?)</handanswer>))";
    Regex regex = new Regex(strReg, RegexOptions.Singleline | RegexOptions.IgnoreCase);

    return regex.Replace(html, (Match match) =>
    {
        // The whole matched element; available for computing a replacement from its content.
        string handContent = match.Groups["hand"].Value;
        // Placeholder replacement text.
        string result = "替换得文本";
        return result;
    });
}
有了以上几个辅助方法,再难的正则拆分都能搞定。