/// <summary>
/// Strips HTML markup from a string and decodes a small set of character entities.
/// </summary>
/// <remarks>
/// The patterns below are applied in order, each one to the output of the previous one.
/// After the pattern pass, every remaining '&lt;' and '&gt;' character and every CRLF
/// pair is removed. This includes angle brackets produced by decoding the "lt"/"gt"
/// entities and the CRLF substituted for a comment terminator.
/// </remarks>
/// <param name="string_include_html">Text that may contain HTML markup; may be null.</param>
/// <returns>
/// The text with markup removed, or an empty string when the input is null or empty.
/// </returns>
public static string FilterHtml(string string_include_html)
{
    if (string.IsNullOrEmpty(string_include_html))
    {
        return "";
    }

    // Parallel arrays: matches of HtmlRegexArr[i] are replaced with HtmlReplaceArr[i].
    string[] HtmlRegexArr =
    {
        @"<script[^>]*?>.*?</script>",   // script blocks (single line only: '.' does not match \n)
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", // tags
        @"([\r\n])[\s]+",                // a line break followed by whitespace
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",                     // any other numeric entity is dropped
        @"-->",
        @"<!--.*\n"
    };

    string[] HtmlReplaceArr =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\u00A1",
        "\u00A2",
        "\u00A3",
        "\u00A9",
        "",
        "\r\n",
        ""
    };

    // Each pass must consume the result of the previous pass; running every pattern
    // against the untouched input would keep only the effect of the last pattern.
    string string_no_html = string_include_html;
    for (int i = 0; i < HtmlRegexArr.Length; i++)
    {
        string_no_html = System.Text.RegularExpressions.Regex.Replace(
            string_no_html,
            HtmlRegexArr[i],
            HtmlReplaceArr[i],
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string, so the result has to be kept.
    return string_no_html
        .Replace("<", "")
        .Replace(">", "")
        .Replace("\r\n", "");
}
以上来自网络,但个人认为还是不行。故有以下自己写的:
/// <summary>
/// Removes HTML tags from a string, converting line-break tags to CRLF and
/// non-breaking spaces to ordinary spaces.
/// </summary>
/// <param name="string_include_html">The text to clean; may be null or empty.</param>
/// <returns>
/// The text without tags, or an empty string when the input is null or empty.
/// </returns>
public static string GetStringNoHtml(string string_include_html)
{
    if (string.IsNullOrEmpty(string_include_html))
    {
        return "";
    }

    // Turn every spelling of the line-break tag (<br>, <BR>, <br/>, <br />) into CRLF
    // before the generic tag pattern below would delete it outright.
    string with_line_breaks = System.Text.RegularExpressions.Regex.Replace(
        string_include_html,
        @"<br\s*/?>",
        "\r\n",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    // Singleline lets '.' cross line breaks, so a multi-line script block is removed
    // together with its contents instead of losing only its tags.
    string string_no_html = System.Text.RegularExpressions.Regex.Replace(
        with_line_breaks,
        @"(<script[^>]*?>.*?</script>)|(<(.[^>]*)>)",
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase
            | System.Text.RegularExpressions.RegexOptions.Singleline);

    // Normalise both the entity and the literal U+00A0 character to a plain space.
    return string_no_html
        .Replace("&nbsp;", " ")
        .Replace("\u00A0", " ");
}
讓眾人的薪枝構起這團熱情的火焰