/// <summary>
/// Strips markup from an HTML document and returns the remaining text with
/// every whitespace character removed.
/// </summary>
/// <remarks>
/// This is a best-effort, regex-based filter intended for reasonably
/// well-formed pages; it is not an HTML parser. Character entities such as
/// <c>&amp;nbsp;</c> are left as-is.
/// </remarks>
/// <param name="HtmlString">
/// The HTML source to filter. When it contains a <c>&lt;body&gt;</c> tag,
/// everything before that tag is discarded; otherwise the whole string is
/// processed.
/// </param>
/// <returns>
/// The text content with scripts, styles, comments, tags and whitespace
/// removed, or an empty string when <paramref name="HtmlString"/> is null
/// or empty.
/// </returns>
public string FilterHtmlTag(string HtmlString)
{
    if (string.IsNullOrEmpty(HtmlString))
    {
        return string.Empty;
    }

    // Skip the document head. The body tag may carry attributes or use a
    // different case, and may be missing entirely (HTML fragment), in which
    // case the whole input is filtered instead of throwing.
    Match bodyTag = Regex.Match(HtmlString, @"<body\b", RegexOptions.IgnoreCase);
    string text = bodyTag.Success ? HtmlString.Substring(bodyTag.Index) : HtmlString;

    // Drop HTML comments first so a '>' inside a comment cannot confuse the tag filter.
    text = Regex.Replace(text, @"<!--[\s\S]*?-->", "");

    // Remove JavaScript blocks. The lazy quantifier stops at the nearest
    // closing tag so text between two separate script blocks is preserved.
    text = Regex.Replace(text, @"<script\b[^>]*>[\s\S]*?</script\s*>", "", RegexOptions.IgnoreCase);

    // Remove CSS blocks, same approach as for scripts.
    text = Regex.Replace(text, @"<style\b[^>]*>[\s\S]*?</style\s*>", "", RegexOptions.IgnoreCase);

    // Remove the remaining tags; [^>] also matches line breaks, so tags that
    // span several lines are stripped too.
    text = Regex.Replace(text, @"<[^>]+>", "");

    // Remove whitespace such as \r, \n, tabs and spaces.
    return Regex.Replace(text, @"\s", "");
}
以上代码能够过滤结构比较规范的 HTML 页面;如果页面结构比较复杂,请自行修改。也希望您能把修改后更完整的代码回传一份。
邮箱:pweixu@163.com,欢迎大家一起分享。