郁闷的大象

 

把文章里边的html标签去掉(去掉文字的样式,显示css设置的样式)

// Strip markup from Htmlstring in three passes. Order matters: <script> and
// <style> elements are removed together with their content first, because the
// generic tag pass only removes the tags themselves and would leave the
// script/CSS text behind.

// Remove <script ...>...</script> elements including their content.
// The quantifier is lazy so that text between two script elements is kept.
Htmlstring = Regex.Replace(Htmlstring, @"<script(\s[^>]*?)?>[\s\S]*?</script\s*>", "", RegexOptions.IgnoreCase);
// Remove <style ...>...</style> elements including their content
// (attributes such as type="text/css" are accepted).
Htmlstring = Regex.Replace(Htmlstring, @"<style(\s[^>]*?)?>[\s\S]*?</style\s*>", "", RegexOptions.IgnoreCase);
// Remove every remaining HTML tag, keeping the text between tags.
Htmlstring = Regex.Replace(Htmlstring, @"<[^>]+>", "", RegexOptions.IgnoreCase);

/// <summary>
/// Reduces an HTML fragment to plain text: script, iframe and frameset elements
/// are removed together with their content, script-scheme href values and
/// inline <c>on*</c> event-handler attributes inside tags are neutralised, all
/// remaining tags are stripped, and finally every space character is removed.
/// </summary>
/// <param name="html">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text. A null or empty <paramref name="html"/> is returned unchanged.
/// </returns>
public string checkStr(string html)
{
    // Nothing to clean; this also avoids the ArgumentNullException that
    // Regex.Replace throws for a null input.
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    const System.Text.RegularExpressions.RegexOptions ignoreCase =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // The static Regex.Replace overloads reuse cached Regex objects, so the
    // patterns are not re-parsed on every call.

    // <script>...</script> including content. The quantifier is lazy so that
    // text sitting between two script elements is preserved.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<script[\s\S]+?</script *>", "", ignoreCase);

    // href="...script:" (javascript:, vbscript:, ...) inside a tag. [^<>] keeps
    // the match within one tag, and the trailing look-behind requires that the
    // match is preceded by an unclosed '<', so ordinary text is never touched.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @" href *= *[^<>]*?script *:(?<=<[^<>]*)", "", ignoreCase);

    // Inline event handlers (onclick=, onerror=, ...) inside a tag are renamed
    // so they can no longer fire. This matters for an unterminated tag, which
    // the tag-stripping pass below does not remove.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"\son\w+\s*=(?<=<[^<>]*)", " _disibledevent=", ignoreCase);

    // <iframe>...</iframe> and <frameset>...</frameset> including content.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<iframe[\s\S]+?</iframe *>", "", ignoreCase);
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<frameset[\s\S]+?</frameset *>", "", ignoreCase);

    // Images and paragraph tags, then every remaining tag.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"\<img[^\>]+\>", "", ignoreCase);
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"</p>", "", ignoreCase);
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<p>", "", ignoreCase);
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<[^>]*>", "", ignoreCase);

    // Removes every space character from the remaining text.
    // NOTE(review): this may have been meant to strip "&nbsp;" entities rather
    // than literal spaces - confirm against the callers before changing it.
    html = html.Replace(" ", "");

    // No <strong> clean-up is needed here: the catch-all tag pass above leaves
    // no complete <...> pair in the text.
    return html;
}

posted on 2011-08-09 17:39  郁闷的大象  阅读(2308)  评论(0)  编辑  收藏  举报

导航