去除HTML

 //去除HTML标记  
    public string NoHTML(string Htmlstring) 
    {
        
//删除脚本  
        Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>""", RegexOptions.IgnoreCase);
        
//删除HTML  
        Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>""", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"([\r\n])[\s]+""", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"-->""", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"<!--.*""", RegexOptions.IgnoreCase);

        Htmlstring 
= Regex.Replace(Htmlstring, @"&(quot|#34);""\"", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"&(amp|#38);""&", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"&(lt|#60);""<", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"&(gt|#62);"">", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"&(nbsp|#160);"" ", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"&(iexcl|#161);""\xa1", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"&(cent|#162);""\xa2", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"&(pound|#163);""\xa3", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"&(copy|#169);""\xa9", RegexOptions.IgnoreCase);
        Htmlstring 
= Regex.Replace(Htmlstring, @"&#(\d+);""", RegexOptions.IgnoreCase);

        Htmlstring.Replace(
"<""");
        Htmlstring.Replace(
">""");
        Htmlstring.Replace(
"\r\n""");
        Htmlstring 
= HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();

        
return Htmlstring;
    }
posted on 2011-03-16 09:26  whh  阅读(266)  评论(0)    收藏  举报