C#截取数据库中有HTML格式的字符串

 /// <summary>
 /// Strips HTML markup from <paramref name="html"/>, truncates the remaining
 /// text to at most <paramref name="num"/> characters (appending "..." when
 /// anything was cut off) and returns the result HTML-encoded.
 /// </summary>
 /// <param name="html">HTML fragment to reduce to plain text. A null value yields an empty string.</param>
 /// <param name="num">Maximum number of characters of plain text to keep before the "..." suffix.</param>
 /// <returns>The HTML-encoded plain text, followed by "..." if it was truncated.</returns>
 /// <exception cref="System.ArgumentOutOfRangeException">
 /// Thrown by <c>Substring</c> when <paramref name="num"/> is negative.
 /// </exception>
 public static string SubStringTitle(string html, int num)
        {
            // Previously a null input crashed with NullReferenceException on the first Replace.
            if (html == null)
            {
                return string.Empty;
            }

            // Typographic quote entities are dropped outright.
            html = html.Replace("&ldquo;", "");
            html = html.Replace("&rdquo;", "");

            // Singleline lets '.' cross line breaks, so <script> blocks that span
            // several lines are removed together with their contents.
            html = Regex.Replace(html, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

            // Strip HTML tags.
            html = Regex.Replace(html, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            // Remove a line break together with the whitespace that follows it.
            html = Regex.Replace(html, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            // Remove leftover comment delimiters and any unterminated comment up to end of line.
            html = Regex.Replace(html, @"-->", "", RegexOptions.IgnoreCase);
            html = Regex.Replace(html, @"<!--.*", "", RegexOptions.IgnoreCase);

            // Decode the supported entities in ONE pass. Decoding them in sequential
            // passes un-escaped text twice (e.g. "&amp;lt;" became "&lt;" and then "<").
            html = Regex.Replace(
                html,
                @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
                new MatchEvaluator(DecodeTitleEntity),
                RegexOptions.IgnoreCase);

            // Any angle bracket still present (including decoded &lt; / &gt;) is discarded.
            html = html.Replace("<", "");
            html = html.Replace(">", "");
            html = html.Replace("\r\n", "");

            if (html.Length <= num)
            {
                return HttpUtility.HtmlEncode(html);
            }

            // Do not cut between the two halves of a surrogate pair.
            int cut = num;
            if (cut > 0 && char.IsHighSurrogate(html[cut - 1]))
            {
                cut--;
            }

            return HttpUtility.HtmlEncode(html.Substring(0, cut)) + "...";
        }

        /// <summary>
        /// Maps one matched entity (group 1 holds the name or "#NNN") to its
        /// replacement text; unrecognised numeric entities are removed.
        /// </summary>
        private static string DecodeTitleEntity(Match match)
        {
            switch (match.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return "";
                case "iexcl":
                case "#161":
                    return "\u00A1";
                case "cent":
                case "#162":
                    return "\u00A2";
                case "pound":
                case "#163":
                    return "\u00A3";
                case "copy":
                case "#169":
                    return "\u00A9";
                default:
                    // Any other numeric character reference is dropped.
                    return "";
            }
        }

posted @ 2011-11-23 13:37 .NET技术阅读(313) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

.NET程序

学无止尽

C#截取数据库中有HTML格式的字符串