去除html标签、css以及script

/// <summary>
/// Reduces an HTML fragment to compact plain text: script and style elements
/// (including their content), all remaining tags, HTML character references,
/// and tab / carriage-return / line-feed / space characters are removed.
/// </summary>
/// <param name="html">The HTML markup to clean. May be <c>null</c> or empty.</param>
/// <returns>
/// The remaining text with no markup and no ASCII whitespace, or an empty
/// string when <paramref name="html"/> is <c>null</c> or empty.
/// </returns>
/// <remarks>
/// Character references such as <c>&amp;nbsp;</c> are deleted, not decoded.
/// This is a regex-based cleanup, not a full HTML parser.
/// </remarks>
public static string ConvertHtml2String(string html)
{
    // Guard against null so callers get an empty result instead of a NullReferenceException.
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    html = html.Trim();

    // Drop <script> and <style> elements together with their content.
    // \b keeps longer tag names that merely start with "script"/"style" from matching;
    // \s* tolerates whitespace before the closing '>' of the end tag.
    html = Regex.Replace(
        html,
        @"<script\b.*?</script\s*>|<style\b.*?</style\s*>",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Drop every remaining tag.
    html = Regex.Replace(html, "<[^>]+>", "");

    // Drop character references (&nbsp; &#160; &#xA0;). The pattern only accepts
    // well-formed references so that ordinary text containing '&' followed later
    // by ';' (e.g. "Tom & Jerry; ok") is not swallowed.
    html = Regex.Replace(html, "&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);", "");

    // Drop tabs, line breaks and spaces in a single pass.
    html = Regex.Replace(html, "[\t\r\n ]", "");

    return html;
}

 

posted @ 2020-08-17 14:09  六镇2012  阅读(209)  评论(0)  编辑  收藏  举报