C# 正则表达式过滤危险HTML

下面是两个用于过滤的方法：FilterHTML 负责过滤危险 HTML，GetReplace 负责正则双重替换。

/// <summary>
        /// Best-effort removal of dangerous markup from an HTML fragment:
        /// script / iframe / frameset / frame elements, form tags, inline
        /// <c>on*=</c> event-handler attributes and <c>javascript:</c> /
        /// <c>vbscript:</c> URLs.
        /// </summary>
        /// <remarks>
        /// This is a regex blacklist, not an HTML parser. It deliberately errs on
        /// the side of removing too much (for example, text that merely looks like
        /// an <c>on*=</c> attribute after a tag may be stripped), and it cannot
        /// catch every obfuscation (entity-encoded or whitespace-split scheme
        /// names, CSS-based vectors, ...). For untrusted input prefer an
        /// allow-list sanitizer or HTML-encode the text before output.
        /// </remarks>
        /// <param name="html">HTML fragment to filter; may be null.</param>
        /// <returns>The filtered fragment, or an empty string when <paramref name="html"/> is null.</returns>
        private string FilterHTML(string html)
        {
            if (html == null)
            {
                return "";
            }

            const RegexOptions options = RegexOptions.IgnoreCase;

            // Elements removed together with their content. "frameset" is handled
            // before "frame" so that nested frames disappear with their container.
            string[] blockedElements = { "script", "iframe", "frameset", "frame" };

            // Outer/inner pattern pairs for GetReplace. The outer patterns carry an
            // inline (?i) so they are case-insensitive regardless of the options
            // GetReplace uses when compiling its first pattern. A region starts at
            // any '<' and ends at the next '>' (or at end of input, because an
            // unterminated tag would otherwise be closed by whatever markup follows
            // the fragment on the page).
            const string eventRegion = @"(?i)<[^<]*?\bon[a-z]{3,}\s*=[\s\S]*?(?:>|\z)";
            const string eventAttribute = @"\bon[a-z]{3,}\s*=\s*(?:'[^']*'|""[^""]*""|[^\s<>]*)";

            const string scriptUrlRegion = @"(?i)<[^<]*?\b(?:href|src|background|url|dynsrc|expression|codebase)\s*=\s*[""']?\s*(?:javascript|vbscript)\s*:[\s\S]*?(?:>|\z)";
            const string scriptUrlValue = @"'\s*(?:javascript|vbscript)\s*:[^']*'|""\s*(?:javascript|vbscript)\s*:[^""]*""|(?:javascript|vbscript)\s*:[^\s<>]*";

            // Repeat until nothing changes: removing one fragment can splice the
            // surrounding text into a new dangerous tag ("<scr<script>ipt>").
            // Every step only deletes characters, so the loop always terminates.
            string previous;
            do
            {
                previous = html;

                foreach (string tag in blockedElements)
                {
                    // Complete element: opening tag through its closing tag.
                    html = Regex.Replace(html, "<" + tag + @"\b[\s\S]*?</" + tag + @"\s*>", "", options);

                    // Any remaining unpaired opening or closing tag.
                    html = Regex.Replace(html, "</?" + tag + @"\b[^>]*(?:>|\z)", "", options);
                }

                // Forms: drop only the tags, keep the content between them.
                html = Regex.Replace(html, @"</?form\b[^>]*(?:>|\z)", "", options);

                // Inline event handlers (single-quoted, double-quoted or unquoted).
                html = GetReplace(html, eventRegion, eventAttribute, "");

                // javascript: / vbscript: URLs in link-like attributes.
                html = GetReplace(html, scriptUrlRegion, scriptUrlValue, "");
            }
            while (html != previous);

            return html;
        }

        /// <summary>
        /// Two-stage regex replacement. Every span of <paramref name="content"/>
        /// matched by <paramref name="splitKey1"/> is rewritten by replacing, inside
        /// that span only, each match of <paramref name="splitKey2"/> with
        /// <paramref name="newChars"/>. Both patterns are matched case-insensitively.
        /// </summary>
        /// <param name="content">Text to process; null yields an empty string.</param>
        /// <param name="splitKey1">
        /// Outer pattern selecting the regions to rewrite. When null or empty,
        /// <paramref name="splitKey2"/> is applied to the whole text instead.
        /// </param>
        /// <param name="splitKey2">
        /// Inner pattern to replace. When null or empty the text is returned unchanged.
        /// </param>
        /// <param name="newChars">
        /// Replacement text (a regex replacement pattern, so <c>$1</c> etc. are
        /// honoured); null is treated as an empty string.
        /// </param>
        /// <returns>The rewritten text.</returns>
        private string GetReplace(string content, string splitKey1, string splitKey2, string newChars)
        {
            if (content == null)
            {
                return "";
            }

            // Without an inner pattern there is nothing to replace.
            if (string.IsNullOrEmpty(splitKey2))
            {
                return content;
            }

            string replacement = newChars ?? "";

            // Built once instead of once per outer match.
            Regex inner = new Regex(splitKey2, RegexOptions.IgnoreCase);

            if (string.IsNullOrEmpty(splitKey1))
            {
                return inner.Replace(content, replacement);
            }

            // Case-insensitive like the inner pattern, so upper-case markup is
            // not skipped.
            Regex outer = new Regex(splitKey1, RegexOptions.IgnoreCase);

            // A MatchEvaluator rewrites exactly the matched spans. Replacing by
            // text via string.Replace would also touch identical text elsewhere
            // and throws ArgumentException when the outer match is empty.
            return outer.Replace(content, match => inner.Replace(match.Value, replacement));
        }

使用的时候，直接调用 FilterHTML 处理要输出的内容即可：

 

// Run the description through FilterHTML before assigning it to the control's InnerHtml.
this.content.InnerHtml = FilterHTML(studentQuestionInfo.Description);

 

posted @ 2013-10-30 11:44  麦田HH  阅读(3419)  评论(0)  编辑  收藏  举报