C#去除指定字符串中的HTML标签相关代码函数

//去除指定字符串中的HTML标签相关代码函数
       private static string RemoveHtml(string strContent, string strTagName, int strType)
       {
           string pattern = "";
           string strResult = "";
           Regex exp;
           MatchCollection matchList;
           switch (strType)
           {
               case 1://去掉<a></a>中<a>标记的内容,保留<a>后面的所有代码

                   pattern = @"<" + strTagName + "([^>])*>";
                   exp = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
                   matchList = exp.Matches(strContent);
                   foreach (Match match in matchList)
                   {
                       if (match.Value.Length > 0)
                           strResult = match.Value;
                       strContent = strContent.Replace(strResult, "");
                       break;
                   }
                   break;

               case 2://去掉所有<a></a>两个标记的内容,保留<a>和</a>代码中间的代码
                   pattern = "<" + strTagName + "([^>])*>";
                   exp = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
                   matchList = exp.Matches(strContent);
                   foreach (Match match in matchList)
                   {
                       if (match.Value.Length > 0)
                           strResult = match.Value;
                       strContent = strContent.Replace(strResult, "");
                       break;
                   }
                   pattern = "</" + strTagName + "([^>])*>";
                   exp = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
                   matchList = exp.Matches(strContent);
                   foreach (Match match in matchList)
                   {
                       if (match.Value.Length > 0)
                           strResult = match.Value;
                       strContent = strContent.Replace(strResult, "");
                       break;
                   }
                   break;

               case 3://去掉所有<a></a>和两个标记之间的全部内容
                   pattern = "<" + strTagName + "([^>])*>.*?</" + strTagName + "([^>])*>";
                   exp = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
                   matchList = exp.Matches(strContent);
                   foreach (Match match in matchList)
                   {
                       if (match.Value.Length > 0)
                           strResult = match.Value;
                       strContent = strContent.Replace(strResult, "");
                       break;
                   }
                   break;
           }
           return strContent;
       }

posted on 2008-05-21 11:50  风灵溪清  阅读(387)  评论(0)  编辑  收藏  举报

导航