去除html标记和替换script标记

   /// <summary>
   /// Strips HTML markup from a string and returns the remaining plain text.
   /// </summary>
   /// <remarks>
   /// Script blocks and comments are removed together with their content, every
   /// remaining tag is removed, stray angle brackets and CRLF pairs are dropped,
   /// and a small set of character entities is decoded. Any other numeric
   /// entity is discarded. Because encoded angle brackets are decoded, the
   /// result is plain text and must be HTML-encoded again before it is written
   /// into a page.
   /// </remarks>
   /// <param name="Htmlstring">Source text that may contain HTML markup.</param>
   /// <returns>
   /// The text with markup removed; an empty string when the input is null or empty.
   /// </returns>
   public static string RemoveHTML(string Htmlstring)
   {
       if (string.IsNullOrEmpty(Htmlstring))
       {
           return string.Empty;
       }

       // Remove script blocks including their content. Singleline lets '.' cross
       // line breaks; without it a multi-line script body was left in the output.
       Htmlstring = Regex.Replace(
           Htmlstring,
           @"<script\b[^>]*>.*?</script\s*>",
           "",
           RegexOptions.IgnoreCase | RegexOptions.Singleline);

       // Remove complete comments before the generic tag pass, otherwise a
       // comment containing '>' is cut at that character and its tail leaks out.
       Htmlstring = Regex.Replace(Htmlstring, @"<!--.*?-->", "", RegexOptions.Singleline);

       // Remove every remaining tag.
       Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "");

       // Remove a line break together with the whitespace that follows it.
       Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "");

       // Leftovers of broken comments: an orphan terminator, or an opener that
       // is never closed (dropped up to the end of its line).
       Htmlstring = Regex.Replace(Htmlstring, @"-->", "");
       Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "");

       // String.Replace returns a new string; the result has to be assigned.
       // This runs before entity decoding so that text written as &lt; / &gt;
       // in the source still appears in the output.
       Htmlstring = Htmlstring.Replace("<", "").Replace(">", "").Replace("\r\n", "");

       // Decode entities in one pass so that already-decoded text is never
       // scanned again (e.g. "&amp;lt;" must yield "&lt;", not "<").
       return Regex.Replace(
           Htmlstring,
           @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
           DecodeHtmlEntity,
           RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
   }

   /// <summary>
   /// Maps one matched character entity to its replacement text for RemoveHTML.
   /// </summary>
   /// <param name="entity">A match whose first group is the entity name or "#" plus digits.</param>
   /// <returns>The decoded character, or an empty string for an unsupported numeric entity.</returns>
   private static string DecodeHtmlEntity(Match entity)
   {
       string name = entity.Groups[1].Value.ToLowerInvariant();
       switch (name)
       {
           case "quot":
           case "#34":
               return "\"";
           case "amp":
           case "#38":
               return "&";
           case "lt":
           case "#60":
               return "<";
           case "gt":
           case "#62":
               return ">";
           case "nbsp":
           case "#160":
               // A non-breaking space is returned as an ordinary space.
               return " ";
           case "iexcl":
           case "#161":
               return "\u00A1";
           case "cent":
           case "#162":
               return "\u00A2";
           case "pound":
           case "#163":
               return "\u00A3";
           case "copy":
           case "#169":
               return "\u00A9";
           default:
               // Unsupported numeric entities are dropped; anything else is left untouched.
               return name.StartsWith("#", StringComparison.Ordinal) ? string.Empty : entity.Value;
       }
   }
  36:   
  37:   
  #region Neutralise script tags with regular expressions
  /// <summary>
  /// Rewrites script tag delimiters so that they are no longer HTML tags:
  /// an opening "&lt;script" becomes "[script " and a closing "/script&gt;"
  /// becomes " /script]". Matching ignores case and tolerates spaces inside
  /// the delimiter. Nothing else in the input is changed.
  /// author: Andrew.He
  /// </summary>
  /// <param name="scriptString">Text that may contain script tags.</param>
  /// <returns>
  /// The text with script tag delimiters rewritten; a null or empty input is
  /// returned as is.
  /// </returns>
  public static string RemoveScript(string scriptString)
  {
      const string OpeningPattern = @"<[ ]*script";
      const string ClosingPattern = @"/[ ]*script[ ]*>";

      // Nothing to rewrite: hand the input back unchanged (null stays null).
      if (string.IsNullOrEmpty(scriptString))
      {
          return scriptString;
      }

      // The opening delimiter is rewritten first, then the closing one.
      string openingRewritten = Regex.Replace(
          scriptString, OpeningPattern, "[script ", RegexOptions.IgnoreCase);

      return Regex.Replace(
          openingRewritten, ClosingPattern, " /script]", RegexOptions.IgnoreCase);
  }
  #endregion
posted @ 2014-05-09 13:33  MyFirstHome  阅读(528)  评论(0)  编辑  收藏  举报