悟生慧

 

去掉HTML标记的各种方法

using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// String helper utilities: regex-based replace/delete, HTML tag stripping,
/// HTML escaping/unescaping, random string generation and numeric checks.
/// </summary>
public class StringUtilily
{
    /// <summary>
    /// Alphabet that <see cref="GetRandomString"/> draws its characters from.
    /// </summary>
    public const string RANDOM_STRING_SOURCE = "0123456789abcdefghijklmnopqrstuvwxyz";

    // One shared generator: creating a new Random per call can hand out identical
    // sequences when calls happen within the same clock tick on time-seeded runtimes.
    private static readonly Random s_random = new Random();

    // Random is not thread-safe, so access to the shared instance is serialized.
    private static readonly object s_randomLock = new object();

    // ASCII digits only, anchored with \z so a trailing newline is not accepted.
    private static readonly Regex s_digitsOnly = new Regex(@"^[0-9]+\z", RegexOptions.Compiled);

    /// <summary>
    /// Creates an instance. All members are static; the constructor is kept only
    /// so existing code that instantiates the class keeps compiling.
    /// </summary>
    public StringUtilily()
    {
    }

    /// <summary>
    /// Replaces every match of a regular expression in a string.
    /// </summary>
    /// <param name="src">The string to modify.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="replacement">The replacement string (regex substitution syntax applies).</param>
    /// <returns>The modified string.</returns>
    public static string Replace(string src, string pattern, string replacement)
    {
        return Replace(src, pattern, replacement, RegexOptions.None);
    }

    /// <summary>
    /// Replaces every match of a regular expression in a string, ignoring case.
    /// </summary>
    /// <param name="src">The string to modify.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="replacement">The replacement string (regex substitution syntax applies).</param>
    /// <returns>The modified string.</returns>
    public static string ReplaceIgnoreCase(string src, string pattern, string replacement)
    {
        return Replace(src, pattern, replacement, RegexOptions.IgnoreCase);
    }

    /// <summary>
    /// Replaces every match of a regular expression in a string using the given options.
    /// </summary>
    /// <param name="src">The string to modify.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="replacement">The replacement string (regex substitution syntax applies).</param>
    /// <param name="options">Regex matching options.</param>
    /// <returns>The modified string.</returns>
    public static string Replace(string src, string pattern, string replacement, RegexOptions options)
    {
        // The static overload goes through the framework's regex cache. Building a
        // new RegexOptions.Compiled instance on every call pays the compilation
        // cost each time without ever reusing the result.
        return Regex.Replace(src, pattern, replacement, options);
    }

    /// <summary>
    /// Deletes every match of a regular expression from a string.
    /// </summary>
    /// <param name="src">The string to modify.</param>
    /// <param name="pattern">The regular expression pattern to delete.</param>
    /// <returns>The string with all matches removed.</returns>
    public static string Drop(string src, string pattern)
    {
        return Replace(src, pattern, "");
    }

    /// <summary>
    /// Deletes every match of a regular expression from a string, ignoring case.
    /// </summary>
    /// <param name="src">The string to modify.</param>
    /// <param name="pattern">The regular expression pattern to delete.</param>
    /// <returns>The string with all matches removed.</returns>
    public static string DropIgnoreCase(string src, string pattern)
    {
        return ReplaceIgnoreCase(src, pattern, "");
    }

    /// <summary>
    /// Doubles every single quote in a string.
    /// </summary>
    /// <remarks>
    /// Only the single quote character is handled. Prefer parameterized queries
    /// over building SQL text from escaped strings.
    /// </remarks>
    /// <param name="src">The string to escape; may be null.</param>
    /// <returns>The string with each ' replaced by '', or null when <paramref name="src"/> is null.</returns>
    public static string ToSQL(string src)
    {
        if (src == null)
        {
            return null;
        }

        // A literal replacement; no regular expression is needed for a fixed character.
        return src.Replace("'", "''");
    }

    /// <summary>
    /// Removes the opening and closing tags of one HTML element from the content,
    /// ignoring case. Text between the tags is kept.
    /// </summary>
    /// <param name="content">The HTML content.</param>
    /// <param name="tagName">
    /// The tag name, treated as literal text. When null or empty, all tags are removed.
    /// </param>
    /// <returns>The content without the given tag.</returns>
    public static string DropHtmlTag(string content, string tagName)
    {
        if (string.IsNullOrEmpty(tagName))
        {
            // An empty name used to produce a pattern that matched every tag;
            // keep that outcome explicitly.
            return DropHtmlTag(content);
        }

        // The negative lookahead stops the name from matching as a prefix of a
        // longer tag name, so dropping "b" leaves "br" and "body" tags alone.
        string pattern = "</?" + Regex.Escape(tagName) + @"(?![\w:-])[^>]*>";
        return DropIgnoreCase(content, pattern);
    }

    /// <summary>
    /// Removes all HTML tags from the content. Text between tags is kept.
    /// </summary>
    /// <param name="content">The HTML content.</param>
    /// <returns>The content with every tag removed.</returns>
    public static string DropHtmlTag(string content)
    {
        return Drop(content, "<[^\\>]*>");
    }

    /// <summary>
    /// Generates a random string made of characters from <see cref="RANDOM_STRING_SOURCE"/>.
    /// </summary>
    /// <remarks>
    /// Uses <see cref="System.Random"/>, which is not suitable for passwords,
    /// tokens or other security-sensitive values.
    /// </remarks>
    /// <param name="num">The number of characters to generate.</param>
    /// <returns>A random string of length <paramref name="num"/>; empty when it is zero or negative.</returns>
    public static string GetRandomString(int num)
    {
        if (num <= 0)
        {
            return string.Empty;
        }

        StringBuilder builder = new StringBuilder(num);
        lock (s_randomLock)
        {
            for (int i = 0; i < num; i++)
            {
                // Next(max) returns a value in [0, max), so the index is always
                // valid and every character is equally likely. Rounding
                // NextDouble() * 36 could produce 36, one past the last index.
                int index = s_random.Next(RANDOM_STRING_SOURCE.Length);
                builder.Append(RANDOM_STRING_SOURCE[index]);
            }
        }

        return builder.ToString();
    }

    /// <summary>
    /// Checks whether a string consists only of ASCII digits (0-9).
    /// </summary>
    /// <param name="inputData">The string to check; may be null.</param>
    /// <returns>
    /// true when the string is one or more ASCII digits; false otherwise,
    /// including for null and for the empty string.
    /// </returns>
    public static bool IsNumeric(string inputData)
    {
        if (inputData == null)
        {
            return false;
        }

        return s_digitsOnly.IsMatch(inputData);
    }

    /// <summary>
    /// Escapes angle brackets so HTML tags are displayed as text in a web page.
    /// </summary>
    /// <remarks>
    /// Only the two angle bracket characters are replaced; ampersands are left
    /// unchanged, so entities already present in the input are not double-escaped.
    /// </remarks>
    /// <param name="src">The string to escape; may be null.</param>
    /// <returns>The escaped string, or null when <paramref name="src"/> is null.</returns>
    public static string EscapeHtml(string src)
    {
        if (src == null)
        {
            return null;
        }

        return src.Replace(">", "&gt;").Replace("<", "&lt;");
    }

    /// <summary>
    /// Converts plain text to HTML: escapes the ampersand and angle brackets,
    /// turns line breaks into br tags, and turns tabs and spaces into
    /// non-breaking space entities.
    /// </summary>
    /// <param name="str">The text to convert.</param>
    /// <returns>The HTML string; null or empty input is returned unchanged.</returns>
    public static String ToHtml(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        StringBuilder sb = new StringBuilder(str);
        // The ampersand goes first so the entities produced below are not escaped again.
        sb.Replace("&", "&amp;");
        sb.Replace("<", "&lt;");
        sb.Replace(">", "&gt;");
        // CRLF before LF so a Windows line break yields a single <br>.
        sb.Replace("\r\n", "<br>");
        sb.Replace("\n", "<br>");
        // A tab becomes one space, which the next step turns into an entity.
        sb.Replace("\t", " ");
        sb.Replace(" ", "&nbsp;");
        return sb.ToString();
    }

    /// <summary>
    /// Converts HTML produced by <see cref="ToHtml"/> back to plain text.
    /// </summary>
    /// <param name="str">The HTML string to convert.</param>
    /// <returns>The plain text; null or empty input is returned unchanged.</returns>
    public static String ToTxt(String str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        StringBuilder sb = new StringBuilder(str);
        sb.Replace("&nbsp;", " ");
        sb.Replace("<br>", "\r\n");
        sb.Replace("&lt;", "<");
        sb.Replace("&gt;", ">");
        // The ampersand goes last so an escaped entity such as "&amp;lt;" is
        // unescaped exactly once.
        sb.Replace("&amp;", "&");
        return sb.ToString();
    }

    /// <summary>
    /// Extracts the visible text of an HTML document: takes everything from the
    /// body tag onward, removes script and style elements with their content,
    /// removes all remaining tags, and then removes all whitespace.
    /// </summary>
    /// <remarks>
    /// All whitespace is removed, including spaces between words.
    /// </remarks>
    /// <param name="html">The HTML markup; may be null.</param>
    /// <returns>The extracted text, or null when <paramref name="html"/> is null.</returns>
    public static string ExtractBodyText(string html)
    {
        if (html == null)
        {
            return null;
        }

        // Fall back to the whole input when there is no body tag instead of
        // failing on a negative start index.
        Match body = Regex.Match(html, @"<body\b", RegexOptions.IgnoreCase);
        string text = body.Success ? html.Substring(body.Index) : html;

        // Lazy quantifiers keep each match inside one element; a greedy match
        // would delete everything between the first opening tag and the last
        // closing tag.
        text = Regex.Replace(text, @"<script\b[^>]*>[\s\S]*?</script\s*>", "", RegexOptions.IgnoreCase);
        text = Regex.Replace(text, @"<style\b[^>]*>[\s\S]*?</style\s*>", "", RegexOptions.IgnoreCase);
        text = Regex.Replace(text, @"<[^>]+>", "");
        text = Regex.Replace(text, @"\s", "");
        return text;
    }

    /// <summary>
    /// Reads a UTF-8 HTML file and extracts its visible text with
    /// <see cref="ExtractBodyText"/>.
    /// </summary>
    /// <param name="path">The path of the HTML file to read.</param>
    /// <returns>The extracted text.</returns>
    public static string ExtractBodyTextFromFile(string path)
    {
        // ReadAllText opens, reads and closes the file, so no handle is left open.
        string html = System.IO.File.ReadAllText(path, Encoding.UTF8);
        return ExtractBodyText(html);
    }
}

posted on 2010-07-16 11:19  悟生慧  阅读(1891)  评论(0)  编辑  收藏  举报

导航