程晓晖

博客园 首页 新随笔 联系 订阅 管理

//
        //得到title信息
        //
        public static string getTitleByHtml(string sHtml)//鑾峰彇鏍囬   锛堢幇鏈夊皢鍐呭鍘绘帀鎹㈣绗﹀啀閰嶇疆闇€鏀硅繘锛?
   
       {
            //string regex="(<title[>])(.*)(<\/title>)";  
            //String^ regex = "(?<=<[\\s\\S]*?title[\\s\\S]*?>)(?<title>[\\s\\S]*?)(?=</title>)";
            string regex = "(?<=<title>)[\\s\\S]*?(?=(</title>)|(/title>))";
            Regex r = new Regex(regex, RegexOptions.IgnoreCase | RegexOptions.Multiline);
            return r.Match(sHtml).Value.Trim();
        }

//
        //清除html标记
        //
        public static string clearAllTag(string str)
        {
            //str = Regex.Replace(str, "<a[\\s\\S]*?[^>]*?>|</a>", "", RegexOptions.IgnoreCase);//鍘婚櫎瓒呰繛鎺ユ爣绛?
     
            str = Regex.Replace(str, "<<[^>]+>", "", RegexOptions.IgnoreCase | RegexOptions.Multiline);//鍘婚櫎html鏍囩
            str = HttpUtility.HtmlDecode(str);
            str = Regex.Replace(str, "[ ]+", " ");
            return str;
        }

posted on 2010-11-11 15:44  fumen  阅读(413)  评论(0)  编辑  收藏  举报