字符串截取

View Code
 1  ///   <summary>   
 2         ///   将指定字符串按指定长度进行剪切,   
 3         ///   </summary>   .   ///   <param   name= "oldStr "> 需要截断的字符串 </param>   
 4         ///   <param   name= "maxLength "> 字符串的最大长度 </param>   
 5         ///   <param   name= "endWith "> 超过长度的后缀 </param>   
 6         ///   <returns> 如果超过长度,返回截断后的新字符串加上后缀,否则,返回原字符串 </returns>   
 7         public static string StringTruncat(string oldStr, int maxLength, string endWith)
 8         {
 9             if (string.IsNullOrEmpty(oldStr))
10                 //   throw   new   NullReferenceException( "原字符串不能为空 ");   
11                 return oldStr + endWith;
12             if (maxLength < 1) throw new Exception("返回的字符串长度必须大于[0] ");
13             if (oldStr.Length > maxLength)
14             {
15                 string strTmp = oldStr.Substring(0, maxLength);
16                 if (string.IsNullOrEmpty(endWith))
17                     return strTmp;
18                 else
19                     return strTmp + endWith;
20             }
21             return oldStr;
22         }

 

View Code
 1  /// <summary>
 2        /// 从字符串中截取图片信息
 3        /// </summary>
 4        /// <param name="str"></param>
 5        /// <returns></returns>
 6         protected string GetSrc(string str)
 7         {
 8             string regStr = "\\<IMG\\ [\\s\\S]*?src=['\"]?(?<p>[^'\"\\>\\ ]+)['\"\\>\\ ]";
 9             string cont1 = string.Empty; //图片的src
10             Regex reg = new Regex(regStr, RegexOptions.Compiled | RegexOptions.IgnoreCase);
11             Match match = reg.Match(str);
12             string picSrc = "";
13             if (match.Success)
14             {
15                 picSrc = match.Groups["p"].Value;
16                 return picSrc;
17             }
18             else 
19             {
20                 return "";
21             }
22         }

 

View Code
 1         /*
 2       * *
 3       * 按字节长度截取字符串(支持截取带HTML代码样式的字符串)  *
 4       * @param param 将要截取的字符串参数  * @param length 截取的字节长度  * 
 5       * @param end 字符串末尾补上的字符串  * @return 返回截取后的字符串  */
 6         public static string subStringHTML(string param, int length, string end)
 7         {
 8             string Pattern = null; MatchCollection m = null; StringBuilder result = new StringBuilder(); int n = 0; char temp; 
 9             bool isCode = false; //是不是HTML代码
10             bool isHTML = false; //是不是HTML特殊字符,如&nbsp;   
11             char[] pchar = param.ToCharArray();
12             for (int i = 0; i < pchar.Length; i++)
13             {
14                 temp = pchar[i];
15                 if (temp == '<')
16                 {
17                     isCode = true;
18                 }
19                 else if (temp == '&')
20                 {
21                     isHTML = true;
22                 }
23                 else if (temp == '>' && isCode)
24                 {
25                     n = n - 1; isCode = false;
26                 }
27                 else if (temp == ';' && isHTML)
28                 {
29                     isHTML = false;
30                 }
31                 if (!isCode && !isHTML)
32                 {
33                     n = n + 1;     //UNICODE码字符占两个字节    
34                     //if (System.Text.Encoding.Default.GetBytes(temp + "").Length > 1)
35                     //{
36                     //    n = n + 1;
37                     //}
38                 }
39                 result.Append(temp);
40                 if (n >= length)
41                 { break; }
42             }
43             if (result.Length > length)
44             {
45                 result.Append("..."); 
46             }
47              
48             //取出截取字符串中的HTML标记   
49             string temp_result = result.ToString().Replace("(>)[^<>]*(<?)", "$1$2");
50             //去掉不需要结素标记的HTML标记  
51             temp_result = temp_result.Replace(@"</?(AREA|BASE|BASEFONT|BODY|BR|COL|COLGROUP|DD|DT|FRAME|HEAD|HR|HTML|IMG|INPUT|ISINDEX|LI|LINK|META|OPTION|P|PARAM|TBODY|TD|TFOOT|TH|THEAD|TR|area|base|basefont|body|br|col|colgroup|dd|dt|frame|head|hr|html|img|input|isindex|li|link|meta|option|p|param|tbody|td|tfoot|th|thead|tr)[^<>]*/?>", "");   //去掉成对的HTML标记 
52             temp_result = temp_result.Replace(@"<([a-zA-Z]+)[^<>]*>(.*?)<//1>", "$2");   
53             //用正则表达式取出标记   
54             Pattern = ("<([a-zA-Z]+)[^<>]*>"); 
55             m = Regex.Matches(temp_result, Pattern); 
56             ArrayList endHTML = new ArrayList(); 
57             foreach (Match mt in m) 
58             {
59                 endHTML.Add(mt.Result("$1"));
60             }
61             //补全不成对的HTML标记 
62             for (int i = endHTML.Count - 1; i >= 0; i--)
63             { 
64                 result.Append("</"); result.Append(endHTML[i]); result.Append(">");
65             }
66             return result.ToString();
67         }

 

posted on 2013-01-21 11:03  沐鈅  阅读(230)  评论(0)  编辑  收藏  举报