C#网页采集
/// <summary>
/// Extracts the value of the named group <c>key</c> from every match of
/// <paramref name="rex"/> in <paramref name="urlValue"/> and returns the
/// distinct values.
/// </summary>
/// <param name="rex">
/// Regular expression pattern, applied case-insensitively. The result is read
/// from its named group <c>key</c>; if the pattern has no such group, every
/// match contributes an empty string.
/// </param>
/// <param name="urlValue">Text to search.</param>
/// <returns>
/// The distinct <c>key</c> values in order of first occurrence; an empty array
/// when nothing matches.
/// </returns>
private string[] rexID(string rex, string urlValue)
{
    Regex pattern = new Regex(rex, RegexOptions.IgnoreCase);
    ArrayList keys = new ArrayList();

    foreach (Match match in pattern.Matches(urlValue))
    {
        // Read the group from the original match. Re-running the pattern on the
        // matched substring (as was done before, and without IgnoreCase) loses
        // the capture whenever the match relied on case-insensitivity or on
        // surrounding context such as anchors or lookarounds.
        string key = match.Groups["key"].Value;

        // Filter duplicates on the value that is actually stored. Comparing the
        // full match text against stored key values never detected repeats
        // unless the whole match happened to equal the key.
        if (!keys.Contains(key))
        {
            keys.Add(key);
        }
    }

    return (string[])keys.ToArray(typeof(string));
}
放下电子产品,每天进步一点点