// Matches an img tag and captures the value of its src attribute in the
// "imgUrl" group. The (?<![\w-]) lookbehind keeps hyphenated attributes such as
// data-src or ng-src from being mistaken for src (a plain \b would accept them,
// because '-' is a non-word character). The captured value stops at the first
// whitespace, quote or angle bracket.
private static readonly Regex s_imgSrcRegex = new Regex(
    @"<img\b[^<>]*?(?<![\w-])src[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
    RegexOptions.IgnoreCase);

// Matches a table, tr or td tag and captures the value of its background
// attribute in the "backgroundUrl" group, with the same hyphen guard as above
// (so data-background is not treated as background).
private static readonly Regex s_tableBackgroundRegex = new Regex(
    @"<(table|tr|td)\b[^<>]*?(?<![\w-])background[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<backgroundUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
    RegexOptions.IgnoreCase);

/// <summary>
/// Extracts the URLs referenced by an HTML fragment: the src attribute of every
/// img tag, followed by the background attribute of every table, tr and td tag.
/// </summary>
/// <param name="sHtmlText">The HTML text to scan. May be null or empty.</param>
/// <returns>
/// A new list holding all img src values (in document order) followed by all
/// background values (in document order). Empty attribute values are skipped and
/// duplicates are kept. The list is empty when the input is null or empty.
/// </returns>
private List<string> GetImgUrlByHtmlText(string sHtmlText)
{
    List<string> sUrlList = new List<string>();

    // Regex.Matches throws on null input; there is nothing to extract anyway.
    if (string.IsNullOrEmpty(sHtmlText))
    {
        return sUrlList;
    }

    AppendNamedGroupValues(s_imgSrcRegex.Matches(sHtmlText), "imgUrl", sUrlList);
    AppendNamedGroupValues(s_tableBackgroundRegex.Matches(sHtmlText), "backgroundUrl", sUrlList);

    return sUrlList;
}

// Appends the non-empty value of the named capture group from each match to the target list.
private static void AppendNamedGroupValues(MatchCollection matches, string groupName, List<string> target)
{
    foreach (Match match in matches)
    {
        string value = match.Groups[groupName].Value;
        if (value.Length > 0)
        {
            target.Add(value);
        }
    }
}