C# 正则 获取 Img Src路径
// Sample HTML containing three <img> tags whose files should be removed.
string str = "<form id=\"form1\" runat=\"server\"><div><p><img src=\"img/01300000278409123123126754611.jpg\" alt=\"\" /></p><p><img src=\"img/16583711.jpg\" alt=\"\" /></p><p><img src=\"img/2_091208000854_6.jpg\" alt=\"\" /></p></div></form>";

// Extract the image paths once; the original re-ran the extraction in the loop
// condition and in the loop body on every iteration.
string[] imgUrls = GetHtmlImageUrlList(str);
for (int j = 0; j < imgUrls.Length; j++)
{
    // Image path string (indexed by the loop variable j; the original used an undeclared i).
    string img = imgUrls[j];

    // Delete the image from the server: map the path with Server.MapPath,
    // then remove the file only if it exists.
    string filepath = Server.MapPath(img);
    FileInfo file = new FileInfo(filepath);
    if (file.Exists)
    {
        file.Delete();
    }
}
/// <summary>
/// Case-insensitive pattern that matches an img tag and captures the value of its
/// src attribute (double-quoted, single-quoted or unquoted) in the named group "imgUrl".
/// Built once and reused so the pattern is not re-parsed on every call.
/// </summary>
private static readonly Regex s_imgSrcRegex = new Regex(
    @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
    RegexOptions.IgnoreCase);

/// <summary>
/// Gets the src path of every img tag found in an HTML string.
/// </summary>
/// <param name="htmlText">HTML text to scan.</param>
/// <returns>
/// The image paths as an array, one entry per matched img tag in the order the
/// tags appear; an empty array when no img tag matches.
/// </returns>
public static string[] GetHtmlImageUrlList(string htmlText)
{
    // One Match per img tag found in the input.
    MatchCollection matches = s_imgSrcRegex.Matches(htmlText);

    // Copy the captured src value of each tag into the result array.
    string[] sUrlList = new string[matches.Count];
    for (int i = 0; i < sUrlList.Length; i++)
    {
        sUrlList[i] = matches[i].Groups["imgUrl"].Value;
    }

    return sUrlList;
}