获取文本中所有的<img>标签的位置,获取所有img标签的src

        /// <summary>
        /// Locates every <c>&lt;img&gt;</c> tag in an HTML fragment.
        /// </summary>
        /// <param name="str">HTML text to scan.</param>
        /// <returns>
        /// One index per <c>&lt;img&gt;</c> tag, in document order. Each index is a character
        /// offset into the text that remains after all other tags have been removed and
        /// every <c>&lt;img&gt;</c> tag has been collapsed to a single placeholder character.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int[] GetImagePos(string str)
        {
            if (str == null)
            {
                throw new System.ArgumentNullException("str");
            }

            // '$' marks image positions below, so any '$' already present in the
            // input is blanked out first (same length, so offsets are unaffected).
            str = str.Replace("$", " ");

            // Drop every tag that is not an <img> tag. Matching is case-insensitive
            // (so <IMG> is kept, in line with GetHtmlImageUrlList) and [^>] lets a
            // tag span several lines.
            str = Regex.Replace(str, @"<(?!img\b)[^>]+>", "", RegexOptions.IgnoreCase);

            // Collapse each <img> tag to one placeholder character.
            // "$$" is the replacement-pattern escape for a literal '$'.
            str = Regex.Replace(str, @"<img\b[^>]*>", "$$", RegexOptions.IgnoreCase);

            List<int> foundItems = new List<int>();
            for (int pos = str.IndexOf('$'); pos > -1; pos = str.IndexOf('$', pos + 1))
            {
                foundItems.Add(pos);
            }

            return foundItems.ToArray();
        }

  

        /// <summary>
        /// Extracts the <c>src</c> value of every <c>&lt;img&gt;</c> tag in an HTML fragment.
        /// </summary>
        /// <param name="htmlText">HTML text to scan.</param>
        /// <returns>
        /// The image paths in document order. Tags without a <c>src</c> attribute do not
        /// match the pattern and therefore contribute no entry.
        /// </returns>
        public static string[] GetHtmlImageUrlList(string htmlText)
        {
            // The (?<![\w-]) lookbehind keeps "src" from matching inside hyphenated
            // attribute names such as data-src; a plain \b would accept those because
            // '-' counts as a word boundary.
            const string imgSrcPattern =
                @"<img\b[^<>]*?(?<![\w-])src[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>";

            // One match per <img> tag that carries a src attribute.
            MatchCollection matches = Regex.Matches(htmlText, imgSrcPattern, RegexOptions.IgnoreCase);

            string[] sUrlList = new string[matches.Count];
            for (int i = 0; i < matches.Count; i++)
            {
                // Copy the captured src value of each tag into the result array.
                sUrlList[i] = matches[i].Groups["imgUrl"].Value;
            }

            return sUrlList;
        }

  

posted @ 2017-11-23 10:29  樱满集  阅读(2231)  评论(0编辑  收藏  举报