c#正则表达式获取html超链接

 /// <summary>
 /// Extracts hyperlinks from an HTML fragment and returns them as a map from
 /// the <c>href</c> value to the anchor's inner content.
 /// </summary>
 /// <param name="content">HTML text to scan for <c>&lt;a ... href=...&gt;...&lt;/a&gt;</c> elements.</param>
 /// <returns>
 /// A dictionary keyed by URL whose value is the raw inner content of the anchor
 /// (nested markup such as <c>&lt;b&gt;</c> is kept as-is). When the same URL occurs
 /// more than once, only the first occurrence is kept. The dictionary is empty
 /// when no anchor matches.
 /// </returns>
 /// <exception cref="ArgumentNullException">
 /// Thrown by <see cref="Regex.Matches(string, string)"/> when <paramref name="content"/> is null.
 /// </exception>
 private Dictionary<string, string> GetUrl(string content)
        {
            Dictionary<string, string> dics = new Dictionary<string, string>();

            // (?is)  : case-insensitive, and '.' also matches newlines.
            // \1     : requires the closing quote to equal the (optional) opening quote.
            // url    : the href value; it cannot contain quotes, whitespace or '>'.
            // text   : everything up to </a>, refusing to run across another <a ...> or </a> tag.
            string pattern = @"(?is)<a[^>]*?href=(['""]?)(?<url>[^'""\s>]+)\1[^>]*>(?<text>(?:(?!</?a\b).)*)</a>";

            // Every Match yielded by a MatchCollection is successful, so no Success check is needed.
            foreach (Match m in Regex.Matches(content, pattern))
            {
                string url = m.Groups["url"].Value;

                // Dictionary.Add throws on a duplicate key; test explicitly instead of
                // relying on an exception so repeated links are skipped cheaply and
                // unrelated failures are not hidden by a blanket catch.
                if (!dics.ContainsKey(url))
                {
                    dics.Add(url, m.Groups["text"].Value);
                }
            }

            return dics;
        }

 

posted @ 2013-02-19 14:03  biubiubiu  阅读(599)  评论(0)  编辑  收藏  举报