提取HTML代码中的网址

    /// <summary>
    /// Extracts the distinct http/https URLs that appear in a piece of HTML source.
    /// </summary>
    /// <param name="htmlCode">The HTML text to scan.</param>
    /// <returns>
    /// An <see cref="ArrayList"/> of URL strings with duplicates removed
    /// (ordinal, case-sensitive comparison), sorted by the default comparer.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// Raised by <c>Regex.Matches</c> when <paramref name="htmlCode"/> is null.
    /// </exception>
    static ArrayList GetHyperLinks(string htmlCode)
    {
        ArrayList al = new ArrayList();

        // "https?" accepts both schemes; the original pattern only matched
        // "http://" and therefore silently skipped every https link.
        string strRegex = @"https?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
        Regex r = new Regex(strRegex, RegexOptions.IgnoreCase);

        // Track URLs already emitted so duplicate filtering is O(1) per match
        // instead of rescanning the result list. Fully qualified so the block
        // does not depend on an extra using directive.
        System.Collections.Generic.HashSet<string> seen =
            new System.Collections.Generic.HashSet<string>();

        foreach (Match match in r.Matches(htmlCode))
        {
            string url = match.Value;

            // HashSet.Add returns false when the URL was already recorded.
            if (seen.Add(url))
            {
                al.Add(url);
            }
        }

        al.Sort();
        return al;
    }

posted on 2009-05-13 09:30 郑州--飞猫 阅读(340) 评论(0) 编辑 收藏 举报

刷新页面返回顶部

飞猫--袁金辉

提取HTML代码中的网址

公告

导航