.NET获取Html字符串中指定标签的指定属性的值

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
//以上为要用到的命名空间

    /// <summary>
    /// Collects the value of one attribute from every occurrence of a given tag
    /// in an HTML string.
    /// </summary>
    /// <remarks>
    /// This is a regex-based scan, not a real HTML parser: only attribute values
    /// wrapped in single or double quotes are recognised, and a tag is considered
    /// to end at the first <c>&gt;</c> character. Tag and attribute names are
    /// compared case-insensitively.
    /// </remarks>
    /// <param name="html">HTML text to scan. Null or empty yields an empty list.</param>
    /// <param name="tag">
    /// Tag name to look for (for example <c>img</c>). It is treated as literal text,
    /// not as a regular expression. A null or empty value matches every tag.
    /// </param>
    /// <param name="attr">Attribute name whose value is wanted. Null or empty yields an empty list.</param>
    /// <returns>
    /// The attribute values in document order, one entry per matching tag that
    /// carries the attribute; tags without the attribute contribute nothing.
    /// </returns>
    private List<string> GetHtmlAttr(string html, string tag, string attr)
    {
        List<string> values = new List<string>();

        // Nothing to scan or nothing to look for: return an empty result instead of throwing.
        if (string.IsNullOrEmpty(html) || string.IsNullOrEmpty(attr))
        {
            return values;
        }

        // Escape the tag so characters such as '.' or '+' cannot alter the pattern.
        // An empty tag keeps the historical "match any tag" behaviour.
        string tagNamePattern = string.IsNullOrEmpty(tag)
            ? @"[a-zA-Z][^\s/>]*"
            : Regex.Escape(tag);

        // The look-ahead demands that the name ends right here, so "a" no longer
        // matches "<abbr ...>". [^>]* also spans line breaks inside the tag.
        Regex tagRegex = new Regex(
            "<" + tagNamePattern + @"(?=[\s/>])[^>]*>",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        // Group 1 = attribute name, group 2 = opening quote, group 3 = value.
        // The \2 back-reference forces the closing quote to be the same character
        // as the opening one, so a value like "it's" is captured in full.
        Regex attrRegex = new Regex(
            @"([a-zA-Z0-9_-]+)\s*=\s*([\x27\x22])(.*?)\2",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Singleline);

        foreach (Match tagMatch in tagRegex.Matches(html))
        {
            foreach (Match attrMatch in attrRegex.Matches(tagMatch.Value))
            {
                // Ordinal comparison avoids culture-specific casing surprises (e.g. Turkish 'i').
                if (string.Equals(attr, attrMatch.Groups[1].Value, StringComparison.OrdinalIgnoreCase))
                {
                    values.Add(attrMatch.Groups[3].Value);
                    break; // only the first occurrence of the attribute per tag is reported
                }
            }
        }

        return values;
    }

 

posted @ 2017-03-30 20:46  孔小爽  阅读(2045)  评论(0)  编辑  收藏  举报
作者:孔小爽 引用请标明出处:https://www.cnblogs.com/kongxiaoshuang/