C# 使用 HtmlParser 分析网页

先把 Winista.HtmlParser.dll 放到 bin 文件夹下面,并在项目中添加对该 DLL 的引用。

using System;
using System.Collections.Generic;
using System.Text;
using Winista.Text.HtmlParser.Visitors;
using Winista.Text.HtmlParser;
using Winista.Text.HtmlParser.Util;//包含ParserException
using Winista.Text.HtmlParser.Filters;
using System.Net;
using Winista.Text.HtmlParser.Lex;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console demo: downloads a web page and counts the nodes matched by a
    /// "DIV" tag-name filter using the Winista HtmlParser library.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the page at the hard-coded URL as UTF-8 text, feeds it to a
        /// <c>Lexer</c>/<c>Parser</c> pair, prints the number of nodes matched by
        /// <c>TagNameFilter("DIV")</c>, and then waits for console input.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string strurl = "http://www.weather.com.cn/weather/101310101.shtml?from=cn";
            // Alternative: let the parser fetch the page itself.
            //Parser parser = new Parser(new Uri(strurl));

            string htmlcode;
            // WebClient is IDisposable; release it as soon as the download completes.
            using (WebClient aWebClient = new WebClient())
            {
                // Decode the response body as UTF-8.
                aWebClient.Encoding = System.Text.Encoding.UTF8;
                htmlcode = aWebClient.DownloadString(strurl);
            }

            Lexer lex = new Lexer(htmlcode); // Lexer: lexical analysis of the HTML source
            Parser parser = new Parser(lex);

            // Other ways to build the parser (over HTTP, or from an in-memory HTML string):
            //Parser parser = new Parser(new Winista.Text.HtmlParser.Http.HttpProtocol(new Uri("http://www.baidu.com/s?wd=%C0%AC%BB%F8%B9%B7%C8%D5%B5%C4%B0%D9%B6%C8&pn=0")));
            //string str = "<table><tr><td><div>姓名</div></td><td>林肯</td></tr><tr><td>年龄</td><td>28</td></tr><tr><td>性别</td><td>男</td></tr><tr><td>姓名</td><td>克林顿</td></tr><tr><td>年龄</td><td>38</td></tr><tr><td>性别</td><td>男</td></tr></table>";
            //Parser parser = Parser.CreateParser(str, null);

            // This is the part under test; later examples only change the filter used here.
            NodeFilter filter = new TagNameFilter("DIV");
            NodeList nodes = parser.ExtractAllNodesThatMatch(filter);
            Console.Write(nodes.Count);

            // Keep the console window open until input is available.
            Console.Read();
        }
    }


}

显示结果是:59

说明测试成功!呵呵!

posted @ 2011-12-29 11:10  szjdw  阅读(377)  评论(0)  编辑  收藏  举报