在 C# 中使用 HtmlParser(Winista.HtmlParser)分析网页
先把 Winista.HtmlParser.dll 放到项目的 bin 文件夹下面,并在项目中添加对该 DLL 的引用。
using System;
using System.Collections.Generic;
using System.Text;
using Winista.Text.HtmlParser.Visitors;
using Winista.Text.HtmlParser;
using Winista.Text.HtmlParser.Util;//包含ParserException
using Winista.Text.HtmlParser.Filters;
using System.Net;
using Winista.Text.HtmlParser.Lex;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console demo: downloads a web page, parses it with Winista HtmlParser,
    /// and prints how many nodes match a tag-name filter.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the page at a fixed URL, extracts every node matching the
        /// "DIV" tag-name filter, writes the match count to the console, and
        /// then waits for console input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string strurl = "http://www.weather.com.cn/weather/101310101.shtml?from=cn";

            // Download the page as UTF-8 text. WebClient is IDisposable, so it is
            // wrapped in a using statement to release it as soon as the download
            // finishes, even if DownloadString throws.
            string htmlcode;
            using (WebClient aWebClient = new WebClient())
            {
                aWebClient.Encoding = System.Text.Encoding.UTF8;
                htmlcode = aWebClient.DownloadString(strurl);
            }

            // Lexer: performs lexical analysis on the HTML source.
            Lexer lex = new Lexer(htmlcode);
            Parser parser = new Parser(lex);

            // Alternative ways to build the parser that the original author tried:
            //   new Parser(new Uri(strurl));
            //   new Parser(new Winista.Text.HtmlParser.Http.HttpProtocol(new Uri(someUrl)));
            //   Parser.CreateParser(htmlString, null);   // parse an in-memory HTML string

            // This is the part that controls the test; later examples change only
            // the filter built here.
            NodeFilter filter = new TagNameFilter("DIV");
            NodeList nodes = parser.ExtractAllNodesThatMatch(filter);

            Console.Write(nodes.Count);

            // Keep the console window open until the user provides input.
            Console.Read();
        }
    }
}
显示结果是:59(即该页面中匹配到的 DIV 标签数量;页面内容变化时,这个数值也会随之变化)。
说明测试成功!呵呵!