找个搜索结果总数原来可以用到这么多的技术
今天刚上班的时候,隔壁同事被分配到一个任务:做一个软件,用来分析某个关键字在搜索引擎里的搜索结果总数。
索性自己用Console也弄了个..
主要用到下面几种技术:
1.插件模式
2.事件
3.委托
流程:
1.先实例化一个事件
2.检索插件,将找到的插件实例化并传入事件作为参数执行
3.搜索 -> 下载源码,分析,取出搜索结果总数
4.用委托回调每个结果
解决方案结构:
引用:
1.所有的项目都引用Yans.Implementation
2.Yans.Implementation引用System.Web
代码如下:
1.主程序
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Yans.Implementation.Searcher;
using System.IO;
using System.Reflection;
namespace Yans.ConsoleApplication
{
/// <summary>
/// Console host: discovers searcher plug-ins, lets them subscribe to a shared
/// <see cref="IndexEvent"/>, raises the event for a keyword and prints what they reported.
/// </summary>
class Program
{
    /// <summary>
    /// Loads every "*Searcher.dll" in the application base directory, instantiates each public,
    /// non-abstract type whose name ends with "Searcher" (passing the shared event to its
    /// constructor), raises the event for the keyword "yans", prints the lines collected through
    /// the callback and then waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main( string[] args ) {
        StringBuilder sb = new StringBuilder();
        IndexEvent e = new IndexEvent();
        // Callback handed to the plug-ins: records one "<reporter type name>:<result>" line per call.
        e.Actions = ( E, Y ) => {
            sb.AppendLine(E.GetType().Name + ":" + Y);
        };
        // Discover the plug-ins and instantiate them against the event.
        foreach (var dllFile in Directory.GetFiles(AppDomain.CurrentDomain.BaseDirectory, "*Searcher.dll")) {
            Type[] pluginTypes;
            try {
                pluginTypes = Assembly.LoadFrom(dllFile).GetTypes();
            }
            catch (Exception ex) {
                // A DLL that cannot be loaded or reflected over must not stop the remaining plug-ins.
                Console.WriteLine(dllFile + ": " + ex.Message);
                continue;
            }
            foreach (var pluginType in pluginTypes) {
                // Ordinal comparison: type names are identifiers, not culture-sensitive text.
                if (pluginType.IsPublic && !pluginType.IsAbstract && pluginType.Name.EndsWith("Searcher", StringComparison.Ordinal)) {
                    try {
                        // Create the plug-in and pass the event so it can subscribe in its constructor.
                        Activator.CreateInstance(pluginType, e);
                    }
                    catch (TargetInvocationException ex) {
                        // Report the constructor's own failure rather than the generic reflection wrapper message.
                        Console.WriteLine((ex.InnerException ?? ex).Message);
                    }
                    catch (Exception ex) {
                        Console.WriteLine(ex.Message);
                    }
                }
            }
        }
        // Run the search. A failing plug-in handler is reported instead of terminating the
        // process, so the results gathered before the failure are still printed below.
        try {
            e.RaiseSampleEvent("yans");
        }
        catch (Exception ex) {
            Console.WriteLine(ex.Message);
        }
        Console.WriteLine(sb.ToString());
        Console.Read();
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Yans.Implementation.Searcher;
using System.IO;
using System.Reflection;
namespace Yans.ConsoleApplication
{
/// <summary>
/// Console host: discovers searcher plug-ins, lets them subscribe to a shared
/// <see cref="IndexEvent"/>, raises the event for a keyword and prints what they reported.
/// </summary>
class Program
{
    /// <summary>
    /// Loads every "*Searcher.dll" in the application base directory, instantiates each public,
    /// non-abstract type whose name ends with "Searcher" (passing the shared event to its
    /// constructor), raises the event for the keyword "yans", prints the lines collected through
    /// the callback and then waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main( string[] args ) {
        StringBuilder sb = new StringBuilder();
        IndexEvent e = new IndexEvent();
        // Callback handed to the plug-ins: records one "<reporter type name>:<result>" line per call.
        e.Actions = ( E, Y ) => {
            sb.AppendLine(E.GetType().Name + ":" + Y);
        };
        // Discover the plug-ins and instantiate them against the event.
        foreach (var dllFile in Directory.GetFiles(AppDomain.CurrentDomain.BaseDirectory, "*Searcher.dll")) {
            Type[] pluginTypes;
            try {
                pluginTypes = Assembly.LoadFrom(dllFile).GetTypes();
            }
            catch (Exception ex) {
                // A DLL that cannot be loaded or reflected over must not stop the remaining plug-ins.
                Console.WriteLine(dllFile + ": " + ex.Message);
                continue;
            }
            foreach (var pluginType in pluginTypes) {
                // Ordinal comparison: type names are identifiers, not culture-sensitive text.
                if (pluginType.IsPublic && !pluginType.IsAbstract && pluginType.Name.EndsWith("Searcher", StringComparison.Ordinal)) {
                    try {
                        // Create the plug-in and pass the event so it can subscribe in its constructor.
                        Activator.CreateInstance(pluginType, e);
                    }
                    catch (TargetInvocationException ex) {
                        // Report the constructor's own failure rather than the generic reflection wrapper message.
                        Console.WriteLine((ex.InnerException ?? ex).Message);
                    }
                    catch (Exception ex) {
                        Console.WriteLine(ex.Message);
                    }
                }
            }
        }
        // Run the search. A failing plug-in handler is reported instead of terminating the
        // process, so the results gathered before the failure are still printed below.
        try {
            e.RaiseSampleEvent("yans");
        }
        catch (Exception ex) {
            Console.WriteLine(ex.Message);
        }
        Console.WriteLine(sb.ToString());
        Console.Read();
    }
}
}
Ps: RaiseSampleEvent是执行搜索..内容请自行修改..
2.结果分析类(下载页面源码并用正则提取结果总数)
分析结果
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
namespace Yans.Implementation.Searcher
{
/// <summary>
/// Downloads a page and extracts a value from its source with a regular expression.
/// </summary>
public class CodeAnalyzer
{
    /// <summary>
    /// Downloads the page addressed by <paramref name="uri"/> for the given query and returns
    /// the first capture group of the first match of <paramref name="pattern"/>.
    /// </summary>
    /// <param name="uri">Query address; a format string whose {0} placeholder receives the URL-encoded query.</param>
    /// <param name="query">Query term; it is URL-encoded using <paramref name="encoding"/>.</param>
    /// <param name="pattern">Regular expression, matched case-insensitively, whose first capture group is the value to return.</param>
    /// <param name="encoding">Name of the character encoding used both to URL-encode the query and to decode the download.</param>
    /// <returns>The text of capture group 1, or "[未分析到]" when the pattern does not match or declares no capture group.</returns>
    public static string Analyzer( string uri, string query, string pattern, string encoding ) {
        // Resolve the encoding once; it is needed for both the request and the response.
        Encoding pageEncoding = Encoding.GetEncoding(encoding);
        string source;
        using (var client = new WebClienter()) {
            client.Encoding = pageEncoding;
            // Download the page source.
            source = client.DownLoadString(string.Format(uri, HttpUtility.UrlEncode(query, pageEncoding)));
        }
        // The static Regex.Match goes through the framework's regex cache, so repeated calls with
        // the same pattern do not rebuild it; RegexOptions.Compiled is not worth its cost for a
        // single match per call.
        Match match = Regex.Match(source, pattern, RegexOptions.IgnoreCase);
        if (match.Success && match.Groups.Count > 1) {
            return match.Groups[1].Value;
        }
        return "[未分析到]";
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
namespace Yans.Implementation.Searcher
{
/// <summary>
/// Downloads a page and extracts a value from its source with a regular expression.
/// </summary>
public class CodeAnalyzer
{
    /// <summary>
    /// Downloads the page addressed by <paramref name="uri"/> for the given query and returns
    /// the first capture group of the first match of <paramref name="pattern"/>.
    /// </summary>
    /// <param name="uri">Query address; a format string whose {0} placeholder receives the URL-encoded query.</param>
    /// <param name="query">Query term; it is URL-encoded using <paramref name="encoding"/>.</param>
    /// <param name="pattern">Regular expression, matched case-insensitively, whose first capture group is the value to return.</param>
    /// <param name="encoding">Name of the character encoding used both to URL-encode the query and to decode the download.</param>
    /// <returns>The text of capture group 1, or "[未分析到]" when the pattern does not match or declares no capture group.</returns>
    public static string Analyzer( string uri, string query, string pattern, string encoding ) {
        // Resolve the encoding once; it is needed for both the request and the response.
        Encoding pageEncoding = Encoding.GetEncoding(encoding);
        string source;
        using (var client = new WebClienter()) {
            client.Encoding = pageEncoding;
            // Download the page source.
            source = client.DownLoadString(string.Format(uri, HttpUtility.UrlEncode(query, pageEncoding)));
        }
        // The static Regex.Match goes through the framework's regex cache, so repeated calls with
        // the same pattern do not rebuild it; RegexOptions.Compiled is not worth its cost for a
        // single match per call.
        Match match = Regex.Match(source, pattern, RegexOptions.IgnoreCase);
        if (match.Success && match.Groups.Count > 1) {
            return match.Groups[1].Value;
        }
        return "[未分析到]";
    }
}
}
Ps:调用下载源码并用正则的方式把要的结果过滤出来.
3.调用的事件
事件
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Yans.Implementation.Searcher
{
/// <summary>
/// Search event shared by the host and the searcher plug-ins: plug-ins subscribe to
/// <see cref="SampleEvent"/>, and results are handed back through <see cref="Actions"/>.
/// </summary>
public class IndexEvent
{
    /// <summary>Signature of a search handler.</summary>
    /// <param name="sender">The object raising the event.</param>
    /// <param name="e">The keyword passed to <see cref="RaiseSampleEvent"/>.</param>
    public delegate void SampleEventHandler( object sender, string e );

    /// <summary>Optional result callback; this class only stores it and never invokes it.</summary>
    public Action<object, string> Actions;

    /// <summary>Raised by <see cref="RaiseSampleEvent"/> with the search keyword.</summary>
    public event SampleEventHandler SampleEvent;

    /// <summary>
    /// Raises <see cref="SampleEvent"/> with this instance as sender; does nothing when there
    /// are no subscribers.
    /// </summary>
    /// <param name="word">Keyword forwarded to every subscriber.</param>
    public virtual void RaiseSampleEvent( string word ) {
        // Copy to a local first so the delegate that was null-checked is the one invoked,
        // even if the last subscriber is removed in between.
        SampleEventHandler handler = SampleEvent;
        if (handler != null) {
            handler(this, word);
        }
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Yans.Implementation.Searcher
{
/// <summary>
/// Search event shared by the host and the searcher plug-ins: plug-ins subscribe to
/// <see cref="SampleEvent"/>, and results are handed back through <see cref="Actions"/>.
/// </summary>
public class IndexEvent
{
    /// <summary>Signature of a search handler.</summary>
    /// <param name="sender">The object raising the event.</param>
    /// <param name="e">The keyword passed to <see cref="RaiseSampleEvent"/>.</param>
    public delegate void SampleEventHandler( object sender, string e );

    /// <summary>Optional result callback; this class only stores it and never invokes it.</summary>
    public Action<object, string> Actions;

    /// <summary>Raised by <see cref="RaiseSampleEvent"/> with the search keyword.</summary>
    public event SampleEventHandler SampleEvent;

    /// <summary>
    /// Raises <see cref="SampleEvent"/> with this instance as sender; does nothing when there
    /// are no subscribers.
    /// </summary>
    /// <param name="word">Keyword forwarded to every subscriber.</param>
    public virtual void RaiseSampleEvent( string word ) {
        // Copy to a local first so the delegate that was null-checked is the one invoked,
        // even if the last subscriber is removed in between.
        SampleEventHandler handler = SampleEvent;
        if (handler != null) {
            handler(this, word);
        }
    }
}
}
Ps:event这东西.会用的话还是很强大的.
4.代码下载类
代码
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
namespace Yans.Implementation.Searcher
{
/// <summary>
/// Small wrapper around <see cref="WebClient"/> that downloads the content at an address as a
/// string, using a configurable text encoding and optional credentials.
/// </summary>
public class WebClienter : IDisposable
{
    #region property
    /// <summary>
    /// Credentials applied to each download; when null, none are set on the underlying client.
    /// </summary>
    public ICredentials Credential { set; get; }
    Encoding encoding = Encoding.UTF8;
    /// <summary>
    /// Encoding assigned to the underlying client for each download. Defaults to UTF-8.
    /// </summary>
    public Encoding Encoding {
        set { encoding = value; }
        get { return encoding; }
    }
    #endregion
    /// <summary>
    /// Downloads the content at the given address as a string.
    /// </summary>
    /// <param name="UriAddress">Address to download from.</param>
    /// <returns>The downloaded content.</returns>
    public string DownLoadString( Uri UriAddress ) {
        // WebClient is IDisposable; release it as soon as the download has finished.
        using (WebClient client = new WebClient()) {
            if (null != Credential) client.Credentials = Credential;
            client.Encoding = Encoding;
            return client.DownloadString(UriAddress);
        }
    }
    /// <summary>
    /// Downloads the content at the given address as a string.
    /// </summary>
    /// <param name="UriAddress">Address to download from, as text; it is converted with <c>new Uri(...)</c>.</param>
    /// <returns>The downloaded content.</returns>
    public string DownLoadString( string UriAddress ) {
        return DownLoadString(new Uri(UriAddress));
    }
    #region IDisposable members
    /// <summary>
    /// Clears the configured encoding and credentials. No other state is held between calls.
    /// </summary>
    public void Dispose() {
        Encoding = null;
        Credential = null;
    }
    #endregion
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
namespace Yans.Implementation.Searcher
{
/// <summary>
/// Small wrapper around <see cref="WebClient"/> that downloads the content at an address as a
/// string, using a configurable text encoding and optional credentials.
/// </summary>
public class WebClienter : IDisposable
{
    #region property
    /// <summary>
    /// Credentials applied to each download; when null, none are set on the underlying client.
    /// </summary>
    public ICredentials Credential { set; get; }
    Encoding encoding = Encoding.UTF8;
    /// <summary>
    /// Encoding assigned to the underlying client for each download. Defaults to UTF-8.
    /// </summary>
    public Encoding Encoding {
        set { encoding = value; }
        get { return encoding; }
    }
    #endregion
    /// <summary>
    /// Downloads the content at the given address as a string.
    /// </summary>
    /// <param name="UriAddress">Address to download from.</param>
    /// <returns>The downloaded content.</returns>
    public string DownLoadString( Uri UriAddress ) {
        // WebClient is IDisposable; release it as soon as the download has finished.
        using (WebClient client = new WebClient()) {
            if (null != Credential) client.Credentials = Credential;
            client.Encoding = Encoding;
            return client.DownloadString(UriAddress);
        }
    }
    /// <summary>
    /// Downloads the content at the given address as a string.
    /// </summary>
    /// <param name="UriAddress">Address to download from, as text; it is converted with <c>new Uri(...)</c>.</param>
    /// <returns>The downloaded content.</returns>
    public string DownLoadString( string UriAddress ) {
        return DownLoadString(new Uri(UriAddress));
    }
    #region IDisposable members
    /// <summary>
    /// Clears the configured encoding and credentials. No other state is held between calls.
    /// </summary>
    public void Dispose() {
        Encoding = null;
        Credential = null;
    }
    #endregion
}
}
PS:以前写采集器用过的一个类..稍微对webclient做了一点点的修改..现在直接拿来用了.
5.插件
百度插件
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Yans.Implementation.Searcher;
namespace Yans.Searcher.BaiduSearcher
{
/// <summary>
/// Searcher plug-in that reads the result total from a www.baidu.com result page.
/// </summary>
public class BaiduSearcher
{
    /// <summary>
    /// Attaches this plug-in's handler to the given search event.
    /// </summary>
    /// <param name="e">Event to subscribe to.</param>
    public BaiduSearcher( IndexEvent e ) {
        e.SampleEvent += OnSearchRequested;
    }

    /// <summary>
    /// Queries Baidu for the keyword, passes the extracted total to the event's callback
    /// (when one is set) and writes it to the console.
    /// </summary>
    /// <param name="sender">The raising event; cast to <see cref="IndexEvent"/>.</param>
    /// <param name="keyword">Keyword to search for.</param>
    void OnSearchRequested( object sender, string keyword ) {
        string total = CodeAnalyzer.Analyzer(@"http://www.baidu.com/s?wd={0}", keyword, @"相关网页约([\d,]*)篇,", "gb2312");
        var origin = (IndexEvent)sender;
        var report = origin.Actions;
        if (report != null) {
            report(this, total);
        }
        Console.WriteLine("Baidu Searched result count:" + total);
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Yans.Implementation.Searcher;
namespace Yans.Searcher.BaiduSearcher
{
/// <summary>
/// Searcher plug-in that reads the result total from a www.baidu.com result page.
/// </summary>
public class BaiduSearcher
{
    /// <summary>
    /// Attaches this plug-in's handler to the given search event.
    /// </summary>
    /// <param name="e">Event to subscribe to.</param>
    public BaiduSearcher( IndexEvent e ) {
        e.SampleEvent += OnSearchRequested;
    }

    /// <summary>
    /// Queries Baidu for the keyword, passes the extracted total to the event's callback
    /// (when one is set) and writes it to the console.
    /// </summary>
    /// <param name="sender">The raising event; cast to <see cref="IndexEvent"/>.</param>
    /// <param name="keyword">Keyword to search for.</param>
    void OnSearchRequested( object sender, string keyword ) {
        string total = CodeAnalyzer.Analyzer(@"http://www.baidu.com/s?wd={0}", keyword, @"相关网页约([\d,]*)篇,", "gb2312");
        var origin = (IndexEvent)sender;
        var report = origin.Actions;
        if (report != null) {
            report(this, total);
        }
        Console.WriteLine("Baidu Searched result count:" + total);
    }
}
}
谷歌插件
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Yans.Implementation.Searcher;
namespace Yans.Searcher.GoogleSearcher
{
/// <summary>
/// Searcher plug-in that reads the result total from a www.google.cn result page.
/// </summary>
public class GoogleSearcher
{
    /// <summary>
    /// Attaches this plug-in's handler to the given search event.
    /// </summary>
    /// <param name="e">Event to subscribe to.</param>
    public GoogleSearcher( IndexEvent e ) {
        e.SampleEvent += OnSearchRequested;
    }

    /// <summary>
    /// Queries Google for the keyword, passes the extracted total to the event's callback
    /// (when one is set) and writes it to the console.
    /// </summary>
    /// <param name="sender">The raising event; cast to <see cref="IndexEvent"/>.</param>
    /// <param name="keyword">Keyword to search for.</param>
    void OnSearchRequested( object sender, string keyword ) {
        string total = CodeAnalyzer.Analyzer("http://www.google.cn/search?hl=zh-CN&source=hp&q={0}&aq=f&aqi=g10&aql=&oq=", keyword, @"获得约 <b>([\d,]*)</b> 条结果", "gb2312");
        var origin = (IndexEvent)sender;
        var report = origin.Actions;
        if (report != null) {
            report(this, total);
        }
        Console.WriteLine("Google searched result count:" + total);
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Yans.Implementation.Searcher;
namespace Yans.Searcher.GoogleSearcher
{
/// <summary>
/// Searcher plug-in that reads the result total from a www.google.cn result page.
/// </summary>
public class GoogleSearcher
{
    /// <summary>
    /// Attaches this plug-in's handler to the given search event.
    /// </summary>
    /// <param name="e">Event to subscribe to.</param>
    public GoogleSearcher( IndexEvent e ) {
        e.SampleEvent += OnSearchRequested;
    }

    /// <summary>
    /// Queries Google for the keyword, passes the extracted total to the event's callback
    /// (when one is set) and writes it to the console.
    /// </summary>
    /// <param name="sender">The raising event; cast to <see cref="IndexEvent"/>.</param>
    /// <param name="keyword">Keyword to search for.</param>
    void OnSearchRequested( object sender, string keyword ) {
        string total = CodeAnalyzer.Analyzer("http://www.google.cn/search?hl=zh-CN&source=hp&q={0}&aq=f&aqi=g10&aql=&oq=", keyword, @"获得约 <b>([\d,]*)</b> 条结果", "gb2312");
        var origin = (IndexEvent)sender;
        var report = origin.Actions;
        if (report != null) {
            report(this, total);
        }
        Console.WriteLine("Google searched result count:" + total);
    }
}
}
搜搜插件
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Yans.Implementation.Searcher;
namespace Yans.Searcher.SosoSearcher
{
/// <summary>
/// Searcher plug-in that reads the result total from a www.soso.com result page.
/// </summary>
public class SosoSearcher
{
    /// <summary>
    /// Attaches this plug-in's handler to the given search event.
    /// </summary>
    /// <param name="e">Event to subscribe to.</param>
    public SosoSearcher( IndexEvent e ) {
        e.SampleEvent += OnSearchRequested;
    }

    /// <summary>
    /// Queries Soso for the keyword, passes the extracted total to the event's callback
    /// (when one is set) and writes it to the console.
    /// </summary>
    /// <param name="sender">The raising event; cast to <see cref="IndexEvent"/>.</param>
    /// <param name="keyword">Keyword to search for.</param>
    void OnSearchRequested( object sender, string keyword ) {
        string total = CodeAnalyzer.Analyzer("http://www.soso.com/q?pid=s.idx&w={0}", keyword, @"搜索到约([\d,]*)项结果,用", "gb2312");
        var origin = (IndexEvent)sender;
        var report = origin.Actions;
        if (report != null) {
            report(this, total);
        }
        Console.WriteLine("Soso searched result count:" + total);
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Yans.Implementation.Searcher;
namespace Yans.Searcher.SosoSearcher
{
/// <summary>
/// Searcher plug-in that reads the result total from a www.soso.com result page.
/// </summary>
public class SosoSearcher
{
    /// <summary>
    /// Attaches this plug-in's handler to the given search event.
    /// </summary>
    /// <param name="e">Event to subscribe to.</param>
    public SosoSearcher( IndexEvent e ) {
        e.SampleEvent += OnSearchRequested;
    }

    /// <summary>
    /// Queries Soso for the keyword, passes the extracted total to the event's callback
    /// (when one is set) and writes it to the console.
    /// </summary>
    /// <param name="sender">The raising event; cast to <see cref="IndexEvent"/>.</param>
    /// <param name="keyword">Keyword to search for.</param>
    void OnSearchRequested( object sender, string keyword ) {
        string total = CodeAnalyzer.Analyzer("http://www.soso.com/q?pid=s.idx&w={0}", keyword, @"搜索到约([\d,]*)项结果,用", "gb2312");
        var origin = (IndexEvent)sender;
        var report = origin.Actions;
        if (report != null) {
            report(this, total);
        }
        Console.WriteLine("Soso searched result count:" + total);
    }
}
}
Ps:这三个插件都是作为一个独立的项目存在的,如果需要添加新的插件..可以再添加一个项目..生成为dll文件后,放到程序的目录里.就可以直接使用了..
总结:
1.插件模式的原理..我的理解就是让程序去搜索可用的插件,并将其在需要使用的时候实例化..
2.此文也算是扔个砖头引块玉来.. - - 那成语怎么说来着?
3.本程序在VS2010下调试通过,在VS2008或VS2005下可能需要做少量修改。