爬虫/蜘蛛程序的制作(C#语言) 二
今天给大家列出一些代码,仅供参考
列出数据层和逻辑层的代码
这个是逻辑层的一个辅助类
这个是列表类
页面工具类
这就是所有的业务层代码,数据层可以对 SQL Server 和 MySQL 进行操作 download
列出数据层和逻辑层的代码
WebPage类
1using System;
2using System.Collections.Generic;
3using System.Text;
4using System.Web;
5using System.Web.SessionState;
6using System.Web.UI;
7using System.Web.UI.WebControls;
8using System.Web.UI.HtmlControls;
9namespace WebPage
10{
/// <summary>
/// String helpers used when building crawl URLs.
/// </summary>
public class StringHelper
{
    public StringHelper()
    {
    }

    /// <summary>
    /// Percent-encodes every multi-byte character (for example Chinese) in a string
    /// and leaves single-byte characters exactly as they are.
    /// </summary>
    /// <param name="strText">Text to encode; may be null or empty.</param>
    /// <returns>
    /// The encoded text, or an empty string when <paramref name="strText"/> is null or empty.
    /// </returns>
    public static string GetChineseURLCode(string strText)
    {
        if (string.IsNullOrEmpty(strText))
        {
            return string.Empty;
        }

        StringBuilder encoded = new StringBuilder(strText.Length);
        int index = 0;
        while (index < strText.Length)
        {
            // Keep a surrogate pair together; encoding each half on its own
            // would emit the bytes of the replacement character instead.
            bool isPair = char.IsHighSurrogate(strText[index])
                && index + 1 < strText.Length
                && char.IsLowSurrogate(strText[index + 1]);
            int width = isPair ? 2 : 1;

            encoded.Append(getSpell(strText.Substring(index, width)));
            index += width;
        }
        return encoded.ToString();
    }

    /// <summary>
    /// URL-encodes one text element when it occupies more than one byte in the
    /// system default encoding; otherwise returns it unchanged.
    /// </summary>
    private static string getSpell(string cnChar)
    {
        byte[] arrCN = System.Text.Encoding.Default.GetBytes(cnChar);
        if (arrCN.Length > 1)
        {
            return System.Web.HttpUtility.UrlEncode(cnChar);
        }
        return cnChar;
    }
}
45}
46
1using System;
2using System.Collections.Generic;
3using System.Text;
4using System.Web;
5using System.Web.SessionState;
6using System.Web.UI;
7using System.Web.UI.WebControls;
8using System.Web.UI.HtmlControls;
9namespace WebPage
10{
/// <summary>
/// String helpers used when building crawl URLs.
/// </summary>
public class StringHelper
{
    public StringHelper()
    {
    }

    /// <summary>
    /// Percent-encodes every multi-byte character (for example Chinese) in a string
    /// and leaves single-byte characters exactly as they are.
    /// </summary>
    /// <param name="strText">Text to encode; may be null or empty.</param>
    /// <returns>
    /// The encoded text, or an empty string when <paramref name="strText"/> is null or empty.
    /// </returns>
    public static string GetChineseURLCode(string strText)
    {
        if (string.IsNullOrEmpty(strText))
        {
            return string.Empty;
        }

        StringBuilder encoded = new StringBuilder(strText.Length);
        int index = 0;
        while (index < strText.Length)
        {
            // Keep a surrogate pair together; encoding each half on its own
            // would emit the bytes of the replacement character instead.
            bool isPair = char.IsHighSurrogate(strText[index])
                && index + 1 < strText.Length
                && char.IsLowSurrogate(strText[index + 1]);
            int width = isPair ? 2 : 1;

            encoded.Append(getSpell(strText.Substring(index, width)));
            index += width;
        }
        return encoded.ToString();
    }

    /// <summary>
    /// URL-encodes one text element when it occupies more than one byte in the
    /// system default encoding; otherwise returns it unchanged.
    /// </summary>
    private static string getSpell(string cnChar)
    {
        byte[] arrCN = System.Text.Encoding.Default.GetBytes(cnChar);
        if (arrCN.Length > 1)
        {
            return System.Web.HttpUtility.UrlEncode(cnChar);
        }
        return cnChar;
    }
}
45}
46
这个是逻辑层的一个辅助类
WebDetail类
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.Data;
using System.Text.RegularExpressions;
/**//// <summary>
/// 功能:文章最终页类
/// 创建时间:07-3-5
/// 创建人:曹振华
/// </summary>
namespace WebPage
{
// Article detail (final) page: extracts the article body from a downloaded page.
public class WebDetailPage:WebPage
{
    // Regex whose matches are deleted from the extracted article HTML.
    private string _strDelRegex;

    /// <summary>
    /// Copies the URL, cut regex, timeout, filter flag and delete regex from the configuration.
    /// </summary>
    /// <param name="clsDetailPage">Detail-page settings to copy from.</param>
    public WebDetailPage(WebDetailPageConfig clsDetailPage)
    {
        Url=clsDetailPage.DetailUrl;
        CutRegex=clsDetailPage.strCutRegex;
        TimeOut=clsDetailPage.intTimeOut;
        filterFlag = clsDetailPage.filterFlag;
        _strDelRegex = clsDetailPage.strDelRegex;
    }

    /// <summary>
    /// Returns the trimmed content of the "tmpDetailContent" group of the cut regex,
    /// with everything matched by the delete regex removed.
    /// </summary>
    /// <returns>The extracted article HTML, or an empty string when the cut regex does not match.</returns>
    public string GetDetailPageHtml()
    {
        // Reading matchRegexHtml runs the cut regex against the page content.
        Match match = matchRegexHtml;

        // Guard against a null match as well as an unsuccessful one.
        if (match == null || !match.Success)
        {
            return "";
        }

        string strHtml = match.Groups["tmpDetailContent"].Value.Trim();

        // A missing delete regex means there is nothing to strip; an empty
        // pattern would be a no-op anyway, so both cases skip the replace.
        if (!string.IsNullOrEmpty(_strDelRegex))
        {
            strHtml = Regex.Replace(strHtml, _strDelRegex, "");
        }
        return strHtml;
    }
}
}
这个是个页面最终解析类
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.Data;
using System.Text.RegularExpressions;
/**//// <summary>
/// 功能:文章最终页类
/// 创建时间:07-3-5
/// 创建人:曹振华
/// </summary>
namespace WebPage
{
// Article detail (final) page: extracts the article body from a downloaded page.
public class WebDetailPage:WebPage
{
    // Regex whose matches are deleted from the extracted article HTML.
    private string _strDelRegex;

    /// <summary>
    /// Copies the URL, cut regex, timeout, filter flag and delete regex from the configuration.
    /// </summary>
    /// <param name="clsDetailPage">Detail-page settings to copy from.</param>
    public WebDetailPage(WebDetailPageConfig clsDetailPage)
    {
        Url=clsDetailPage.DetailUrl;
        CutRegex=clsDetailPage.strCutRegex;
        TimeOut=clsDetailPage.intTimeOut;
        filterFlag = clsDetailPage.filterFlag;
        _strDelRegex = clsDetailPage.strDelRegex;
    }

    /// <summary>
    /// Returns the trimmed content of the "tmpDetailContent" group of the cut regex,
    /// with everything matched by the delete regex removed.
    /// </summary>
    /// <returns>The extracted article HTML, or an empty string when the cut regex does not match.</returns>
    public string GetDetailPageHtml()
    {
        // Reading matchRegexHtml runs the cut regex against the page content.
        Match match = matchRegexHtml;

        // Guard against a null match as well as an unsuccessful one.
        if (match == null || !match.Success)
        {
            return "";
        }

        string strHtml = match.Groups["tmpDetailContent"].Value.Trim();

        // A missing delete regex means there is nothing to strip; an empty
        // pattern would be a no-op anyway, so both cases skip the replace.
        if (!string.IsNullOrEmpty(_strDelRegex))
        {
            strHtml = Regex.Replace(strHtml, _strDelRegex, "");
        }
        return strHtml;
    }
}
}
WebPage类
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
namespace WebPage
{
/// <summary>
/// Base class for a crawled web page: holds the URL, the download timeout,
/// the content-cut regex and the filter flag, and exposes the downloaded
/// content and the cut-regex match.
/// Created: 07-3-5. Author: Cao Zhenhua.
/// </summary>
public class WebPage
{
    // Options applied to the cut regex.
    private const RegexOptions CutOptions =
        RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture | RegexOptions.IgnoreCase;

    private string _strUrl;
    private int _intTimeOut;          // download timeout passed to WebPageTools.GetConent
    private string _strCutHtmlRegex;  // regex that cuts the interesting part out of the page
    private int _intflag;

    // URL of the page.
    public string Url
    {
        get { return _strUrl; }
        set { _strUrl = value; }
    }

    // Download timeout.
    public int TimeOut
    {
        get { return _intTimeOut; }
        set { _intTimeOut = value; }
    }

    // Regex used to cut out the main content.
    public string CutRegex
    {
        get { return _strCutHtmlRegex; }
        set { _strCutHtmlRegex = value; }
    }

    // Page content. Every read calls GetUrlstrHtml() again; nothing is cached.
    public string PageHtml
    {
        get { return GetUrlstrHtml(); }
    }

    // Match of the cut regex against the page content. Every read calls
    // GetRegexHtml() again; nothing is cached.
    public Match matchRegexHtml
    {
        get { return GetRegexHtml(); }
    }

    // Filter flag forwarded to WebPageTools.GetConent.
    public int filterFlag
    {
        get { return _intflag; }
        set { _intflag = value; }
    }

    /// <summary>
    /// Fetches the page content through WebPageTools.GetConent using the
    /// configured URL, timeout and filter flag.
    /// </summary>
    protected string GetUrlstrHtml()
    {
        return WebPageTools.GetConent(_strUrl, _intTimeOut, filterFlag);
    }

    /// <summary>
    /// Applies the cut regex to the page content.
    /// </summary>
    /// <returns>
    /// The regex match; <see cref="Match.Empty"/> (never null) when matching
    /// throws, so callers can test <c>Success</c> without a null check.
    /// </returns>
    protected Match GetRegexHtml()
    {
        // Constructed outside the try block on purpose: an invalid or null
        // pattern is a configuration error and still surfaces as an exception.
        Regex reg = new Regex(_strCutHtmlRegex, CutOptions);
        try
        {
            return reg.Match(GetUrlstrHtml());
        }
        catch (Exception)
        {
            // Best effort: a failed match is reported as "no match".
            return Match.Empty;
        }
    }
}
}
这个是个页面基类
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
namespace WebPage
{
/// <summary>
/// Base class for a crawled web page: holds the URL, the download timeout,
/// the content-cut regex and the filter flag, and exposes the downloaded
/// content and the cut-regex match.
/// Created: 07-3-5. Author: Cao Zhenhua.
/// </summary>
public class WebPage
{
    // Options applied to the cut regex.
    private const RegexOptions CutOptions =
        RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture | RegexOptions.IgnoreCase;

    private string _strUrl;
    private int _intTimeOut;          // download timeout passed to WebPageTools.GetConent
    private string _strCutHtmlRegex;  // regex that cuts the interesting part out of the page
    private int _intflag;

    // URL of the page.
    public string Url
    {
        get { return _strUrl; }
        set { _strUrl = value; }
    }

    // Download timeout.
    public int TimeOut
    {
        get { return _intTimeOut; }
        set { _intTimeOut = value; }
    }

    // Regex used to cut out the main content.
    public string CutRegex
    {
        get { return _strCutHtmlRegex; }
        set { _strCutHtmlRegex = value; }
    }

    // Page content. Every read calls GetUrlstrHtml() again; nothing is cached.
    public string PageHtml
    {
        get { return GetUrlstrHtml(); }
    }

    // Match of the cut regex against the page content. Every read calls
    // GetRegexHtml() again; nothing is cached.
    public Match matchRegexHtml
    {
        get { return GetRegexHtml(); }
    }

    // Filter flag forwarded to WebPageTools.GetConent.
    public int filterFlag
    {
        get { return _intflag; }
        set { _intflag = value; }
    }

    /// <summary>
    /// Fetches the page content through WebPageTools.GetConent using the
    /// configured URL, timeout and filter flag.
    /// </summary>
    protected string GetUrlstrHtml()
    {
        return WebPageTools.GetConent(_strUrl, _intTimeOut, filterFlag);
    }

    /// <summary>
    /// Applies the cut regex to the page content.
    /// </summary>
    /// <returns>
    /// The regex match; <see cref="Match.Empty"/> (never null) when matching
    /// throws, so callers can test <c>Success</c> without a null check.
    /// </returns>
    protected Match GetRegexHtml()
    {
        // Constructed outside the try block on purpose: an invalid or null
        // pattern is a configuration error and still surfaces as an exception.
        Regex reg = new Regex(_strCutHtmlRegex, CutOptions);
        try
        {
            return reg.Match(GetUrlstrHtml());
        }
        catch (Exception)
        {
            // Best effort: a failed match is reported as "no match".
            return Match.Empty;
        }
    }
}
}
WebList类
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Data;
using System.Collections;
/**//// <summary>
/// 功能:新闻列表页类
/// 创建时间:07-3-5
/// 创建人:张杰
/// </summary>
namespace WebPage
{
// WebListPage: news list page. Extracts the list fragment once in the
// constructor and then serves list entries and the next-page link from it.
public class WebListPage:WebPage
{
    // Options shared by every regex this class builds.
    private const RegexOptions PatternOptions =
        RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture | RegexOptions.IgnoreCase;

    private string _strDelRegex;      // regex whose matches are removed from the list fragment
    private string _strListRegex;     // regex matching one entry (groups "Title" and "DetailUrl")
    private string _strNextPageRegex; // regex capturing the next-page link (group "NextPageUrl")
    private string _strListPageHtml;  // list fragment extracted by the constructor

    /// <summary>
    /// Copies the settings from the configuration and immediately extracts
    /// the list fragment via GetListPageHtml().
    /// </summary>
    /// <param name="objListPageConfig">List-page settings to copy from.</param>
    public WebListPage(WebListPageConfig objListPageConfig)
    {
        Url = objListPageConfig.ListUrl;
        CutRegex = objListPageConfig.strCutRegex;
        TimeOut = objListPageConfig.intTimeOut;
        filterFlag = objListPageConfig.filterFlag;
        _strDelRegex = objListPageConfig.strDelRegex;
        _strListRegex = objListPageConfig.strListRegex;
        _strNextPageRegex = objListPageConfig.strNextPageRegex;
        _strListPageHtml = GetListPageHtml();
    }

    /// <summary>
    /// Returns the trimmed "tmpListContent" group of the cut regex with the
    /// delete-regex matches removed, or an empty string when there is no match.
    /// </summary>
    private string GetListPageHtml()
    {
        Match match = matchRegexHtml;

        // Guard against a null match as well as an unsuccessful one.
        if (match == null || !match.Success)
        {
            return "";
        }

        string strHtml = match.Groups["tmpListContent"].Value.Trim();
        if (!string.IsNullOrEmpty(_strDelRegex))
        {
            strHtml = Regex.Replace(strHtml, _strDelRegex, "");
        }
        return strHtml;
    }

    /// <summary>
    /// Builds one <see cref="ListPageItems"/> per match of the list regex.
    /// </summary>
    /// <param name="replaceStr">Replacement text for <paramref name="beReplaceStr"/> inside each detail URL.</param>
    /// <param name="beReplaceStr">Text to replace inside each detail URL; null or empty disables the rewrite.</param>
    /// <returns>All matched entries, in document order; empty when nothing matches.</returns>
    public List<ListPageItems> GetListArray(string replaceStr,string beReplaceStr)
    {
        Regex reg = new Regex(_strListRegex, PatternOptions);
        List<ListPageItems> list = new List<ListPageItems>();
        bool rewriteUrl = !string.IsNullOrEmpty(beReplaceStr);

        // Visit every match. The previous index loop stopped at Count - 1 and
        // therefore silently dropped the last entry of each page.
        foreach (Match entry in reg.Matches(_strListPageHtml))
        {
            ListPageItems clsListItem = new ListPageItems();
            clsListItem.Title = entry.Groups["Title"].Value.Trim();

            string detailUrl = entry.Groups["DetailUrl"].Value.Trim();
            if (rewriteUrl)
            {
                detailUrl = detailUrl.Replace(beReplaceStr, replaceStr);
            }
            clsListItem.DetailUrl = detailUrl;

            list.Add(clsListItem);
        }
        return list;
    }

    /// <summary>
    /// Extracts the next-page link (group "NextPageUrl") from the list fragment
    /// and passes it through StringHelper.GetChineseURLCode.
    /// </summary>
    /// <returns>The encoded link, or the encoding of an empty string when no link is found.</returns>
    public string GetNextPage()
    {
        string strHtml = "";
        Regex reg = new Regex(_strNextPageRegex, PatternOptions);
        Match match = reg.Match(_strListPageHtml);
        if (match.Success)
        {
            strHtml = match.Groups["NextPageUrl"].Value.Trim();
        }
        return StringHelper.GetChineseURLCode(strHtml);
    }
}
// One entry of a list page: an article title and the URL of its detail page.
public class ListPageItems
{
    private string _detailUrl;
    private string _title;

    // URL of the article's detail page.
    public string DetailUrl { get { return this._detailUrl; } set { this._detailUrl = value; } }

    // Article title.
    public string Title { get { return this._title; } set { this._title = value; } }
}
}
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Data;
using System.Collections;
/**//// <summary>
/// 功能:新闻列表页类
/// 创建时间:07-3-5
/// 创建人:张杰
/// </summary>
namespace WebPage
{
// WebListPage: news list page. Extracts the list fragment once in the
// constructor and then serves list entries and the next-page link from it.
public class WebListPage:WebPage
{
    // Options shared by every regex this class builds.
    private const RegexOptions PatternOptions =
        RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture | RegexOptions.IgnoreCase;

    private string _strDelRegex;      // regex whose matches are removed from the list fragment
    private string _strListRegex;     // regex matching one entry (groups "Title" and "DetailUrl")
    private string _strNextPageRegex; // regex capturing the next-page link (group "NextPageUrl")
    private string _strListPageHtml;  // list fragment extracted by the constructor

    /// <summary>
    /// Copies the settings from the configuration and immediately extracts
    /// the list fragment via GetListPageHtml().
    /// </summary>
    /// <param name="objListPageConfig">List-page settings to copy from.</param>
    public WebListPage(WebListPageConfig objListPageConfig)
    {
        Url = objListPageConfig.ListUrl;
        CutRegex = objListPageConfig.strCutRegex;
        TimeOut = objListPageConfig.intTimeOut;
        filterFlag = objListPageConfig.filterFlag;
        _strDelRegex = objListPageConfig.strDelRegex;
        _strListRegex = objListPageConfig.strListRegex;
        _strNextPageRegex = objListPageConfig.strNextPageRegex;
        _strListPageHtml = GetListPageHtml();
    }

    /// <summary>
    /// Returns the trimmed "tmpListContent" group of the cut regex with the
    /// delete-regex matches removed, or an empty string when there is no match.
    /// </summary>
    private string GetListPageHtml()
    {
        Match match = matchRegexHtml;

        // Guard against a null match as well as an unsuccessful one.
        if (match == null || !match.Success)
        {
            return "";
        }

        string strHtml = match.Groups["tmpListContent"].Value.Trim();
        if (!string.IsNullOrEmpty(_strDelRegex))
        {
            strHtml = Regex.Replace(strHtml, _strDelRegex, "");
        }
        return strHtml;
    }

    /// <summary>
    /// Builds one <see cref="ListPageItems"/> per match of the list regex.
    /// </summary>
    /// <param name="replaceStr">Replacement text for <paramref name="beReplaceStr"/> inside each detail URL.</param>
    /// <param name="beReplaceStr">Text to replace inside each detail URL; null or empty disables the rewrite.</param>
    /// <returns>All matched entries, in document order; empty when nothing matches.</returns>
    public List<ListPageItems> GetListArray(string replaceStr,string beReplaceStr)
    {
        Regex reg = new Regex(_strListRegex, PatternOptions);
        List<ListPageItems> list = new List<ListPageItems>();
        bool rewriteUrl = !string.IsNullOrEmpty(beReplaceStr);

        // Visit every match. The previous index loop stopped at Count - 1 and
        // therefore silently dropped the last entry of each page.
        foreach (Match entry in reg.Matches(_strListPageHtml))
        {
            ListPageItems clsListItem = new ListPageItems();
            clsListItem.Title = entry.Groups["Title"].Value.Trim();

            string detailUrl = entry.Groups["DetailUrl"].Value.Trim();
            if (rewriteUrl)
            {
                detailUrl = detailUrl.Replace(beReplaceStr, replaceStr);
            }
            clsListItem.DetailUrl = detailUrl;

            list.Add(clsListItem);
        }
        return list;
    }

    /// <summary>
    /// Extracts the next-page link (group "NextPageUrl") from the list fragment
    /// and passes it through StringHelper.GetChineseURLCode.
    /// </summary>
    /// <returns>The encoded link, or the encoding of an empty string when no link is found.</returns>
    public string GetNextPage()
    {
        string strHtml = "";
        Regex reg = new Regex(_strNextPageRegex, PatternOptions);
        Match match = reg.Match(_strListPageHtml);
        if (match.Success)
        {
            strHtml = match.Groups["NextPageUrl"].Value.Trim();
        }
        return StringHelper.GetChineseURLCode(strHtml);
    }
}
// One entry of a list page: an article title and the URL of its detail page.
public class ListPageItems
{
    private string _detailUrl;
    private string _title;

    // URL of the article's detail page.
    public string DetailUrl { get { return this._detailUrl; } set { this._detailUrl = value; } }

    // Article title.
    public string Title { get { return this._title; } set { this._title = value; } }
}
}
这个是列表类
WebTool类
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
/**//// <summary>
/// 功能:页面请求
/// 创建时间:07-3-5
/// 创建人:曹振华
/// </summary>
namespace WebPage
{
/// <summary>
/// Stateless helpers for the crawler: link resolution, page download and
/// HTML tag filtering.
/// </summary>
public static class WebPageTools
{
    /// <summary>
    /// Turns a link found on a page into an absolute URL.
    /// </summary>
    /// <param name="strFristPage">Absolute URL of the page the link was found on.</param>
    /// <param name="NextPage">The link; absolute, root-relative ("/a/b.html") or document-relative ("b.html").</param>
    /// <returns>
    /// <paramref name="NextPage"/> unchanged when it is null, empty, already an
    /// http/https URL, or cannot be resolved against <paramref name="strFristPage"/>;
    /// otherwise the resolved absolute URL (non-ASCII characters percent-encoded).
    /// </returns>
    public static string GetCompeletUrl(string strFristPage, string NextPage)
    {
        if (string.IsNullOrEmpty(NextPage))
        {
            return NextPage;
        }

        // Already absolute. The scheme is checked explicitly because a plain
        // "is absolute URI" test also accepts "/path" as a file URI on Unix.
        if (NextPage.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || NextPage.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            return NextPage;
        }

        // Let System.Uri do the resolution: it handles a base without a
        // trailing slash, sub-directories in the relative part and
        // query-only links, which the hand-written regexes got wrong.
        Uri baseUri;
        Uri resolved;
        if (Uri.TryCreate(strFristPage, UriKind.Absolute, out baseUri)
            && Uri.TryCreate(baseUri, NextPage, out resolved))
        {
            return resolved.AbsoluteUri;
        }
        return NextPage;
    }

    /// <summary>
    /// Downloads a page with an HTTP GET and decodes it as GB2312.
    /// </summary>
    /// <param name="_strUrl">URL to download.</param>
    /// <param name="_intTimeOut">Value assigned to the request's Timeout property.</param>
    /// <param name="flag">When 1, double quotes, single quotes, CR, LF and form feeds are removed from the result.</param>
    /// <returns>The page text, or whatever was read before a failure (an empty string when nothing was read).</returns>
    public static string GetConent(string _strUrl, int _intTimeOut,int flag)
    {
        string strHtml = "";
        try
        {
            HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(_strUrl);
            myReq.Timeout = _intTimeOut;
            myReq.Method = "GET";

            // using blocks release the connection even when reading fails.
            using (HttpWebResponse myRes = (HttpWebResponse)myReq.GetResponse())
            using (Stream stream = myRes.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("GB2312")))
            {
                strHtml = reader.ReadToEnd();
            }

            if (flag == 1)
            {
                strHtml = Regex.Replace(strHtml, @"[\""\r\f\n']", "");
            }
        }
        catch (Exception)
        {
            // Deliberate best effort: callers treat an empty page as "nothing
            // to parse", so any failure is reported through the return value.
        }
        return strHtml;
    }

    /// <summary>
    /// Strips every HTML tag except attribute-less br, p and strong tags,
    /// which are kept in lower-case form (all br variants become "&lt;br&gt;").
    /// </summary>
    /// <param name="strFilter">HTML to filter; null or empty is returned unchanged.</param>
    /// <returns>The filtered text.</returns>
    public static string FilterPaticularChar(string strFilter)
    {
        if (string.IsNullOrEmpty(strFilter))
        {
            return strFilter;
        }

        // A single pass with an evaluator needs no placeholder tokens, so
        // literal text such as "[---]" in the input is left alone.
        return Regex.Replace(strFilter, "<[^>]*>", new MatchEvaluator(KeepBasicTag));
    }

    // Maps one matched tag to its normalized form, or to "" when it must be removed.
    private static string KeepBasicTag(Match tag)
    {
        switch (tag.Value.ToLowerInvariant())
        {
            case "<br>":
            case "<br/>":
            case "<br />":
                return "<br>";
            case "<p>":
                return "<p>";
            case "</p>":
                return "</p>";
            case "<strong>":
                return "<strong>";
            case "</strong>":
                return "</strong>";
            default:
                return "";
        }
    }
}
}
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
/**//// <summary>
/// 功能:页面请求
/// 创建时间:07-3-5
/// 创建人:曹振华
/// </summary>
namespace WebPage
{
/// <summary>
/// Stateless helpers for the crawler: link resolution, page download and
/// HTML tag filtering.
/// </summary>
public static class WebPageTools
{
    /// <summary>
    /// Turns a link found on a page into an absolute URL.
    /// </summary>
    /// <param name="strFristPage">Absolute URL of the page the link was found on.</param>
    /// <param name="NextPage">The link; absolute, root-relative ("/a/b.html") or document-relative ("b.html").</param>
    /// <returns>
    /// <paramref name="NextPage"/> unchanged when it is null, empty, already an
    /// http/https URL, or cannot be resolved against <paramref name="strFristPage"/>;
    /// otherwise the resolved absolute URL (non-ASCII characters percent-encoded).
    /// </returns>
    public static string GetCompeletUrl(string strFristPage, string NextPage)
    {
        if (string.IsNullOrEmpty(NextPage))
        {
            return NextPage;
        }

        // Already absolute. The scheme is checked explicitly because a plain
        // "is absolute URI" test also accepts "/path" as a file URI on Unix.
        if (NextPage.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || NextPage.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
        {
            return NextPage;
        }

        // Let System.Uri do the resolution: it handles a base without a
        // trailing slash, sub-directories in the relative part and
        // query-only links, which the hand-written regexes got wrong.
        Uri baseUri;
        Uri resolved;
        if (Uri.TryCreate(strFristPage, UriKind.Absolute, out baseUri)
            && Uri.TryCreate(baseUri, NextPage, out resolved))
        {
            return resolved.AbsoluteUri;
        }
        return NextPage;
    }

    /// <summary>
    /// Downloads a page with an HTTP GET and decodes it as GB2312.
    /// </summary>
    /// <param name="_strUrl">URL to download.</param>
    /// <param name="_intTimeOut">Value assigned to the request's Timeout property.</param>
    /// <param name="flag">When 1, double quotes, single quotes, CR, LF and form feeds are removed from the result.</param>
    /// <returns>The page text, or whatever was read before a failure (an empty string when nothing was read).</returns>
    public static string GetConent(string _strUrl, int _intTimeOut,int flag)
    {
        string strHtml = "";
        try
        {
            HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(_strUrl);
            myReq.Timeout = _intTimeOut;
            myReq.Method = "GET";

            // using blocks release the connection even when reading fails.
            using (HttpWebResponse myRes = (HttpWebResponse)myReq.GetResponse())
            using (Stream stream = myRes.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("GB2312")))
            {
                strHtml = reader.ReadToEnd();
            }

            if (flag == 1)
            {
                strHtml = Regex.Replace(strHtml, @"[\""\r\f\n']", "");
            }
        }
        catch (Exception)
        {
            // Deliberate best effort: callers treat an empty page as "nothing
            // to parse", so any failure is reported through the return value.
        }
        return strHtml;
    }

    /// <summary>
    /// Strips every HTML tag except attribute-less br, p and strong tags,
    /// which are kept in lower-case form (all br variants become "&lt;br&gt;").
    /// </summary>
    /// <param name="strFilter">HTML to filter; null or empty is returned unchanged.</param>
    /// <returns>The filtered text.</returns>
    public static string FilterPaticularChar(string strFilter)
    {
        if (string.IsNullOrEmpty(strFilter))
        {
            return strFilter;
        }

        // A single pass with an evaluator needs no placeholder tokens, so
        // literal text such as "[---]" in the input is left alone.
        return Regex.Replace(strFilter, "<[^>]*>", new MatchEvaluator(KeepBasicTag));
    }

    // Maps one matched tag to its normalized form, or to "" when it must be removed.
    private static string KeepBasicTag(Match tag)
    {
        switch (tag.Value.ToLowerInvariant())
        {
            case "<br>":
            case "<br/>":
            case "<br />":
                return "<br>";
            case "<p>":
                return "<p>";
            case "</p>":
                return "</p>";
            case "<strong>":
                return "<strong>";
            case "</strong>":
                return "</strong>";
            default:
                return "";
        }
    }
}
}
页面工具类
WebConfig类
using System;
using System.Collections.Generic;
using System.Text;
/**//// <summary>
/// 功能:ListPage类设置
/// 创建时间:07-3-5
/// 创建人:张杰
/// </summary>
namespace WebPage
{
// Common base type of the page configuration classes; declares no members.
public class WebPageConfig
{
}

// Settings consumed by the list-page class: URL, regex patterns, timeout and filter flag.
public class WebListPageConfig : WebPageConfig
{
    private string _listUrl;
    private string _cutPattern;
    private string _listPattern;
    private string _delPattern;
    private string _nextPagePattern;
    private int _timeOut;
    private int _filterFlag;

    // Stores each argument in the matching property's backing field.
    public WebListPageConfig(string strListPage, string strCutRegex, string strListRegex, string strDelRegex, string strNextPageRegex, int intTimeOut, int filterFlag)
    {
        this._listUrl = strListPage;
        this._cutPattern = strCutRegex;
        this._listPattern = strListRegex;
        this._delPattern = strDelRegex;
        this._nextPagePattern = strNextPageRegex;
        this._timeOut = intTimeOut;
        this._filterFlag = filterFlag;
    }

    // URL of the list page.
    public string ListUrl { get { return this._listUrl; } set { this._listUrl = value; } }

    // Regex that cuts the list fragment out of the page.
    public string strCutRegex { get { return this._cutPattern; } set { this._cutPattern = value; } }

    // Regex that matches the individual list entries.
    public string strListRegex { get { return this._listPattern; } set { this._listPattern = value; } }

    // Regex whose matches are deleted from the fragment.
    public string strDelRegex { get { return this._delPattern; } set { this._delPattern = value; } }

    // Regex that captures the next-page link.
    public string strNextPageRegex { get { return this._nextPagePattern; } set { this._nextPagePattern = value; } }

    // Download timeout.
    public int intTimeOut { get { return this._timeOut; } set { this._timeOut = value; } }

    // Filter flag.
    public int filterFlag { get { return this._filterFlag; } set { this._filterFlag = value; } }
}
// Settings consumed by the detail-page class: URL, regex patterns, timeout and filter flag.
public class WebDetailPageConfig : WebPageConfig
{
    private string _detailUrl;
    private string _cutPattern;
    private string _delPattern;
    private int _timeOut;
    private int _filterFlag;

    // Stores each argument in the matching property's backing field.
    public WebDetailPageConfig(string strDetailUrl, string strCutRegex, string strDelRegex, int intTimeOut, int filterFlag)
    {
        this._detailUrl = strDetailUrl;
        this._cutPattern = strCutRegex;
        this._delPattern = strDelRegex;
        this._timeOut = intTimeOut;
        this._filterFlag = filterFlag;
    }

    // URL of the detail page.
    public string DetailUrl { get { return this._detailUrl; } set { this._detailUrl = value; } }

    // Regex that cuts the article out of the page.
    public string strCutRegex { get { return this._cutPattern; } set { this._cutPattern = value; } }

    // Regex whose matches are deleted from the article.
    public string strDelRegex { get { return this._delPattern; } set { this._delPattern = value; } }

    // Download timeout.
    public int intTimeOut { get { return this._timeOut; } set { this._timeOut = value; } }

    // Filter flag.
    public int filterFlag { get { return this._filterFlag; } set { this._filterFlag = value; } }
}
// Plain data holder described by the original author as the "database structure".
// NOTE(review): the meaning of the numbered regex slots is not visible here; confirm against the data layer.
public class DBSoures
{
    private string _urlAddress;
    private string _kindName;
    private int _kindId;
    private int _totalPage;
    private string _listRegex1;
    private string _listRegex2;
    private string _listRegex3;
    private string _listRegex4;
    private int _timeOut;
    private string _detailRegex1;
    private string _detailRegex2;
    private int _flag;
    private string _replaceUrl;
    private string _beReplaceStr;

    // Stores each argument in the backing field of the property with the same name
    // (intFlag feeds filterFlag).
    public DBSoures(string UrlAddress,string KindName,int KindID,int TotolPage,string ListRegex1,string ListRegex2,string ListRegex3,string ListRegex4,int TimeOut,string DetailRegex1,string DetailRegex2,int intFlag,string ReplaceUrl,string BeReplaceStr)
    {
        this._urlAddress = UrlAddress;
        this._kindName = KindName;
        this._kindId = KindID;
        this._totalPage = TotolPage;
        this._listRegex1 = ListRegex1;
        this._listRegex2 = ListRegex2;
        this._listRegex3 = ListRegex3;
        this._listRegex4 = ListRegex4;
        this._timeOut = TimeOut;
        this._detailRegex1 = DetailRegex1;
        this._detailRegex2 = DetailRegex2;
        this._flag = intFlag;
        this._replaceUrl = ReplaceUrl;
        this._beReplaceStr = BeReplaceStr;
    }

    public string UrlAddress { get { return this._urlAddress; } set { this._urlAddress = value; } }
    public string KindName { get { return this._kindName; } set { this._kindName = value; } }
    public int KindID { get { return this._kindId; } set { this._kindId = value; } }
    public int TotolPage { get { return this._totalPage; } set { this._totalPage = value; } }
    public string ListRegex1 { get { return this._listRegex1; } set { this._listRegex1 = value; } }
    public string ListRegex2 { get { return this._listRegex2; } set { this._listRegex2 = value; } }
    public string ListRegex3 { get { return this._listRegex3; } set { this._listRegex3 = value; } }
    public string ListRegex4 { get { return this._listRegex4; } set { this._listRegex4 = value; } }
    public int TimeOut { get { return this._timeOut; } set { this._timeOut = value; } }
    public string DetailRegex1 { get { return this._detailRegex1; } set { this._detailRegex1 = value; } }
    public string DetailRegex2 { get { return this._detailRegex2; } set { this._detailRegex2 = value; } }
    public int filterFlag { get { return this._flag; } set { this._flag = value; } }
    public string ReplaceUrl { get { return this._replaceUrl; } set { this._replaceUrl = value; } }
    public string BeReplaceStr { get { return this._beReplaceStr; } set { this._beReplaceStr = value; } }
}
}
using System;
using System.Collections.Generic;
using System.Text;
/**//// <summary>
/// 功能:ListPage类设置
/// 创建时间:07-3-5
/// 创建人:张杰
/// </summary>
namespace WebPage
{
// Common base type of the page configuration classes; declares no members.
public class WebPageConfig
{
}

// Settings consumed by the list-page class: URL, regex patterns, timeout and filter flag.
public class WebListPageConfig : WebPageConfig
{
    private string _listUrl;
    private string _cutPattern;
    private string _listPattern;
    private string _delPattern;
    private string _nextPagePattern;
    private int _timeOut;
    private int _filterFlag;

    // Stores each argument in the matching property's backing field.
    public WebListPageConfig(string strListPage, string strCutRegex, string strListRegex, string strDelRegex, string strNextPageRegex, int intTimeOut, int filterFlag)
    {
        this._listUrl = strListPage;
        this._cutPattern = strCutRegex;
        this._listPattern = strListRegex;
        this._delPattern = strDelRegex;
        this._nextPagePattern = strNextPageRegex;
        this._timeOut = intTimeOut;
        this._filterFlag = filterFlag;
    }

    // URL of the list page.
    public string ListUrl { get { return this._listUrl; } set { this._listUrl = value; } }

    // Regex that cuts the list fragment out of the page.
    public string strCutRegex { get { return this._cutPattern; } set { this._cutPattern = value; } }

    // Regex that matches the individual list entries.
    public string strListRegex { get { return this._listPattern; } set { this._listPattern = value; } }

    // Regex whose matches are deleted from the fragment.
    public string strDelRegex { get { return this._delPattern; } set { this._delPattern = value; } }

    // Regex that captures the next-page link.
    public string strNextPageRegex { get { return this._nextPagePattern; } set { this._nextPagePattern = value; } }

    // Download timeout.
    public int intTimeOut { get { return this._timeOut; } set { this._timeOut = value; } }

    // Filter flag.
    public int filterFlag { get { return this._filterFlag; } set { this._filterFlag = value; } }
}
// Settings consumed by the detail-page class: URL, regex patterns, timeout and filter flag.
public class WebDetailPageConfig : WebPageConfig
{
    private string _detailUrl;
    private string _cutPattern;
    private string _delPattern;
    private int _timeOut;
    private int _filterFlag;

    // Stores each argument in the matching property's backing field.
    public WebDetailPageConfig(string strDetailUrl, string strCutRegex, string strDelRegex, int intTimeOut, int filterFlag)
    {
        this._detailUrl = strDetailUrl;
        this._cutPattern = strCutRegex;
        this._delPattern = strDelRegex;
        this._timeOut = intTimeOut;
        this._filterFlag = filterFlag;
    }

    // URL of the detail page.
    public string DetailUrl { get { return this._detailUrl; } set { this._detailUrl = value; } }

    // Regex that cuts the article out of the page.
    public string strCutRegex { get { return this._cutPattern; } set { this._cutPattern = value; } }

    // Regex whose matches are deleted from the article.
    public string strDelRegex { get { return this._delPattern; } set { this._delPattern = value; } }

    // Download timeout.
    public int intTimeOut { get { return this._timeOut; } set { this._timeOut = value; } }

    // Filter flag.
    public int filterFlag { get { return this._filterFlag; } set { this._filterFlag = value; } }
}
// Plain data holder described by the original author as the "database structure".
// NOTE(review): the meaning of the numbered regex slots is not visible here; confirm against the data layer.
public class DBSoures
{
    private string _urlAddress;
    private string _kindName;
    private int _kindId;
    private int _totalPage;
    private string _listRegex1;
    private string _listRegex2;
    private string _listRegex3;
    private string _listRegex4;
    private int _timeOut;
    private string _detailRegex1;
    private string _detailRegex2;
    private int _flag;
    private string _replaceUrl;
    private string _beReplaceStr;

    // Stores each argument in the backing field of the property with the same name
    // (intFlag feeds filterFlag).
    public DBSoures(string UrlAddress,string KindName,int KindID,int TotolPage,string ListRegex1,string ListRegex2,string ListRegex3,string ListRegex4,int TimeOut,string DetailRegex1,string DetailRegex2,int intFlag,string ReplaceUrl,string BeReplaceStr)
    {
        this._urlAddress = UrlAddress;
        this._kindName = KindName;
        this._kindId = KindID;
        this._totalPage = TotolPage;
        this._listRegex1 = ListRegex1;
        this._listRegex2 = ListRegex2;
        this._listRegex3 = ListRegex3;
        this._listRegex4 = ListRegex4;
        this._timeOut = TimeOut;
        this._detailRegex1 = DetailRegex1;
        this._detailRegex2 = DetailRegex2;
        this._flag = intFlag;
        this._replaceUrl = ReplaceUrl;
        this._beReplaceStr = BeReplaceStr;
    }

    public string UrlAddress { get { return this._urlAddress; } set { this._urlAddress = value; } }
    public string KindName { get { return this._kindName; } set { this._kindName = value; } }
    public int KindID { get { return this._kindId; } set { this._kindId = value; } }
    public int TotolPage { get { return this._totalPage; } set { this._totalPage = value; } }
    public string ListRegex1 { get { return this._listRegex1; } set { this._listRegex1 = value; } }
    public string ListRegex2 { get { return this._listRegex2; } set { this._listRegex2 = value; } }
    public string ListRegex3 { get { return this._listRegex3; } set { this._listRegex3 = value; } }
    public string ListRegex4 { get { return this._listRegex4; } set { this._listRegex4 = value; } }
    public int TimeOut { get { return this._timeOut; } set { this._timeOut = value; } }
    public string DetailRegex1 { get { return this._detailRegex1; } set { this._detailRegex1 = value; } }
    public string DetailRegex2 { get { return this._detailRegex2; } set { this._detailRegex2 = value; } }
    public int filterFlag { get { return this._flag; } set { this._flag = value; } }
    public string ReplaceUrl { get { return this._replaceUrl; } set { this._replaceUrl = value; } }
    public string BeReplaceStr { get { return this._beReplaceStr; } set { this._beReplaceStr = value; } }
}
}
这就是所有的业务层代码,数据层可以对 SQL Server 和 MySQL 进行操作 download
长期招聘java,有找工作可以联系我,微信:caozhenhua1563