简单的信息采集程序示例(小偷程序)
最近正准备做一个信息采集的程序，下面是一个简单的采集程序示例，提供给初学者入门参考。
aspx页面代码
.cs页面代码
aspx页面代码
<asp:TextBox ID="Txt_Url" runat="server" Width="441px"></asp:TextBox><br />
<asp:Button id="Btn_GetUrlSource" runat="server" Text="取得网页代码" OnClick="Btn_GetUrlSource_Click"></asp:Button>
<br />
<asp:TextBox id="Txt_UrlSource" runat="server" Width="100%" Height="195px" TextMode="MultiLine"></asp:TextBox><br />
<br />
采集开始代码
<asp:TextBox ID="Txt_First" runat="server" Height="90px" TextMode="MultiLine" Width="280px"></asp:TextBox><br />
<asp:Button ID="Btn_ListCheck" runat="server" OnClick="Btn_ListCheck_Click" Text="测试唯一性" /><br />
采集结束代码
<asp:TextBox ID="Txt_Last" runat="server" Height="90px" TextMode="MultiLine"
Width="280px"></asp:TextBox><br />
<br />
<asp:Button ID="Btn_Result" runat="server" Text="取得采集结果" OnClick="Btn_Result_Click" /><br />
<asp:TextBox ID="Txt_Result" runat="server" Height="134px" TextMode="MultiLine" Width="579px"></asp:TextBox>
<asp:Button id="Btn_GetUrlSource" runat="server" Text="取得网页代码" OnClick="Btn_GetUrlSource_Click"></asp:Button>
<br />
<asp:TextBox id="Txt_UrlSource" runat="server" Width="100%" Height="195px" TextMode="MultiLine"></asp:TextBox><br />
<br />
采集开始代码
<asp:TextBox ID="Txt_First" runat="server" Height="90px" TextMode="MultiLine" Width="280px"></asp:TextBox><br />
<asp:Button ID="Btn_ListCheck" runat="server" OnClick="Btn_ListCheck_Click" Text="测试唯一性" /><br />
采集结束代码
<asp:TextBox ID="Txt_Last" runat="server" Height="90px" TextMode="MultiLine"
Width="280px"></asp:TextBox><br />
<br />
<asp:Button ID="Btn_Result" runat="server" Text="取得采集结果" OnClick="Btn_Result_Click" /><br />
<asp:TextBox ID="Txt_Result" runat="server" Height="134px" TextMode="MultiLine" Width="579px"></asp:TextBox>
.cs页面代码
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using NetShuai.Database;
/// <summary>
/// URL most recently taken from the <c>Txt_Url</c> box; assigned by
/// <c>Btn_GetUrlSource_Click</c>.
/// </summary>
private string PageUrl = string.Empty;

/// <summary>
/// Page load handler. Intentionally empty: this sample needs no per-request
/// initialization.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Page_Load(object sender, System.EventArgs e)
{
    // Place user code here to initialize the page.
}
/// <summary>
/// Downloads the document at the URL entered in <c>Txt_Url</c> and places its
/// text in <c>Txt_UrlSource</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// Network and URI-format failures from <see cref="WebRequest"/> are not caught
/// here and propagate to the caller, as in the original implementation.
/// </remarks>
protected void Btn_GetUrlSource_Click(object sender, EventArgs e)
{
    PageUrl = Txt_Url.Text.Trim();
    if (PageUrl.Length == 0)
    {
        // Nothing to fetch; WebRequest.Create rejects an empty URI.
        return;
    }

    WebRequest request = WebRequest.Create(PageUrl);

    // The using statements release the response, its stream and the reader
    // even when reading fails part-way through. The original never closed
    // the WebResponse and skipped both Close() calls on an exception.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    // NOTE(review): Encoding.Default is kept from the original; pages served
    // in a different charset will presumably decode incorrectly - consider
    // honoring the charset declared by the response.
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        Txt_UrlSource.Text = sr.ReadToEnd();
    }
}
/// <summary>
/// Extracts every piece of text in <c>Txt_UrlSource</c> that lies between the
/// start marker (<c>Txt_First</c>) and the end marker (<c>Txt_Last</c>) and
/// writes the concatenated matches to <c>Txt_Result</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Btn_Result_Click(object sender, EventArgs e)
{
    // Both markers are literal snippets of page source, so they are escaped
    // before being spliced into the pattern; otherwise characters such as
    // '(' '.' '?' or '[' in the snippet would be read as regex syntax.
    // With the markers escaped, the former HtmlEncode/HtmlDecode round trip
    // is unnecessary and the raw source is matched directly.
    string startMarker = Regex.Escape(Txt_First.Text);
    string endMarker = Regex.Escape(Txt_Last.Text);

    // The middle fragment must be a verbatim literal: in a regular string
    // literal "\w" is an unrecognized escape sequence and does not compile.
    // Lookbehind/lookahead keep the markers themselves out of each match;
    // [\w\W]*? lazily matches any characters, including newlines.
    string strExp = "(?<=" + startMarker + @")[\w\W]*?(?=" + endMarker + ")";

    MatchCollection mc = Regex.Matches(Txt_UrlSource.Text, strExp);

    // Build the output once and assign it, so a second click replaces the
    // previous result instead of appending to it.
    System.Text.StringBuilder collected = new System.Text.StringBuilder();
    foreach (Match m in mc)
    {
        collected.Append(m.Value);
    }
    Txt_Result.Text = collected.ToString();
}
/// <summary>
/// Checks that the start marker (<c>Txt_First</c>) and the end marker
/// (<c>Txt_Last</c>) each occur at most once in <c>Txt_UrlSource</c>, and
/// writes a JavaScript alert to the response for the first marker found more
/// than once.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// A marker that does not occur at all is not reported. An empty marker
/// matches at every position and is therefore reported as repeated, as in
/// the original implementation.
/// </remarks>
protected void Btn_ListCheck_Click(object sender, EventArgs e)
{
    string source = Txt_UrlSource.Text;

    // Markers are literal snippets of page source; escape them so regex
    // metacharacters in the snippet are counted literally instead of being
    // interpreted (or throwing on an invalid pattern).
    int startCount = Regex.Matches(source, Regex.Escape(Txt_First.Text)).Count;
    if (startCount > 1)
    {
        Response.Write("<script>alert('列表开始代码有重复!')</script>");
        return;
    }

    int endCount = Regex.Matches(source, Regex.Escape(Txt_Last.Text)).Count;
    if (endCount > 1)
    {
        Response.Write("<script>alert('列表结束代码有重复!')</script>");
        return;
    }
}
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using NetShuai.Database;
/// <summary>
/// URL most recently taken from the <c>Txt_Url</c> box; assigned by
/// <c>Btn_GetUrlSource_Click</c>.
/// </summary>
private string PageUrl = string.Empty;

/// <summary>
/// Page load handler. Intentionally empty: this sample needs no per-request
/// initialization.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Page_Load(object sender, System.EventArgs e)
{
    // Place user code here to initialize the page.
}
/// <summary>
/// Downloads the document at the URL entered in <c>Txt_Url</c> and places its
/// text in <c>Txt_UrlSource</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// Network and URI-format failures from <see cref="WebRequest"/> are not caught
/// here and propagate to the caller, as in the original implementation.
/// </remarks>
protected void Btn_GetUrlSource_Click(object sender, EventArgs e)
{
    PageUrl = Txt_Url.Text.Trim();
    if (PageUrl.Length == 0)
    {
        // Nothing to fetch; WebRequest.Create rejects an empty URI.
        return;
    }

    WebRequest request = WebRequest.Create(PageUrl);

    // The using statements release the response, its stream and the reader
    // even when reading fails part-way through. The original never closed
    // the WebResponse and skipped both Close() calls on an exception.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    // NOTE(review): Encoding.Default is kept from the original; pages served
    // in a different charset will presumably decode incorrectly - consider
    // honoring the charset declared by the response.
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        Txt_UrlSource.Text = sr.ReadToEnd();
    }
}
/// <summary>
/// Extracts every piece of text in <c>Txt_UrlSource</c> that lies between the
/// start marker (<c>Txt_First</c>) and the end marker (<c>Txt_Last</c>) and
/// writes the concatenated matches to <c>Txt_Result</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Btn_Result_Click(object sender, EventArgs e)
{
    // Both markers are literal snippets of page source, so they are escaped
    // before being spliced into the pattern; otherwise characters such as
    // '(' '.' '?' or '[' in the snippet would be read as regex syntax.
    // With the markers escaped, the former HtmlEncode/HtmlDecode round trip
    // is unnecessary and the raw source is matched directly.
    string startMarker = Regex.Escape(Txt_First.Text);
    string endMarker = Regex.Escape(Txt_Last.Text);

    // The middle fragment must be a verbatim literal: in a regular string
    // literal "\w" is an unrecognized escape sequence and does not compile.
    // Lookbehind/lookahead keep the markers themselves out of each match;
    // [\w\W]*? lazily matches any characters, including newlines.
    string strExp = "(?<=" + startMarker + @")[\w\W]*?(?=" + endMarker + ")";

    MatchCollection mc = Regex.Matches(Txt_UrlSource.Text, strExp);

    // Build the output once and assign it, so a second click replaces the
    // previous result instead of appending to it.
    System.Text.StringBuilder collected = new System.Text.StringBuilder();
    foreach (Match m in mc)
    {
        collected.Append(m.Value);
    }
    Txt_Result.Text = collected.ToString();
}
/// <summary>
/// Checks that the start marker (<c>Txt_First</c>) and the end marker
/// (<c>Txt_Last</c>) each occur at most once in <c>Txt_UrlSource</c>, and
/// writes a JavaScript alert to the response for the first marker found more
/// than once.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// A marker that does not occur at all is not reported. An empty marker
/// matches at every position and is therefore reported as repeated, as in
/// the original implementation.
/// </remarks>
protected void Btn_ListCheck_Click(object sender, EventArgs e)
{
    string source = Txt_UrlSource.Text;

    // Markers are literal snippets of page source; escape them so regex
    // metacharacters in the snippet are counted literally instead of being
    // interpreted (or throwing on an invalid pattern).
    int startCount = Regex.Matches(source, Regex.Escape(Txt_First.Text)).Count;
    if (startCount > 1)
    {
        Response.Write("<script>alert('列表开始代码有重复!')</script>");
        return;
    }

    int endCount = Regex.Matches(source, Regex.Escape(Txt_Last.Text)).Count;
    if (endCount > 1)
    {
        Response.Write("<script>alert('列表结束代码有重复!')</script>");
        return;
    }
}