// Program that builds a book listing page from the Sina Reading channel (book.sina.com.cn).
using System;
using System.Net;
using System.IO;
using System.Text;
using System.Collections;
using System.Text.RegularExpressions;
namespace ConsoleApplication1
{
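/// <summary>
/// Console program that downloads a book's index page from book.sina.com.cn
/// and generates a local HTML index of chapter links.
/// </summary>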
class Class1
{
/// <summary>
/// Application entry point: downloads the book's index page, extracts the
/// link text of every chapter, and writes an HTML table of chapter links to
/// <c>index.html</c> on the current user's desktop.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
[STAThread]
static void Main(string[] args)
{
    // Chapters are numbered 1..chapterCount on the index page.
    const int chapterCount = 84;

    string indexUrl = "http://book.sina.com.cn/nzt/lit/dixiashi/index.shtml";
    string content = Gethtml(indexUrl);

    // Layout consumed by Getlist: arr[i,0] = link text, arr[0,i] = target file;
    // index 0 carries no entry.
    string[,] arr = new string[chapterCount + 1, chapterCount + 1];
    arr[0, 0] = "";
    for (int i = 1; i <= chapterCount; i++)
    {
        // Opening anchor tag of chapter i that is searched for in the page source.
        string anchor = "<a href=/nzt/lit/dixiashi/" + i + ".shtml target=_blank class=a03>";
        // Only the 80 characters starting at the anchor are scanned for the link text.
        arr[i, 0] = GetChinese("[\u4e00-\u9fa5]", MID(content, anchor, 80));
        arr[0, i] = "Books_zg_" + i + ".html";
    }

    // Resolve the desktop folder at run time instead of hard-coding the path of
    // one specific user profile, which does not exist on other machines.
    string outputPath = Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory),
        "index.html");
    Writefile(outputPath, Getlist(4, 700, arr));
}
/// <summary>
/// Returns up to <paramref name="length"/> characters of <paramref name="Content"/>,
/// starting at the first occurrence of <paramref name="StartString"/>
/// (the marker itself is part of the result).
/// </summary>
/// <param name="Content">Text to search.</param>
/// <param name="StartString">Marker whose first occurrence starts the excerpt.</param>
/// <param name="length">Maximum number of characters to return.</param>
/// <returns>
/// The excerpt, shortened when fewer than <paramref name="length"/> characters
/// remain after the marker; an empty string when an argument is null, the marker
/// is not found, or <paramref name="length"/> is not positive.
/// </returns>
private static string MID(string Content, string StartString, int length)
{
    if (Content == null || StartString == null || length <= 0)
    {
        return "";
    }

    // Ordinal search: the marker is markup, not linguistic text.
    int start = Content.IndexOf(StartString, StringComparison.Ordinal);
    if (start < 0)
    {
        // Previously -1 was passed straight to Substring, which throws.
        return "";
    }

    // Clamp so a marker close to the end of the text does not overrun it.
    int available = Content.Length - start;
    return Content.Substring(start, Math.Min(length, available));
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body
/// decoded as GB2312 text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The complete response body as a string.</returns>
private static string Gethtml(string url)
{
    // All three objects hold network or stream resources; dispose them
    // deterministically instead of leaving them to the garbage collector.
    using (WebClient client = new WebClient())
    using (Stream body = client.OpenRead(url))
    using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding("GB2312")))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Fetches the page at <paramref name="URL"/> with an HTTP GET request and
/// returns its body decoded as GB2312 text.
/// </summary>
/// <param name="URL">Absolute address of the page to fetch.</param>
/// <returns>The complete response body as a string.</returns>
static string GetPageSource(string URL)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(URL));

    // Request settings only take effect when applied before GetResponse()
    // sends the request; the earlier code set them afterwards.
    request.Method = "GET";
    request.KeepAlive = false;

    // Dispose the response and reader so the connection is released promptly.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Renders the entries of <paramref name="arr"/> as a complete HTML page
/// containing a table of links, <paramref name="col"/> links per row.
/// </summary>
/// <param name="col">Number of columns in the table; must be at least 1.</param>
/// <param name="ww">Width of the table; each cell gets <c>ww / col</c> rounded down.</param>
/// <param name="arr">
/// Entry data: for entry <c>k</c> (starting at 1), <c>arr[k,0]</c> is the link
/// text and <c>arr[0,k]</c> is the link target. Index 0 is not an entry.
/// Values are inserted into the markup verbatim, so callers must supply
/// HTML-safe text.
/// </param>
/// <returns>The HTML document as a string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="arr"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="col"/> is less than 1.</exception>
static string Getlist(int col, int ww, string[,] arr)
{
    if (arr == null)
    {
        throw new ArgumentNullException("arr");
    }
    if (col < 1)
    {
        throw new ArgumentOutOfRangeException("col", "At least one column is required.");
    }

    // Entry k needs both arr[k,0] and arr[0,k], so the usable count is bounded
    // by the smaller dimension; slot 0 is excluded. Counting slot 0 (as the
    // earlier code did) produced a spurious empty row whenever the number of
    // entries was a multiple of col.
    int itemCount = Math.Max(0, Math.Min(arr.GetLength(0), arr.GetLength(1)) - 1);
    int rowCount = (itemCount + col - 1) / col; // ceiling division
    int cellWidth = (int)Math.Floor(ww / Convert.ToDouble(col));

    StringBuilder sb = new StringBuilder();
    sb.Append("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=gb2312\"><title>书的列表</title><style type=\"text/css\"><!--BODY {SCROLLBAR-FACE-COLOR: #c5c5c5; MARGIN: 0px; FONT: 12px 宋体; SCROLLBAR-HIGHLIGHT-COLOR: #c5c5c5; SCROLLBAR-SHADOW-COLOR: #c5c5c5; SCROLLBAR-3DLIGHT-COLOR: #c5c5c5; SCROLLBAR-ARROW-COLOR: #ffffff; SCROLLBAR-TRACK-COLOR: #fffffd; SCROLLBAR-DARKSHADOW-COLOR: #c5c5c5;font-size:13px;}A.a03:link {COLOR: #1E1E9C; TEXT-DECORATION: underline}A.a03:visited { COLOR: #6d6e71; TEXT-DECORATION: none}A.a03:active {COLOR: #ff0000; TEXT-DECORATION: none}A.a03:hover {COLOR: #ff0000; TEXT-DECORATION: none}td{font-size:13px;}--></style></head><body><table width=\""+ww+"\" border=\"0\" cellspacing=\"0\" cellpadding=\"0\">");

    int index = 0;
    for (int r = 0; r < rowCount; r++)
    {
        sb.Append("<tr>");
        for (int c = 0; c < col; c++)
        {
            index++;
            sb.Append("<td width=\"" + cellWidth + "\">");
            // Cells after the last entry in the final row stay empty; an explicit
            // bounds check replaces the previously swallowed IndexOutOfRangeException.
            if (index <= itemCount)
            {
                sb.Append("<a href=\"" + arr[0, index] + "\" target=\"_blank\" class=\"a03\">" + arr[index, 0] + "</a>");
            }
            sb.Append("</td>");
        }
        sb.Append("</tr>");
        // Thin spacer row between link rows.
        sb.Append("<tr><td colspan=\"" + col + "\" height=\"7\"></td></tr>");
    }
    sb.Append("</table></body></html>");
    return sb.ToString();
}
/// <summary>
/// Writes <paramref name="text"/> to the file at <paramref name="path"/>,
/// replacing any existing file, encoded as GB2312.
/// </summary>
/// <param name="path">Destination file path.</param>
/// <param name="text">Content to write.</param>
private static void Writefile(string path, string text)
{
    // GB2312 keeps the Chinese text readable; the original author noted that
    // QuickCHM supports this encoding.
    Encoding gb2312 = Encoding.GetEncoding("gb2312");
    File.WriteAllText(path, text, gb2312);
}
/// <summary>
/// Extracts the distinct <c>http://</c> addresses found in
/// <paramref name="htmlCode"/> and returns them sorted.
/// </summary>
/// <param name="htmlCode">HTML source to scan; null or empty yields no links.</param>
/// <returns>
/// An <see cref="ArrayList"/> of strings without duplicates (exact,
/// case-sensitive comparison), sorted with the default comparer.
/// </returns>
static ArrayList GetHyperLinks(string htmlCode)
{
    ArrayList links = new ArrayList();
    if (htmlCode == null || htmlCode.Length == 0)
    {
        return links;
    }

    string strRegex = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
    Regex linkPattern = new Regex(strRegex, RegexOptions.IgnoreCase);

    // Hash-based membership test replaces rescanning the whole result list for
    // every match, which made de-duplication quadratic.
    Hashtable seen = new Hashtable();
    foreach (Match match in linkPattern.Matches(htmlCode))
    {
        string link = match.Value;
        if (!seen.ContainsKey(link))
        {
            seen.Add(link, null);
            links.Add(link);
        }
    }

    links.Sort();
    return links;
}
/// <summary>
/// Concatenates every match of <paramref name="reg"/> in <paramref name="str"/>
/// and, when <paramref name="str"/> contains a <c>(</c> followed by at least one
/// character, appends that single character wrapped in parentheses.
/// </summary>
/// <param name="reg">Regular expression whose matches are collected (matched case-insensitively).</param>
/// <param name="str">Text to scan; null or empty yields an empty string.</param>
/// <returns>
/// The concatenated matches, followed by <c>(x)</c> where <c>x</c> is the
/// character right after the first <c>(</c>; the suffix is omitted when there is
/// no such character.
/// </returns>
static string GetChinese(string reg, string str)
{
    if (str == null || str.Length == 0)
    {
        return "";
    }

    // StringBuilder avoids repeated string re-allocation while collecting matches.
    StringBuilder text = new StringBuilder();
    Regex pattern = new Regex(reg, RegexOptions.IgnoreCase);
    foreach (Match match in pattern.Matches(str))
    {
        text.Append(match.Value);
    }

    // Without this guard a missing "(" made the code pick the first character of
    // the input as the suffix, and a trailing "(" or empty input threw.
    int paren = str.IndexOf('(');
    if (paren >= 0 && paren + 1 < str.Length)
    {
        text.Append('(').Append(str[paren + 1]).Append(')');
    }
    return text.ToString();
}
}
}
using System.Net;
using System.IO;
using System.Text;
using System.Collections;
using System.Text.RegularExpressions;
namespace ConsoleApplication1
{
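/// <summary>
/// Console program that downloads a book's index page from book.sina.com.cn
/// and generates a local HTML index of chapter links.
/// </summary>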
class Class1
{
/// <summary>
/// Application entry point: downloads the book's index page, extracts the
/// link text of every chapter, and writes an HTML table of chapter links to
/// <c>index.html</c> on the current user's desktop.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
[STAThread]
static void Main(string[] args)
{
    // Chapters are numbered 1..chapterCount on the index page.
    const int chapterCount = 84;

    string indexUrl = "http://book.sina.com.cn/nzt/lit/dixiashi/index.shtml";
    string content = Gethtml(indexUrl);

    // Layout consumed by Getlist: arr[i,0] = link text, arr[0,i] = target file;
    // index 0 carries no entry.
    string[,] arr = new string[chapterCount + 1, chapterCount + 1];
    arr[0, 0] = "";
    for (int i = 1; i <= chapterCount; i++)
    {
        // Opening anchor tag of chapter i that is searched for in the page source.
        string anchor = "<a href=/nzt/lit/dixiashi/" + i + ".shtml target=_blank class=a03>";
        // Only the 80 characters starting at the anchor are scanned for the link text.
        arr[i, 0] = GetChinese("[\u4e00-\u9fa5]", MID(content, anchor, 80));
        arr[0, i] = "Books_zg_" + i + ".html";
    }

    // Resolve the desktop folder at run time instead of hard-coding the path of
    // one specific user profile, which does not exist on other machines.
    string outputPath = Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory),
        "index.html");
    Writefile(outputPath, Getlist(4, 700, arr));
}
/// <summary>
/// Returns up to <paramref name="length"/> characters of <paramref name="Content"/>,
/// starting at the first occurrence of <paramref name="StartString"/>
/// (the marker itself is part of the result).
/// </summary>
/// <param name="Content">Text to search.</param>
/// <param name="StartString">Marker whose first occurrence starts the excerpt.</param>
/// <param name="length">Maximum number of characters to return.</param>
/// <returns>
/// The excerpt, shortened when fewer than <paramref name="length"/> characters
/// remain after the marker; an empty string when an argument is null, the marker
/// is not found, or <paramref name="length"/> is not positive.
/// </returns>
private static string MID(string Content, string StartString, int length)
{
    if (Content == null || StartString == null || length <= 0)
    {
        return "";
    }

    // Ordinal search: the marker is markup, not linguistic text.
    int start = Content.IndexOf(StartString, StringComparison.Ordinal);
    if (start < 0)
    {
        // Previously -1 was passed straight to Substring, which throws.
        return "";
    }

    // Clamp so a marker close to the end of the text does not overrun it.
    int available = Content.Length - start;
    return Content.Substring(start, Math.Min(length, available));
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body
/// decoded as GB2312 text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The complete response body as a string.</returns>
private static string Gethtml(string url)
{
    // All three objects hold network or stream resources; dispose them
    // deterministically instead of leaving them to the garbage collector.
    using (WebClient client = new WebClient())
    using (Stream body = client.OpenRead(url))
    using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding("GB2312")))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Fetches the page at <paramref name="URL"/> with an HTTP GET request and
/// returns its body decoded as GB2312 text.
/// </summary>
/// <param name="URL">Absolute address of the page to fetch.</param>
/// <returns>The complete response body as a string.</returns>
static string GetPageSource(string URL)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(URL));

    // Request settings only take effect when applied before GetResponse()
    // sends the request; the earlier code set them afterwards.
    request.Method = "GET";
    request.KeepAlive = false;

    // Dispose the response and reader so the connection is released promptly.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Renders the entries of <paramref name="arr"/> as a complete HTML page
/// containing a table of links, <paramref name="col"/> links per row.
/// </summary>
/// <param name="col">Number of columns in the table; must be at least 1.</param>
/// <param name="ww">Width of the table; each cell gets <c>ww / col</c> rounded down.</param>
/// <param name="arr">
/// Entry data: for entry <c>k</c> (starting at 1), <c>arr[k,0]</c> is the link
/// text and <c>arr[0,k]</c> is the link target. Index 0 is not an entry.
/// Values are inserted into the markup verbatim, so callers must supply
/// HTML-safe text.
/// </param>
/// <returns>The HTML document as a string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="arr"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="col"/> is less than 1.</exception>
static string Getlist(int col, int ww, string[,] arr)
{
    if (arr == null)
    {
        throw new ArgumentNullException("arr");
    }
    if (col < 1)
    {
        throw new ArgumentOutOfRangeException("col", "At least one column is required.");
    }

    // Entry k needs both arr[k,0] and arr[0,k], so the usable count is bounded
    // by the smaller dimension; slot 0 is excluded. Counting slot 0 (as the
    // earlier code did) produced a spurious empty row whenever the number of
    // entries was a multiple of col.
    int itemCount = Math.Max(0, Math.Min(arr.GetLength(0), arr.GetLength(1)) - 1);
    int rowCount = (itemCount + col - 1) / col; // ceiling division
    int cellWidth = (int)Math.Floor(ww / Convert.ToDouble(col));

    StringBuilder sb = new StringBuilder();
    sb.Append("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=gb2312\"><title>书的列表</title><style type=\"text/css\"><!--BODY {SCROLLBAR-FACE-COLOR: #c5c5c5; MARGIN: 0px; FONT: 12px 宋体; SCROLLBAR-HIGHLIGHT-COLOR: #c5c5c5; SCROLLBAR-SHADOW-COLOR: #c5c5c5; SCROLLBAR-3DLIGHT-COLOR: #c5c5c5; SCROLLBAR-ARROW-COLOR: #ffffff; SCROLLBAR-TRACK-COLOR: #fffffd; SCROLLBAR-DARKSHADOW-COLOR: #c5c5c5;font-size:13px;}A.a03:link {COLOR: #1E1E9C; TEXT-DECORATION: underline}A.a03:visited { COLOR: #6d6e71; TEXT-DECORATION: none}A.a03:active {COLOR: #ff0000; TEXT-DECORATION: none}A.a03:hover {COLOR: #ff0000; TEXT-DECORATION: none}td{font-size:13px;}--></style></head><body><table width=\""+ww+"\" border=\"0\" cellspacing=\"0\" cellpadding=\"0\">");

    int index = 0;
    for (int r = 0; r < rowCount; r++)
    {
        sb.Append("<tr>");
        for (int c = 0; c < col; c++)
        {
            index++;
            sb.Append("<td width=\"" + cellWidth + "\">");
            // Cells after the last entry in the final row stay empty; an explicit
            // bounds check replaces the previously swallowed IndexOutOfRangeException.
            if (index <= itemCount)
            {
                sb.Append("<a href=\"" + arr[0, index] + "\" target=\"_blank\" class=\"a03\">" + arr[index, 0] + "</a>");
            }
            sb.Append("</td>");
        }
        sb.Append("</tr>");
        // Thin spacer row between link rows.
        sb.Append("<tr><td colspan=\"" + col + "\" height=\"7\"></td></tr>");
    }
    sb.Append("</table></body></html>");
    return sb.ToString();
}
/// <summary>
/// Writes <paramref name="text"/> to the file at <paramref name="path"/>,
/// replacing any existing file, encoded as GB2312.
/// </summary>
/// <param name="path">Destination file path.</param>
/// <param name="text">Content to write.</param>
private static void Writefile(string path, string text)
{
    // GB2312 keeps the Chinese text readable; the original author noted that
    // QuickCHM supports this encoding.
    Encoding gb2312 = Encoding.GetEncoding("gb2312");
    File.WriteAllText(path, text, gb2312);
}
/// <summary>
/// Extracts the distinct <c>http://</c> addresses found in
/// <paramref name="htmlCode"/> and returns them sorted.
/// </summary>
/// <param name="htmlCode">HTML source to scan; null or empty yields no links.</param>
/// <returns>
/// An <see cref="ArrayList"/> of strings without duplicates (exact,
/// case-sensitive comparison), sorted with the default comparer.
/// </returns>
static ArrayList GetHyperLinks(string htmlCode)
{
    ArrayList links = new ArrayList();
    if (htmlCode == null || htmlCode.Length == 0)
    {
        return links;
    }

    string strRegex = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
    Regex linkPattern = new Regex(strRegex, RegexOptions.IgnoreCase);

    // Hash-based membership test replaces rescanning the whole result list for
    // every match, which made de-duplication quadratic.
    Hashtable seen = new Hashtable();
    foreach (Match match in linkPattern.Matches(htmlCode))
    {
        string link = match.Value;
        if (!seen.ContainsKey(link))
        {
            seen.Add(link, null);
            links.Add(link);
        }
    }

    links.Sort();
    return links;
}
/// <summary>
/// Concatenates every match of <paramref name="reg"/> in <paramref name="str"/>
/// and, when <paramref name="str"/> contains a <c>(</c> followed by at least one
/// character, appends that single character wrapped in parentheses.
/// </summary>
/// <param name="reg">Regular expression whose matches are collected (matched case-insensitively).</param>
/// <param name="str">Text to scan; null or empty yields an empty string.</param>
/// <returns>
/// The concatenated matches, followed by <c>(x)</c> where <c>x</c> is the
/// character right after the first <c>(</c>; the suffix is omitted when there is
/// no such character.
/// </returns>
static string GetChinese(string reg, string str)
{
    if (str == null || str.Length == 0)
    {
        return "";
    }

    // StringBuilder avoids repeated string re-allocation while collecting matches.
    StringBuilder text = new StringBuilder();
    Regex pattern = new Regex(reg, RegexOptions.IgnoreCase);
    foreach (Match match in pattern.Matches(str))
    {
        text.Append(match.Value);
    }

    // Without this guard a missing "(" made the code pick the first character of
    // the input as the suffix, and a trailing "(" or empty input threw.
    int paren = str.IndexOf('(');
    if (paren >= 0 && paren + 1 < str.Length)
    {
        text.Append('(').Append(str[paren + 1]).Append(')');
    }
    return text.ToString();
}
}
}