// 获取新浪读书频道,书的列表程序 (Program that fetches the book list from the Sina Books channel)

using System;

using System.Net;

using System.IO;

using System.Text;

using System.Collections;

using System.Text.RegularExpressions;

namespace ConsoleApplication1

{

/// <summary>

/// Class1 的摘要说明。

/// </summary>

class Class1

{

/// <summary>
/// Application entry point. Downloads the index page, extracts the text of
/// the numbered links 1..84, renders them as a 4-column HTML table that is
/// 700 wide, and writes the result to index.html on the Administrator desktop.
/// </summary>
[STAThread]
static void Main(string[] args)
{
    const int linkCount = 84;
    string pageHtml = Gethtml("http://book.sina.com.cn/nzt/lit/dixiashi/index.shtml");

    // Index 0 is a placeholder: titles live in [n,0] and target file names in [0,n].
    string[,] entries = new string[linkCount + 1, linkCount + 1];
    entries[0, 0] = "";

    int n = 1;
    while (n <= linkCount)
    {
        // Opening tag of the n-th link exactly as it appears in the page markup.
        string anchor = "<a href=/nzt/lit/dixiashi/" + n + ".shtml target=_blank class=a03>";
        string snippet = MID(pageHtml, anchor, 80);

        entries[n, 0] = GetChinese("[\u4e00-\u9fa5]", snippet); // link text
        entries[0, n] = "Books_zg_" + n + ".html";
        n++;
    }

    string listHtml = Getlist(4, 700, entries);
    Writefile(@"C:\Documents and Settings\Administrator\桌面\index.html", listHtml);
}

/// <summary>
/// Returns up to <paramref name="length"/> characters of
/// <paramref name="Content"/>, starting at the first occurrence of
/// <paramref name="StartString"/> (the start string itself is included).
/// </summary>
/// <param name="Content">Text to search.</param>
/// <param name="StartString">Marker whose first occurrence begins the result.</param>
/// <param name="length">Maximum number of characters to return.</param>
/// <returns>
/// The extracted text, shortened when fewer than <paramref name="length"/>
/// characters remain; an empty string when either argument is null or the
/// marker is not found.
/// </returns>
private static string MID(string Content,string StartString,int length)
{
    if (Content == null || StartString == null)
    {
        return string.Empty;
    }

    // Ordinal search: the marker is markup, not linguistic text.
    int start = Content.IndexOf(StartString, StringComparison.Ordinal);
    if (start < 0)
    {
        // Previously -1 was passed straight to Substring, which throws.
        return string.Empty;
    }

    // Clamp so a marker close to the end of the content does not overrun it.
    int available = Content.Length - start;
    return Content.Substring(start, Math.Min(length, available));
}

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body
/// decoded as GB2312 text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The complete response body as a string.</returns>
private static string Gethtml(string url)
{
    // Dispose the client, the response stream and the reader deterministically
    // instead of leaving the connection open until finalization.
    using (WebClient wc = new WebClient())
    using (Stream str = wc.OpenRead(url))
    using (StreamReader sr = new StreamReader(str, System.Text.Encoding.GetEncoding("GB2312")))
    {
        return sr.ReadToEnd();
    }
}

/// <summary>
/// Fetches the page at <paramref name="URL"/> with an HTTP GET request and
/// returns the response body decoded as GB2312 text.
/// </summary>
/// <param name="URL">Absolute address of the page to fetch.</param>
/// <returns>The complete response body as a string.</returns>
static string GetPageSource(string URL)
{
    Uri uri = new Uri(URL);
    HttpWebRequest hwReq = (HttpWebRequest)WebRequest.Create(uri);

    // Configure the request BEFORE GetResponse(): that call sends it, so
    // settings applied afterwards never reach the wire.
    hwReq.Method = "GET";
    hwReq.KeepAlive = false;

    // Dispose the response and reader so the connection is released promptly.
    using (HttpWebResponse hwRes = (HttpWebResponse)hwReq.GetResponse())
    using (StreamReader reader = new StreamReader(hwRes.GetResponseStream(), System.Text.Encoding.GetEncoding("GB2312")))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Renders the entries of <paramref name="arr"/> as an HTML page containing
/// a table of links, filled left to right and top to bottom.
/// </summary>
/// <param name="col">Number of columns in the table; must be positive.</param>
/// <param name="ww">Value written to the table's width attribute; each cell gets floor(ww / col).</param>
/// <param name="arr">
/// Entry table: for i &gt;= 1, arr[i,0] is the link text and arr[0,i] is the
/// link target. Index 0 is not an entry.
/// </param>
/// <returns>The complete HTML document as a string.</returns>
static string Getlist(int col,int ww,string[,] arr)
{
    if (arr == null)
    {
        throw new ArgumentNullException("arr");
    }
    if (col <= 0)
    {
        throw new ArgumentOutOfRangeException("col", "The column count must be positive.");
    }

    // Entries occupy indices 1..itemCount in both dimensions; index 0 is a
    // placeholder and must not be counted (counting it produced a trailing
    // empty row).
    int itemCount = Math.Max(0, Math.Min(arr.GetLength(0), arr.GetLength(1)) - 1);
    int rowCount = itemCount / col + (itemCount % col == 0 ? 0 : 1); // ceiling division
    int cellWidth = (int)Math.Floor(ww / Convert.ToDouble(col));     // width of each column

    StringBuilder sb = new StringBuilder();
    sb.Append("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=gb2312\"><title>书的列表</title><style type=\"text/css\"></style></head><body><table width=\"");
    sb.Append(ww);
    sb.Append("\" border=\"0\" cellspacing=\"0\" cellpadding=\"0\">");

    int index = 0;
    for (int row = 0; row < rowCount; row++)
    {
        sb.Append("<tr>");
        for (int c = 0; c < col; c++)
        {
            index++;
            sb.Append("<td width=\"").Append(cellWidth).Append("\">");

            // Explicit bounds check instead of swallowing an
            // IndexOutOfRangeException; padding cells of the last row stay empty.
            if (index <= itemCount)
            {
                sb.Append("<a href=\"").Append(arr[0, index]);
                sb.Append("\" target=\"_blank\" class=\"a03\">").Append(arr[index, 0]).Append("</a>");
            }

            sb.Append("</td>");
        }
        sb.Append("</tr>");

        // Thin spacer row between the rows of links.
        sb.Append("<tr><td colspan=\"").Append(col).Append("\" height=\"7\"></td></tr>");
    }

    sb.Append("</table></body></html>");
    return sb.ToString();
}

/// <summary>
/// Writes <paramref name="text"/> to the file at <paramref name="path"/>,
/// replacing any existing content, encoded as GB2312.
/// </summary>
/// <param name="path">Path of the file to write.</param>
/// <param name="text">Content to write.</param>
private static void Writefile(string path,string text)
{
    // GB2312 keeps the Chinese text readable by QuickCHM.
    using (StreamWriter sw = new StreamWriter(path, false, System.Text.Encoding.GetEncoding("gb2312")))
    {
        sw.Write(text);
    }
}

/// <summary>
/// Extracts the distinct http:// URLs found in <paramref name="htmlCode"/>.
/// </summary>
/// <param name="htmlCode">Markup (or any text) to scan for URLs.</param>
/// <returns>An <see cref="ArrayList"/> of unique URL strings in sorted order.</returns>
static ArrayList GetHyperLinks(string htmlCode)
{
    ArrayList al = new ArrayList();
    string strRegex = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
    Regex r = new Regex(strRegex, RegexOptions.IgnoreCase);

    foreach (Match match in r.Matches(htmlCode))
    {
        string strNew = match.Value;

        // Skip URLs already collected. The add must happen outside any
        // enumeration of the list: inside it, nothing is ever added to an
        // empty list and a non-empty list would be modified while enumerated.
        if (!al.Contains(strNew))
        {
            al.Add(strNew);
        }
    }

    // Sort once, after every match has been processed.
    al.Sort();
    return al;
}

/// <summary>
/// Concatenates every match of <paramref name="reg"/> in
/// <paramref name="str"/> and, when <paramref name="str"/> contains "(",
/// appends the single character following the first "(" wrapped in
/// parentheses.
/// </summary>
/// <param name="reg">Regular expression selecting the characters to keep.</param>
/// <param name="str">Text to extract from.</param>
/// <returns>
/// The concatenated matches followed by "(x)", where x is the character after
/// the first "("; just the concatenated matches when there is no "(" or
/// nothing follows it; an empty string for null or empty input.
/// </returns>
static string GetChinese(string reg,string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    Regex r = new Regex(reg, RegexOptions.IgnoreCase);
    StringBuilder matched = new StringBuilder();
    foreach (Match m in r.Matches(str))
    {
        matched.Append(m.Value);
    }

    // Without this check a missing "(" gives index -1, and the first
    // character of the input was appended as a bogus suffix.
    int paren = str.IndexOf('(');
    if (paren < 0 || paren + 1 >= str.Length)
    {
        return matched.ToString();
    }

    return matched.ToString() + "(" + str[paren + 1] + ")";
}

// posted on 2006-12-25 10:55 感動常在 阅读(547) 评论(1) 编辑 收藏 举报