// Program that fetches the book list from the Sina reading channel (获取新浪读书频道,书的列表程序)

using System;
using System.Net;
using System.IO;
using System.Text;
using System.Collections;
using System.Text.RegularExpressions;

namespace ConsoleApplication1
{
    
/// <summary>
    
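/// Console program that downloads a Sina book index page, extracts the text of its numbered links and writes a local HTML link table.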
    
/// </summary>

    class Class1
    
{
        
/// <summary>
        
/// 应用程序的主入口点。
        
/// </summary>

        [STAThread]
        
static void Main(string[] args)
        
{
            
string url="http://book.sina.com.cn/nzt/lit/dixiashi/index.shtml";
            
string content=Gethtml(url);

            
string[,] arr=new string[85,85];
            arr[
0,0]="";
            
for(int i=1;i<=84;i++)
            
{
                url
="<a href=/nzt/lit/dixiashi/"+i+".shtml target=_blank class=a03>";
                
string temp_1=GetChinese("[\u4e00-\u9fa5]",MID(content,url,80));//链接文字
                string temp_2="Books_zg_"+i+".html";
                arr[i,
0]=temp_1;
                arr[
0,i]=temp_2;
            }


            content
=Getlist(4,700,arr);
            Writefile(
@"C:\Documents and Settings\Administrator\桌面\index.html",content);
        }


        
private static string MID(string Content,string StartString,int length)
        
{
            
string Intercept=Content;
            
int a=Intercept.IndexOf(StartString);
            
string aa=Intercept.Substring(a,length);
            
return aa;
        }


        
/// <summary>
        
/// 获取网页html代码
        
/// </summary>
        
/// <param name="url"></param>
        
/// <returns></returns>

        private static string Gethtml(string url)
        
{
            WebClient wc
=new WebClient();
            Stream str
=wc.OpenRead(url);
            StreamReader sr
=new StreamReader(str,System.Text.Encoding.GetEncoding("GB2312"));
            
return sr.ReadToEnd();
        }


        
// 获取指定网页的HTML代码
        static string GetPageSource(string URL)
        
{
            Uri uri 
=new Uri(URL);

            HttpWebRequest hwReq 
= (HttpWebRequest)WebRequest.Create(uri);
            HttpWebResponse hwRes 
= (HttpWebResponse)hwReq.GetResponse();

            hwReq.Method 
= "Get";

            hwReq.KeepAlive 
= false;

            StreamReader reader 
= new StreamReader(hwRes.GetResponseStream(),System.Text.Encoding.GetEncoding("GB2312"));

            
return reader.ReadToEnd();
        }



        
/// <summary>
        
/// 获取列表
        
/// </summary>
        
/// <param name="col">列数</param>
        
/// <param name="ww">table的宽度</param>
        
/// <param name="arr">数组</param>
        
/// <returns></returns>

        static string  Getlist(int col,int ww,string[,] arr)
        
{
            
int temp_1=arr.GetLength(0);
            
int row=(int)System.Math.Ceiling(temp_1/Convert.ToDouble(col));//行数

            
int temp_2=0;

            
int temp_3=(int)System.Math.Floor(ww/Convert.ToDouble(col));//得到每列的宽度


            StringBuilder sb
=new StringBuilder();
            sb.Append(
"<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=gb2312\"><title>书的列表</title><style type=\"text/css\"><!--BODY {SCROLLBAR-FACE-COLOR: #c5c5c5; MARGIN: 0px; FONT: 12px 宋体; SCROLLBAR-HIGHLIGHT-COLOR: #c5c5c5; SCROLLBAR-SHADOW-COLOR: #c5c5c5; SCROLLBAR-3DLIGHT-COLOR: #c5c5c5; SCROLLBAR-ARROW-COLOR: #ffffff; SCROLLBAR-TRACK-COLOR: #fffffd; SCROLLBAR-DARKSHADOW-COLOR: #c5c5c5;font-size:13px;}A.a03:link {COLOR: #1E1E9C; TEXT-DECORATION: underline}A.a03:visited {    COLOR: #6d6e71; TEXT-DECORATION: none}A.a03:active {COLOR: #ff0000; TEXT-DECORATION: none}A.a03:hover {COLOR: #ff0000; TEXT-DECORATION: none}td{font-size:13px;}--></style></head><body><table width=\""+ww+"\" border=\"0\" cellspacing=\"0\" cellpadding=\"0\">");
            
for(int i=1;i<=row;i++)
            
{
                sb.Append(
"<tr>");
                
for(int j=0;j<col;j++)
                
{
                    temp_2
++;
                    sb.Append(
"<td width=\""+temp_3+"\">");
                    
try
                    
{
                        sb.Append(
"<a href=\""+arr[0,temp_2]+"\" target=\"_blank\" class=\"a03\">"+arr[temp_2,0]+"</a>");
                    }

                    
catch
                    
{
                    }

                    sb.Append(
"</td>");
                }

                sb.Append(
"</tr>");
                sb.Append(
"<tr><td colspan=\""+col+"\" height=\"7\"></td></tr>");
            }

            sb.Append(
"</table></body></html>");

            
return sb.ToString();
        }



        
/// <summary>
        
/// 写文件
        
/// </summary>
        
/// <param name="path"></param>
        
/// <param name="text"></param>

        private static void Writefile(string path,string text)
        
{
            
using(StreamWriter sw=new StreamWriter(path,false,System.Text.Encoding.GetEncoding("gb2312"))) //中文,QuickCHM支持
            {
                sw.Write(text);
            }

        }


        
// 提取HTML代码中的网址
        static ArrayList GetHyperLinks(string htmlCode)
        
{
            ArrayList al 
= new ArrayList();

            
string strRegex = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";

            Regex r 
= new Regex(strRegex,RegexOptions.IgnoreCase);
            MatchCollection m 
= r.Matches(htmlCode);

            
for(int i=0; i<=m.Count-1; i++)
            
{
                
bool rep = false;
                
string strNew = m[i].ToString();

                
// 过滤重复的URL
                foreach(string str in al)
                
{
                    
if(strNew==str)
                    
{
                        rep 
=true;
                        
break;
                    }
   
                }


                
if(!rep) al.Add(strNew);
            }


            al.Sort();

            
return al;
        }



        
static string GetChinese(string reg,string str)
        
{
            
int temp=str.IndexOf("(");
            
string temp_1=str.Substring(temp+1,1);
            Regex r 
= new Regex(reg,RegexOptions.IgnoreCase);
            MatchCollection m 
= r.Matches(str);
            
            
string strNew="";
            
for(int i=0; i<=m.Count-1; i++)
            
{
                strNew 
+= m[i].ToString();
            }


            
return strNew+"("+temp_1+")";
        }


    }

}

// posted on 2006-12-25 10:55  感動常在  阅读(546)  评论(1)  编辑  收藏  举报