using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace allen
{
class Program
{
    /// <summary>URL template of the paginated article; {0} is the 1-based page number.</summary>
    private const string PageUrlFormat = "http://www.txt66.com/read2.asp?id=8480&PageNum={0}";

    /// <summary>File the extracted text is appended to.</summary>
    private const string OutputPath = @"F:\g.txt";

    /// <summary>First page number requested.</summary>
    private const int FirstPage = 1;

    /// <summary>Last page number requested (inclusive).</summary>
    private const int LastPage = 123;

    /// <summary>Pause between two page requests, in milliseconds.</summary>
    private const int DelayBetweenRequestsMs = 600;

    /// <summary>
    /// Markup that immediately precedes the article body, as it appears AFTER all
    /// whitespace has been stripped from the page.
    /// </summary>
    private const string ContentStartMarker = "点此下载封神演义.txt</font></font></a></div></td>";

    /// <summary>
    /// Markup that immediately follows the article body, as it appears AFTER all
    /// whitespace has been stripped from the page (hence "tdclass" without a space).
    /// </summary>
    private const string ContentEndMarker = "</div></td></tr><tr><tdclass=";

    /// <summary>Matches any run of whitespace. Built once instead of once per page.</summary>
    private static readonly Regex WhitespacePattern = new Regex(@"\s+");

    /// <summary>
    /// Matches from the first start marker to the last end marker (greedy).
    /// The markers are escaped so that the '.' in ".txt" only matches a literal dot,
    /// keeping the regex consistent with the literal Replace calls in MyFilter.
    /// </summary>
    private static readonly Regex ContentPattern =
        new Regex(Regex.Escape(ContentStartMarker) + ".*" + Regex.Escape(ContentEndMarker));

    /// <summary>
    /// Downloads the resource at the given URL and returns its body as text.
    /// </summary>
    /// <param name="url">Absolute HTTP/HTTPS URL to fetch.</param>
    /// <returns>The response body decoded with <see cref="Encoding.Default"/>.</returns>
    /// <exception cref="InvalidCastException">The URL does not use an HTTP scheme.</exception>
    /// <exception cref="WebException">The request failed.</exception>
    static string GetHtml(string url)
    {
        // Direct casts fail fast with a clear exception instead of a later NullReferenceException.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // using guarantees the connection, stream and reader are released even if reading throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        // NOTE(review): Encoding.Default depends on the runtime/machine (system ANSI code page on
        // .NET Framework, UTF-8 on .NET Core and later). Kept as in the original; confirm it
        // matches the charset the site actually serves.
        using (StreamReader reader = new StreamReader(stream, Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Filter that keeps only the article body of a downloaded page.
    /// </summary>
    /// <param name="htmlStr">Raw HTML of one page.</param>
    /// <returns>
    /// The article text with all whitespace removed and every "&lt;br&gt;" turned into
    /// <see cref="Environment.NewLine"/>; an empty string when the input is null/empty or
    /// the body markers are not found.
    /// </returns>
    static string MyFilter(string htmlStr)
    {
        if (string.IsNullOrEmpty(htmlStr))
        {
            return string.Empty;
        }

        // Strip every whitespace character first so the markers can be matched
        // regardless of how the page is formatted.
        string compactHtml = WhitespacePattern.Replace(htmlStr, "");

        Match match = ContentPattern.Match(compactHtml);
        if (!match.Success)
        {
            return string.Empty;
        }

        string result = match.Value;
        // Remove the surrounding markers and leftover table/site-credit fragments.
        result = result.Replace(ContentStartMarker, "");
        result = result.Replace(ContentEndMarker, "");
        result = result.Replace("</tr></table>", "");
        result = result.Replace("本文章下载于www.Txt66.com", "");
        // Line breaks were encoded as <br>; restore them as real newlines.
        result = result.Replace("<br>", Environment.NewLine);
        return result;
    }

    /// <summary>
    /// Downloads every page of the article in order, extracts its text and appends it
    /// to the output file, pausing between requests.
    /// </summary>
    static void WriteFile()
    {
        // using flushes and closes the file even if a download fails half-way, so
        // already-extracted pages are not lost and the handle is not leaked.
        using (StreamWriter writer = new StreamWriter(OutputPath, true, Encoding.Unicode))
        {
            for (int page = FirstPage; page <= LastPage; page++)
            {
                string pageUrl = string.Format(PageUrlFormat, page);
                string html = GetHtml(pageUrl);
                writer.Write(MyFilter(html));
                Console.WriteLine("写入第{0}页", page);

                // Throttle requests between pages.
                System.Threading.Thread.Sleep(DelayBetweenRequestsMs);
            }
        }
    }

    /// <summary>
    /// Program entry point: runs the download and waits for a key press before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        WriteFile();
        Console.ReadKey();
    }
}
}