批量下载小说网站文章
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;

namespace DownloadNovel
{
    /// <summary>
    /// Console tool that downloads every chapter listed on a novel's
    /// table-of-contents page and appends the text to a dated file
    /// under the "novel" directory of the current working directory.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Root URL of the novel web site; chapter links are appended to it.
            string webSiteUrl = "http://www.biqugew.com";
            // URL of the table-of-contents page of the novel to download.
            string NovelUrl = "http://www.biqugew.com/book/10/";

            DownNovel(webSiteUrl, NovelUrl);
        }

        /// <summary>
        /// Downloads the table-of-contents page, then each chapter it links to,
        /// and appends "title + newline + text" for every chapter via <see cref="WriteLog"/>.
        /// </summary>
        /// <param name="webSiteUrl">Site root that is prefixed to each chapter's href.</param>
        /// <param name="NovelUrl">URL of the novel's table-of-contents page.</param>
        private static void DownNovel(string webSiteUrl, string NovelUrl)
        {
            string[] split = { "<br>", "\r\n" };
            // XPath of the chapter entries on the table-of-contents page.
            // NOTE(review): unlike the two XPaths below this one does not start at /html —
            // confirm it actually matches the live page.
            string TableXpath = "/body[1]/div[1]/div[5]/div[1]/dl[1]/dd";
            // XPath of the chapter title on a chapter page.
            string TitleXpath = "/html[1]/body[1]/div[1]/div[3]/div[1]/div[2]/h1[1]";
            // XPath of the chapter text on a chapter page.
            string ContentsXpath = "/html/body/div/div[3]/div/div[3]";

            // WebClient is IDisposable; release it even when a download throws.
            using (WebClient client = new WebClient { Encoding = Encoding.GetEncoding("GB2312") })
            {
                // Fetch and parse the table-of-contents page.
                HtmlDocument tableDoc = new HtmlDocument();
                tableDoc.LoadHtml(client.DownloadString(NovelUrl));
                HtmlNodeCollection nodes = tableDoc.DocumentNode.SelectNodes(TableXpath);

                // SelectNodes yields null (not an empty collection) when nothing matches.
                if (nodes == null)
                {
                    Console.Error.WriteLine("No chapter entries found at " + NovelUrl);
                    return;
                }

                foreach (HtmlNode node in nodes)
                {
                    // Resolve the chapter URL from the entry's <a href="...">.
                    HtmlNode link = node.SelectSingleNode("a");
                    HtmlAttribute href = link == null ? null : link.Attributes["href"];
                    if (href == null)
                    {
                        // Entry without a link: nothing to download.
                        continue;
                    }
                    string url = webSiteUrl + href.Value;

                    // Fetch and parse the chapter page.
                    HtmlDocument doc = new HtmlDocument();
                    doc.LoadHtml(client.DownloadString(url));

                    HtmlNode titleNode = doc.DocumentNode.SelectSingleNode(TitleXpath);
                    HtmlNode contentNode = doc.DocumentNode.SelectSingleNode(ContentsXpath);
                    if (titleNode == null || contentNode == null)
                    {
                        Console.Error.WriteLine("Skipped (unexpected page layout): " + url);
                        continue;
                    }

                    string title = titleNode.InnerHtml;
                    // NOTE(review): this strips plain spaces; the original author may have
                    // intended to strip "&nbsp;" entities — confirm against the page source.
                    string str = contentNode.InnerHtml.Replace(" ", "");
                    string aticale = ExtractArticle(str, split);

                    Console.WriteLine(title);
                    WriteLog(title + Environment.NewLine + aticale);
                }
            }
        }

        /// <summary>
        /// Splits the chapter HTML on the given separators and concatenates every
        /// non-empty fragment that does not contain an anchor tag ("&lt;a").
        /// </summary>
        /// <param name="html">Inner HTML of the chapter content node.</param>
        /// <param name="separators">Strings on which the HTML is split.</param>
        /// <returns>The concatenated fragments, without separators.</returns>
        private static string ExtractArticle(string html, string[] separators)
        {
            // StringBuilder avoids quadratic copying for long chapters.
            StringBuilder article = new StringBuilder();
            foreach (string fragment in html.Split(separators, StringSplitOptions.RemoveEmptyEntries))
            {
                if (!fragment.Contains("<a"))
                {
                    article.Append(fragment);
                }
            }
            return article.ToString();
        }

        /// <summary>
        /// Appends <paramref name="msg"/> followed by a blank line to
        /// "novel/yyyy-MM-dd.txt" under the current working directory,
        /// creating the directory if necessary.
        /// </summary>
        /// <param name="msg">Text to append.</param>
        static void WriteLog(string msg)
        {
            string path = Path.Combine(Environment.CurrentDirectory, "novel");
            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(path);

            string fileName = DateTime.Now.ToString("yyyy-MM-dd");
            string filepath = Path.Combine(path, fileName + ".txt");

            // using-blocks guarantee the handle is released even if a write throws;
            // disposing the writer also flushes it.
            using (Stream fileStream = File.Open(filepath, FileMode.Append, FileAccess.Write, FileShare.Write))
            using (StreamWriter writeAdapter = new StreamWriter(fileStream, Encoding.Default))
            {
                writeAdapter.WriteLine(msg);
                writeAdapter.WriteLine();
            }
        }
    }
}
一个自由.NET开发者
bingqiang1903@gmail.com
https://www.cnblogs.com/sunbingqiang/