topic
using HtmlAgilityPack;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System.IO;

namespace EasySpider
{
    /// <summary>
    /// Extracts the four-level topic hierarchy from the HTML of a topic-organize page
    /// and appends it to a file, one JSON array per line.
    /// </summary>
    public class ReadZhihu
    {
        /// <summary>File the hierarchy is appended to when the caller does not supply a path.</summary>
        private const string DefaultStructureFile = @"D:\学科Struct.json";

        /// <summary>
        /// Parses <paramref name="document"/> and appends every four-level topic chain
        /// to the default structure file.
        /// </summary>
        /// <param name="document">Raw HTML of the page to parse.</param>
        public static void FormatDocument(string document)
        {
            FormatDocument(document, DefaultStructureFile);
        }

        /// <summary>
        /// Parses <paramref name="document"/> and appends every four-level topic chain
        /// to <paramref name="outputFile"/>.
        /// </summary>
        /// <param name="document">Raw HTML of the page to parse.</param>
        /// <param name="outputFile">Path of the file each chain is appended to.</param>
        /// <remarks>
        /// Each output line is a JSON array of four objects (level 1 to level 4), each with
        /// a "topic" and a "url" property. Nothing is written when the page does not contain
        /// the topic container or has no topics nested four levels deep.
        /// </remarks>
        public static void FormatDocument(string document, string outputFile)
        {
            HtmlDocument htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(document);

            // SelectSingleNode returns null when the container is absent from the page.
            HtmlNode container = htmlDocument.DocumentNode.SelectSingleNode(".//div[@id=\"zh-topic-organize-page-children\"]");
            if (container == null)
            {
                return;
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            HtmlNodeCollection level4 = container.SelectNodes(".//ul/li/ul/li/ul/li/ul/li/a[@name=\"topic\"]");
            if (level4 == null)
            {
                return;
            }

            foreach (HtmlNode item in level4)
            {
                // From the level-4 anchor, the enclosing <li> elements of levels 3, 2 and 1
                // are 3, 5 and 7 parents up (anchor -> li -> ul -> li -> ...).
                JArray structure = new JArray();
                structure.Add(DescribeEnclosingTopic(item, 7));
                structure.Add(DescribeEnclosingTopic(item, 5));
                structure.Add(DescribeEnclosingTopic(item, 3));
                structure.Add(GenereateObject(item.GetAttributeValue("href", string.Empty), item.InnerText));

                WriteData(structure, outputFile);
            }
        }

        /// <summary>
        /// Builds the JSON object for one topic.
        /// </summary>
        /// <param name="url">Value stored under the "url" property.</param>
        /// <param name="topic">Value stored under the "topic" property.</param>
        /// <returns>A new object with the properties "topic" and "url", in that order.</returns>
        public static JObject GenereateObject(string url, string topic)
        {
            JObject obj = new JObject();
            obj.Add("topic", topic);
            obj.Add("url", url);
            return obj;
        }

        /// <summary>
        /// Serializes <paramref name="obj"/> to JSON and appends it as a single line
        /// to <paramref name="fileName"/>.
        /// </summary>
        /// <param name="obj">Object to serialize.</param>
        /// <param name="fileName">Path of the target file; it is created if it does not exist.</param>
        public static void WriteData(object obj, string fileName)
        {
            string json = JsonConvert.SerializeObject(obj);

            // Opening in append mode creates a missing file. A separate File.Create call
            // would return an open FileStream that keeps the file locked until disposed,
            // making this open fail the first time the file is created.
            using (StreamWriter writer = new StreamWriter(fileName, true))
            {
                writer.WriteLine(json);
            }
        }

        /// <summary>
        /// Walks <paramref name="levelsUp"/> parents up from <paramref name="node"/> and
        /// describes the first anchor found beneath that ancestor.
        /// </summary>
        private static JObject DescribeEnclosingTopic(HtmlNode node, int levelsUp)
        {
            HtmlNode ancestor = node;
            for (int i = 0; i < levelsUp; i++)
            {
                ancestor = ancestor.ParentNode;
            }

            HtmlNode anchor = ancestor.SelectSingleNode(".//a[1]");
            return GenereateObject(anchor.GetAttributeValue("href", string.Empty), anchor.InnerText);
        }
    }
}
自动
// Not read anywhere in this snippet; kept because it is a global other page scripts may use.
var count = 0;

/**
 * Clicks one of the elements named "load" on the page.
 *
 * The element with the smallest offsetLeft is chosen. When two elements share
 * the same offsetLeft, an element whose text is "加载更多" replaces a current
 * choice whose text is "显示子话题"; otherwise the earlier element is kept.
 * Does nothing when no such element exists.
 */
function clickitem() {
    var items = document.getElementsByName("load");

    // Nothing to click (yet): without this guard the call below would throw
    // a TypeError on every timer tick.
    if (items.length === 0) {
        return;
    }

    var itemSel = items[0];
    for (var i = 1; i < items.length; i++) {
        var candidate = items[i];
        if (candidate.offsetLeft < itemSel.offsetLeft) {
            itemSel = candidate;
        } else if (candidate.offsetLeft === itemSel.offsetLeft &&
                   itemSel.text === "显示子话题" &&
                   candidate.text === "加载更多") {
            itemSel = candidate;
        }
    }

    itemSel.click();
}

// Keep polling so that elements added after each click are picked up as well.
setInterval(clickitem, 1);
I'm fine, it's ok

浙公网安备 33010602011771号