topic
using System;
using System.IO;
using HtmlAgilityPack;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;

namespace EasySpider
{
    /// <summary>
    /// Reads a topic tree out of an HTML page and appends every four-level
    /// topic path it finds to a file, one JSON array per line.
    /// </summary>
    public class ReadZhihu
    {
        /// <summary>File that <see cref="FormatDocument(string)"/> appends to.</summary>
        private const string DefaultStructureFile = @"D:\学科Struct.json";

        /// <summary>Container element that holds the nested topic lists.</summary>
        private const string TopicTreeXPath = ".//div[@id=\"zh-topic-organize-page-children\"]";

        /// <summary>Topic anchors that sit exactly four list levels below the container.</summary>
        private const string Level4XPath = ".//ul/li/ul/li/ul/li/ul/li/a[@name=\"topic\"]";

        /// <summary>First anchor found beneath a list item, in document order.</summary>
        private const string FirstAnchorXPath = ".//a[1]";

        /// <summary>
        /// Extracts every level-4 topic together with its three ancestors from
        /// <paramref name="document"/> and appends each path to the default output file.
        /// </summary>
        /// <param name="document">HTML source of the page to parse.</param>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
        public static void FormatDocument(string document)
        {
            FormatDocument(document, DefaultStructureFile);
        }

        /// <summary>
        /// Extracts every level-4 topic together with its three ancestors from
        /// <paramref name="document"/> and appends each path to <paramref name="outputPath"/>.
        /// Each appended line is a JSON array of four <c>{ "topic", "url" }</c> objects
        /// ordered from level 1 to level 4. Nothing is written when the page has no
        /// topic container or no level-4 topics.
        /// </summary>
        /// <param name="document">HTML source of the page to parse.</param>
        /// <param name="outputPath">File the JSON lines are appended to.</param>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
        public static void FormatDocument(string document, string outputPath)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }

            HtmlDocument htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(document);

            // SelectSingleNode / SelectNodes return null (not an empty result) when
            // nothing matches, so both lookups have to be checked before use.
            HtmlNode topicTree = htmlDocument.DocumentNode.SelectSingleNode(TopicTreeXPath);
            if (topicTree == null)
            {
                return;
            }

            HtmlNodeCollection level4Anchors = topicTree.SelectNodes(Level4XPath);
            if (level4Anchors == null)
            {
                return;
            }

            foreach (HtmlNode level4Anchor in level4Anchors)
            {
                // From the anchor: a -> li -> ul -> li reaches the enclosing level-3 item;
                // every further level is another ul -> li hop (two more parents).
                JObject level3 = DescribeAncestorTopic(level4Anchor, 3);
                JObject level2 = DescribeAncestorTopic(level4Anchor, 5);
                JObject level1 = DescribeAncestorTopic(level4Anchor, 7);
                if (level1 == null || level2 == null || level3 == null)
                {
                    // Malformed nesting: skip this entry instead of aborting the whole run.
                    continue;
                }

                JArray structure = new JArray();
                structure.Add(level1);
                structure.Add(level2);
                structure.Add(level3);
                structure.Add(DescribeAnchor(level4Anchor));

                WriteData(structure, outputPath);
            }
        }

        /// <summary>
        /// Builds a JSON object with the properties <c>topic</c> and <c>url</c>.
        /// </summary>
        /// <param name="url">Value stored under <c>url</c>.</param>
        /// <param name="topic">Value stored under <c>topic</c>.</param>
        /// <returns>The new object; <c>topic</c> is added before <c>url</c>.</returns>
        public static JObject GenereateObject(string url, string topic)
        {
            JObject obj = new JObject();
            obj.Add("topic", topic);
            obj.Add("url", url);
            return obj;
        }

        /// <summary>
        /// Serializes <paramref name="obj"/> to JSON and appends it as a single line
        /// to <paramref name="fileName"/>. The file is created when it does not exist.
        /// </summary>
        /// <param name="obj">Object to serialize.</param>
        /// <param name="fileName">Path of the file to append to.</param>
        /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="fileName"/> is empty.</exception>
        public static void WriteData(object obj, string fileName)
        {
            string json = JsonConvert.SerializeObject(obj);

            // Opening the writer in append mode already creates a missing file.
            // The former File.Create call returned an open FileStream that was never
            // disposed, which leaked the handle and could make this open fail.
            using (StreamWriter writer = new StreamWriter(fileName, true))
            {
                writer.WriteLine(json);
            }
        }

        /// <summary>
        /// Climbs <paramref name="levelsUp"/> parents from <paramref name="anchor"/> and
        /// describes the first anchor found beneath the node reached.
        /// </summary>
        /// <returns>The topic object, or null when the ancestor or its anchor is missing.</returns>
        private static JObject DescribeAncestorTopic(HtmlNode anchor, int levelsUp)
        {
            HtmlNode current = anchor;
            for (int i = 0; i < levelsUp && current != null; i++)
            {
                current = current.ParentNode;
            }

            if (current == null)
            {
                return null;
            }

            HtmlNode topicAnchor = current.SelectSingleNode(FirstAnchorXPath);
            return topicAnchor == null ? null : DescribeAnchor(topicAnchor);
        }

        /// <summary>Turns an anchor into a topic object from its href and inner text.</summary>
        private static JObject DescribeAnchor(HtmlNode anchor)
        {
            return GenereateObject(anchor.GetAttributeValue("href", string.Empty), anchor.InnerText);
        }
    }
}
自动
// Kept from the original snippet; nothing in this script reads or updates it.
var count = 0;

/**
 * Clicks one element named "load" per call.
 *
 * The element with the smallest offsetLeft wins. When two candidates share the
 * same offsetLeft and the current pick reads "显示子话题" ("show subtopics") while
 * the other reads "加载更多" ("load more"), the "load more" element is preferred.
 * Does nothing when the page currently has no such element.
 */
function clickitem() {
    var items = document.getElementsByName("load");

    // The original fell through to (0).click() here and threw a TypeError on
    // every tick once nothing was left to expand.
    if (items.length === 0) {
        return;
    }

    var itemSel = items[0];
    for (var i = 1; i < items.length; i++) {
        var candidate = items[i];

        if (candidate.offsetLeft < itemSel.offsetLeft) {
            // Further left — presumably a shallower level of the tree; TODO confirm.
            itemSel = candidate;
        } else if (candidate.offsetLeft === itemSel.offsetLeft &&
                   itemSel.text === "显示子话题" &&
                   candidate.text === "加载更多") {
            itemSel = candidate;
        }
    }

    itemSel.click();
}

// Keep the handle so the polling can be stopped with clearInterval(clickTimer).
var clickTimer = setInterval(clickitem, 1);
I'm fine, it's ok
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
· 一个奇形怪状的面试题:Bean中的CHM要不要加volatile?
· [.NET]调用本地 Deepseek 模型
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· 在鹅厂做java开发是什么体验
· 百万级群聊的设计实践
· WPF到Web的无缝过渡:英雄联盟客户端的OpenSilver迁移实战
· 永远不要相信用户的输入:从 SQL 注入攻防看输入验证的重要性
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析