topic

复制代码
using HtmlAgilityPack;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System.IO;

namespace EasySpider
{
    /// <summary>
    /// Extracts the four-level topic hierarchy from the HTML of a Zhihu
    /// topic-organize page and appends every root-to-leaf path to a file,
    /// one JSON array per line.
    /// </summary>
    public class ReadZhihu
    {
        // Output file that receives one JSON array per level-4 topic.
        private const string StructureFilePath = @"D:\学科Struct.json";

        // The <div> that wraps the nested <ul>/<li> topic tree.
        private const string TopicTreeXPath = ".//div[@id=\"zh-topic-organize-page-children\"]";

        // Topic links that sit exactly four <ul>/<li> levels deep inside the tree.
        private const string Level4TopicXPath = ".//ul/li/ul/li/ul/li/ul/li/a[@name=\"topic\"]";

        /// <summary>
        /// Parses <paramref name="document"/> and, for every level-4 topic link,
        /// writes a JSON array [level1, level2, level3, level4] of
        /// {topic, url} objects to the structure file.
        /// </summary>
        /// <param name="document">Raw HTML of the topic-organize page.</param>
        /// <remarks>
        /// Returns without writing anything when the topic tree container or
        /// the level-4 links are not present in the document.
        /// </remarks>
        public static void FormatDocument(string document)
        {
            HtmlDocument htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(document);

            // SelectSingleNode/SelectNodes return null (not an empty result)
            // when nothing matches, so both results must be checked.
            HtmlNode topicTree = htmlDocument.DocumentNode.SelectSingleNode(TopicTreeXPath);
            if (topicTree == null)
            {
                return;
            }

            var level4Links = topicTree.SelectNodes(Level4TopicXPath);
            if (level4Links == null)
            {
                return;
            }

            foreach (HtmlNode l4Link in level4Links)
            {
                // From an <a>, each enclosing <li> is two steps further up
                // (a -> li -> ul -> li), hence 3, 5 and 7 parent hops.
                HtmlNode l3Link = FindAncestorLink(l4Link, 3);
                HtmlNode l2Link = FindAncestorLink(l4Link, 5);
                HtmlNode l1Link = FindAncestorLink(l4Link, 7);

                if (l3Link == null || l2Link == null || l1Link == null)
                {
                    // Malformed branch: skip it rather than abort the whole run.
                    continue;
                }

                JArray structure = new JArray();
                structure.Add(ToTopicObject(l1Link));
                structure.Add(ToTopicObject(l2Link));
                structure.Add(ToTopicObject(l3Link));
                structure.Add(ToTopicObject(l4Link));
                WriteData(structure, StructureFilePath);
            }
        }

        /// <summary>
        /// Builds a JSON object with the properties "topic" and "url".
        /// </summary>
        /// <param name="url">Value stored under "url".</param>
        /// <param name="topic">Value stored under "topic".</param>
        /// <returns>The newly created object.</returns>
        public static JObject GenereateObject(string url, string topic)
        {
            JObject obj = new JObject();
            obj.Add("topic", topic);
            obj.Add("url", url);
            return obj;
        }

        /// <summary>
        /// Serializes <paramref name="obj"/> to JSON and appends it as a single
        /// line to <paramref name="fileName"/>.
        /// </summary>
        /// <param name="obj">Object to serialize.</param>
        /// <param name="fileName">Path of the file to append to.</param>
        public static void WriteData(object obj, string fileName)
        {
            string json = JsonConvert.SerializeObject(obj);

            // Opening the writer in append mode creates the file when it is
            // missing. A separate File.Create call would leave an undisposed
            // FileStream holding the file open and make this open fail.
            using (StreamWriter writer = new StreamWriter(fileName, true))
            {
                writer.WriteLine(json);
            }
        }

        // Walks levelsUp parents above node and returns the first anchor found
        // below that ancestor, or null if the chain or the anchor is missing.
        private static HtmlNode FindAncestorLink(HtmlNode node, int levelsUp)
        {
            HtmlNode ancestor = node;
            for (int i = 0; i < levelsUp && ancestor != null; i++)
            {
                ancestor = ancestor.ParentNode;
            }

            return ancestor == null ? null : ancestor.SelectSingleNode(".//a[1]");
        }

        // Converts an anchor node into the {topic, url} object used in the output.
        private static JObject ToTopicObject(HtmlNode link)
        {
            return GenereateObject(link.GetAttributeValue("href", string.Empty), link.InnerText);
        }
    }
}
复制代码

自动 

复制代码
// NOTE(review): `count` is initialised here but never read or updated
// anywhere in the visible script.
var count = 0;
/**
 * Clicks one of the elements named "load" on the current page.
 *
 * The element with the smallest offsetLeft is chosen. When two candidates
 * share the same offsetLeft, a "加载更多" element replaces a currently
 * selected "显示子话题" element; otherwise the earlier one is kept.
 *
 * Does nothing when no element named "load" exists.
 */
function clickitem() {
    var items = document.getElementsByName("load");

    // Without this guard the selection below would stay a non-element and
    // calling click() on it would throw on every timer tick.
    if (items.length === 0) {
        return;
    }

    var itemSel = items[0];
    for (var i = 1; i < items.length; i++) {
        var candidate = items[i];
        if (candidate.offsetLeft < itemSel.offsetLeft) {
            itemSel = candidate;
        } else if (candidate.offsetLeft == itemSel.offsetLeft &&
                   itemSel.text == "显示子话题" &&
                   candidate.text == "加载更多") {
            itemSel = candidate;
        }
    }

    itemSel.click();
}
// Run clickitem repeatedly with a 1 ms interval. The timer id is not kept,
// so nothing in the visible script ever stops it.
setInterval(clickitem, 1);
复制代码

 

posted @   skywss27  阅读(474)  评论(0)  编辑  收藏  举报
编辑推荐:
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
· 一个奇形怪状的面试题:Bean中的CHM要不要加volatile?
· [.NET]调用本地 Deepseek 模型
· 一个费力不讨好的项目,让我损失了近一半的绩效!
阅读排行:
· 在鹅厂做java开发是什么体验
· 百万级群聊的设计实践
· WPF到Web的无缝过渡:英雄联盟客户端的OpenSilver迁移实战
· 永远不要相信用户的输入:从 SQL 注入攻防看输入验证的重要性
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
点击右上角即可分享
微信分享提示