C# 简单获取知轩藏书仙草榜

这个网站的 URL 格式统一:书的详情页就是 http://www.zxcs.me/post/{书的编号}。

书的评价链接是 http://www.zxcs.me/content/plugins/cgz_xinqing/cgz_xinqing_action.php?action=show&id={书的编号}&m={0-1随机浮点数},返回的数据是逗号分隔的五个数字(形如 0,0,0,0,0),其中第一个是仙草数,最后一个是毒草数。

步骤1、先请求详情页获取书的信息:编号,书名,作者,类别等;

步骤2、根据编号获取评价;

步骤3、存储,排序。

请求以及正则提取的类代码:

using System;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace ZXCSGET
{
    /// <summary>
    /// Minimal scraper for www.zxcs.me: downloads a book's detail page to extract
    /// its title, author and category, and downloads the book's rating counters.
    /// </summary>
    public class CrawlerTest
    {
        /// <summary>
        /// Pattern for the book name: the text between "《" and "》" at the start of the page's &lt;title&gt; element.
        /// </summary>
        static readonly string REG_1= @"(?<=\<title\>《).*?(?=》)";
        /// <summary>
        /// Pattern for the author: the text after "作者:" up to the fixed site suffix that closes the &lt;title&gt; element.
        /// </summary>
        static readonly string REG_2 = @"(?<=作者:).*?(?=[ ]-[ ]知轩藏书-藏尽网络中最好的精校小说\</title>)";
        /// <summary>
        /// Pattern for the category: the text after "精校电子书," up to the next double quote.
        /// </summary>
        static readonly string REG_3 = @"(?<=精校电子书,).*?(?="")";
        /// <summary>
        /// Base URL of a book detail page; the book id is appended to it.
        /// </summary>
        static readonly string url_1=@"http://www.zxcs.me/post/";
        /// <summary>
        /// Base URL of the rating plugin.
        /// </summary>
        static readonly string pluginpath= @"http://www.zxcs.me/content/plugins/cgz_xinqing/";

        // The patterns never change, so build the Regex objects once instead of on every request.
        static readonly Regex TitleRegex = new Regex(REG_1);
        static readonly Regex AuthorRegex = new Regex(REG_2);
        static readonly Regex CategoryRegex = new Regex(REG_3);

        // One shared generator: a new time-seeded Random per call can repeat the same
        // value when calls happen in quick succession. Random is not thread-safe, hence the lock.
        static readonly Random SharedRandom = new Random();
        static readonly object RandomLock = new object();

        /// <summary>
        /// Performs an HTTP GET on <paramref name="url"/> and returns the response body decoded as UTF-8.
        /// The response, stream and reader are disposed even when reading fails.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <returns>The full response body as text.</returns>
        private static string DownloadText(string url)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            // HTTP method names are case-sensitive on the wire; use the canonical spelling.
            request.Method = "GET";
            request.ContentType = "text/html";
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream respStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(respStream, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Requests the detail page of a book and extracts its information.
        /// </summary>
        /// <param name="id">Book id.</param>
        /// <param name="reContent1">Receives the book name; empty if the pattern did not match.</param>
        /// <param name="reContent2">Receives the author; empty if the pattern did not match.</param>
        /// <param name="reContent3">Receives the category; empty if the pattern did not match.</param>
        /// <returns>
        /// <c>true</c> if the page was downloaded and read without an exception;
        /// <c>false</c> otherwise, in which case the ref arguments are left unchanged.
        /// </returns>
        public static bool RequestUrl1(int id,ref string reContent1,ref string reContent2,ref string reContent3) 
        {
            try
            {
                string responseText = DownloadText(
                    url_1 + id.ToString(System.Globalization.CultureInfo.InvariantCulture));

                // Match.Value is the empty string when a pattern does not match.
                reContent1 = TitleRegex.Match(responseText).Value;
                reContent2 = AuthorRegex.Match(responseText).Value;
                reContent3 = CategoryRegex.Match(responseText).Value;
                return true;
            }
            catch (Exception)
            {
                // Deliberate best-effort: any failure is reported through the return value
                // so a caller iterating over many ids can simply skip this one.
                return false;
            }
        }

        /// <summary>
        /// Requests the rating information of a book.
        /// </summary>
        /// <param name="id">Book id.</param>
        /// <param name="grass">
        /// Receives the raw response text; per the original note this is a string of
        /// counters in the form <c>0,0,0,0,0</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> if the response was downloaded and read without an exception;
        /// <c>false</c> otherwise, in which case <paramref name="grass"/> is left unchanged.
        /// </returns>
        public static bool RequestUrl2(int id,ref string grass) 
        {
            try
            {
                double nonce;
                lock (RandomLock)
                {
                    nonce = SharedRandom.NextDouble();
                }

                // Format with the invariant culture so the decimal separator is always '.',
                // regardless of the machine's regional settings.
                string url = pluginpath
                    + "cgz_xinqing_action.php?action=show&id="
                    + id.ToString(System.Globalization.CultureInfo.InvariantCulture)
                    + "&m="
                    + nonce.ToString(System.Globalization.CultureInfo.InvariantCulture);

                grass = DownloadText(url);
                return true;
            }
            catch (Exception)
            {
                // Deliberate best-effort: failures are reported through the return value.
                return false;
            }
        }
    }
}

我是按编号 0-20000 循环获取书的信息,然后再获取评价;WinForm 单线程执行,速度很慢。存储也没写,直接排序输出了 Top 20。后面有空再改进吧。

 

posted @ 2021-06-23 10:40  沛苍冥  阅读(2545)  评论(0)  编辑  收藏  举报