快速排名系统核心代码

        /// <summary>
        /// Opens <paramref name="link"/> in a proxied, stealth-configured browser, visits a
        /// random half of the result links extracted from the page, then clicks through to
        /// the next result page and hands over to <c>ChildTaskRun</c>.
        /// </summary>
        /// <remarks>
        /// A failed attempt (navigation error, empty eqid, or any other exception raised while
        /// the browser is still open) closes the browser and starts over with a newly selected
        /// proxy. Retries run in a loop instead of recursing through
        /// <c>Task.Run(...).Wait()</c>, so a retry no longer blocks a thread-pool thread or
        /// keeps the previous browser and page scopes alive.
        /// Exceptions raised before the guarded section (proxy lookup, browser launch, page
        /// setup) propagate to the caller.
        /// </remarks>
        /// <param name="link">URL of the search result page to start from.</param>
        /// <returns>A task that completes when an attempt finishes without requesting a retry.</returns>
        async static Task TaskRun(string link)
        {
            bool retry;
            do
            {
                retry = await RunSingleAttemptAsync(link);
            }
            while (retry);
        }

        /// <summary>
        /// Polls <c>IpHelper.GetAvilableIpList</c> until a proxy address can be picked at random.
        /// </summary>
        /// <param name="random">Random source used for the pick and for the polling delay.</param>
        /// <returns>The selected proxy address (never null or empty).</returns>
        private static async Task<string> SelectProxyIpAsync(Random random)
        {
            string ip = string.Empty;
            while (string.IsNullOrEmpty(ip))
            {
                string errmsg;
                IpList = IpHelper.GetAvilableIpList(out errmsg);
                if (!string.IsNullOrEmpty(errmsg))
                {
                    Console.WriteLine(errmsg);
                }
                else
                {
                    // An empty (or missing) list without an error message used to end in an
                    // out-of-range index; now it is simply polled again.
                    int count = IpList == null ? 0 : IpList.Count();
                    if (count > 0)
                    {
                        ip = IpList[random.Next(0, count)];
                        Console.WriteLine("当前代理IP:" + ip);
                    }
                }

                // Random 1-3 s pause after every poll, without blocking the calling thread.
                await Task.Delay(random.Next(1000, 3000));
            }

            return ip;
        }

        /// <summary>
        /// Runs one complete browser session for <paramref name="link"/>.
        /// </summary>
        /// <param name="link">URL of the search result page to start from.</param>
        /// <returns>
        /// <c>true</c> when the attempt failed and should be retried with a new proxy;
        /// <c>false</c> when it completed, or failed after the browser was already closed.
        /// </returns>
        private static async Task<bool> RunSingleAttemptAsync(string link)
        {
            var random = new Random();
            string ip = await SelectProxyIpAsync(random);

            //await new BrowserFetcher().DownloadAsync(BrowserFetcher.DefaultRevision);
            LaunchOptions options = new LaunchOptions
            {
                Headless = false,
                Args = new[] {
                    "--proxy-server=" + ip,
                    "--start-maximized",// maximized window
                    "--disable-infobars",// hide the automation info bar
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    "--ignore-certificate-errors",
                    "--app=https://www.baidu.com/"
                },
                IgnoreHTTPSErrors = true
            };
            var extra = new PuppeteerExtra();
            extra.Use(new StealthPlugin());
            using (var browser = await extra.LaunchAsync(options))
            {
                using (var page = await browser.NewPageAsync())
                {
                    string userAgent = UAList[random.Next(0, UAList.Count())];
                    await page.SetUserAgentAsync(userAgent);
                    ViewPortOptions vOptions = new ViewPortOptions
                    {
                        Width = 1920,
                        Height = 1080
                    };
                    await page.SetViewportAsync(vOptions);
                    Dictionary<string, string> dicHeader = new Dictionary<string, string>
                    {
                        { "referer", "https://www.baidu.com/s?ie=utf-8&f=3&rsv_bp=1&tn=baidu&wd=c%23%20htmlagility&oq=%25E5%25BE%25AE%25E8%25B0%25B1%25E6%25A3%2580%25E6%25B5%258B%25E6%2590%259C%25E4%25BA%2586%25E7%25BD%2591&rsv_pq=eb9ff0ce00008fdb&rsv_t=5794Qmog%2FW4kfXpoYcJXzzRk4iN0Dx7vYa8xiv%2Fhej8i69AmoTGkqlME680&rqlang=cn&rsv_dl=ts_2&rsv_enter=1&rsv_sug3=10&rsv_sug1=3&rsv_sug7=100&rsv_sug2=1&rsv_btype=t&prefixsug=%2526lt%253B%2523%2520htmla&rsp=2&inputT=6556&rsv_sug4=8091" }
                    };
                    await page.SetExtraHttpHeadersAsync(dicHeader);
                    try
                    {
                        // Hide the webdriver fingerprint
                        //await page.EvaluateExpressionOnNewDocumentAsync("delete navigator.__proto__.webdriver;");
                        try
                        {
                            await page.GoToAsync(link, WaitUntilNavigation.Networkidle2);
                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine("启动浏览器异常:" + ex.Message);
                            await browser.CloseAsync();// close the browser
                            return true;// retry with a new proxy
                        }

                        string eqid = await Geteqid(page);
                        if (string.IsNullOrEmpty(eqid))
                        {
                            await browser.CloseAsync();// close the browser
                            return true;// retry with a new proxy
                        }

                        string pagesource = await page.GetContentAsync();
                        // Collect every link on the page that should be clicked, then keep a random half.
                        List<LinkModel> linkList = GetAllHrefs(pagesource, eqid);
                        linkList = GetListRandomItems(linkList, linkList.Count() / 2);

                        // Visit each selected search result in its own tab; the tab is disposed
                        // when its using block ends.
                        foreach (var href in linkList)
                        {
                            using (var newPage = await browser.NewPageAsync())
                            {
                                await newPage.SetUserAgentAsync(userAgent);
                                await newPage.SetViewportAsync(vOptions);
                                try
                                {
                                    await newPage.GoToAsync(href.link, WaitUntilNavigation.DOMContentLoaded);
                                    // NOTE(review): this waits for a further navigation after the initial
                                    // load; if none happens within 15 s it throws and the scroll below is
                                    // skipped - confirm that is intended.
                                    await newPage.WaitForNavigationAsync(new NavigationOptions { Timeout = 15000 });
                                    await ScrollPage(newPage, 300, 700, 6, 500, 800);
                                }
                                catch (Exception ex)
                                {
                                    // Best effort: a failing result page must not abort the whole run.
                                    Console.WriteLine("当前打开页面链接异常:" + ex.Message);
                                }
                            }
                        }

                        // NOTE(review): a list with exactly one link also takes the else branch - confirm.
                        if (linkList.Count() > 1)
                        {
                            Console.WriteLine("休息6秒钟...");
                            await Task.Delay(6000);
                        }
                        else
                        {
                            Console.WriteLine("当前结果页面暂无目标链接...");
                        }

                        await page.ClickAsync(".page-inner_2jZi2>a:last-child");// click "next page"
                        await page.ReloadAsync();
                        if (page.Url.Contains("pass.baidu.com"))
                        {
                            // Landed on the security verification page; handled by the catch below.
                            throw new InvalidOperationException("安全验证");
                        }

                        // Awaited instead of blocked on, so failures surface as the original
                        // exception rather than wrapped in an AggregateException.
                        await Task.Run(() => ChildTaskRun(browser, page, userAgent, vOptions));
                        await browser.CloseAsync();// close the browser
                        Console.WriteLine("全部任务已完成....");
                        return false;
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("global:" + ex.Message);
                        if (browser != null && !browser.IsClosed)
                        {
                            await browser.CloseAsync();
                            return true;// retry with a new proxy
                        }

                        // The browser was already closed when the failure surfaced: give up quietly.
                        return false;
                    }
                }
            }
        }

 核心代码如上:

一、模拟点击与发包简析及区别分析

模拟点击:模拟正常的鼠标移动、点击、翻页等操作,以代替手工操作。模拟点击最大的缺点是效率比较低(相对于发包而言)。

发包:发包分为两种,一种为GET,一种为POST,发包需要两个步骤:(1)抓包:抓包就是把数据抓下来,进行分析 (2)发包:首先构造URL或是POST包里一些数据,然后直接提交。发包是没有界面的,也不用模拟人工,直接进行提交。因此,发包最大的一个优点就是可以线程化(多线程),效率远高于模拟。

二、模拟点击与发包难度

模拟点击:易语言用模块就可以实现,通过自写浏览器及填表,相对比较简单!

发包:(针对百度来讲)我抓取过在百度搜索结果中点击标题进入目标网址时的数据包:http 链接对应 GET 请求,https 链接对应 POST 请求(本人能力有限,并非专业抓包,不能完全确定)。发包的难度比较大:单是构造数据包,就要考虑各种随机参数(其中一些参数尚未解密,还有各种时间戳)。本人之前写过刷展现工具,因此对这一点比较了解;这种方式对编程能力以及解密百度参数的能力要求都比较高。

本文主要是以模拟点击来实现快排的。

代码地址:GitHub

 

posted @ 2022-05-05 14:29  极客船长  阅读(278)  评论(0)  编辑  收藏  举报