AcWing做题数爬取

请务必在账号已登录的状态下使用

Python

准备步骤

  • F12开启网站控制台
  • 网络界面下刷新, 找到名称为www.acwing.com的选项
  • 点击, 并在请求标头下寻找Cookie值, 复制到代码中去
  • 在代码中修改网页总数

代码

import importlib.util
import os
import re
import subprocess
import sys


def _ensure_package(module_name, requirement):
    """Install *requirement* with pip only when *module_name* is not importable.

    Args:
        module_name: Top-level module name to look for (e.g. ``'requests'``).
        requirement: pip requirement specifier to install when it is missing.
    """
    if importlib.util.find_spec(module_name) is not None:
        return
    # Invoke pip through the interpreter that is running this script so the
    # package is installed into the same environment that will import it.
    # A failed install is not fatal here: the import below reports it.
    subprocess.call([
        sys.executable, '-m', 'pip', 'install',
        '-i', 'https://pypi.tuna.tsinghua.edu.cn/simple',
        requirement,
    ])


# Check the third-party libraries and install any that are missing.
_ensure_package('requests', 'requests==2.24.0')
_ensure_package('lxml', 'lxml')

import requests
from lxml import etree


class Spider(object):
    """Crawl AcWing problem pages and list the problems marked as solved.

    Pages are requested with the cookie of a logged-in session; table rows
    whose status span has the title "已通过这道题目" are treated as solved.
    """

    # Seconds to wait for a page before giving up, so that one stalled
    # connection cannot hang the whole crawl.
    REQUEST_TIMEOUT = 10

    def __init__(self, cookie=''):
        """Prepare the URL template, request headers and solved counter.

        Args:
            cookie: Value for the ``Cookie`` request header, copied from the
                browser while logged in. Defaults to an empty string, in
                which case the value can still be edited in place below.
        """
        # ``{}`` is filled with the page number in ``run``.
        self.base_url = 'https://www.acwing.com/problem/{}/'

        self.headers = {
            # Replace with the site's `Cookie` value (or pass ``cookie=``).
            'Cookie': cookie,
            'Referer': 'https://www.acwing.com/about/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                          ' (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
        }
        # Number of solved problems found by the most recent ``run``.
        self.count = 0

    def get_html(self, url):
        """Fetch *url* with the configured headers and return the body text.

        Raises:
            requests.exceptions.RequestException: on network errors,
                including exceeding ``REQUEST_TIMEOUT``.
        """
        response = requests.get(
            url=url,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT
        )
        return response.text

    def xpath_func(self, html):
        """Return the stripped link texts of the solved rows found in *html*."""
        name_bds = '//tbody/tr[./td/span[@title="已通过这道题目"]]/td/a/text()'
        base_obj = etree.HTML(html)
        return [name.strip() for name in base_obj.xpath(name_bds)]

    def re_func(self, html, re_bds):
        """Return every match of the pattern *re_bds* in *html*.

        The pattern is compiled with ``re.S`` so ``.`` also matches newlines.
        """
        pattern = re.compile(re_bds, re.S)
        return pattern.findall(html)

    def parse_html(self, url):
        """Download *url* and return the solved problem names it contains."""
        return self.xpath_func(self.get_html(url))

    def run(self, total_pages=84):
        """Ask for confirmation, crawl the pages and print the solved list.

        Args:
            total_pages: Number of pages to visit, starting from page 1.
                Defaults to 84, the value previously hard-coded here.
        """
        warning = input('您马上就要爬取acwing了,看一下你的做题数,您的劳动成果将会在下面展示出来,确定要看吗?(Y/N)')
        # Tolerate lower-case input and stray whitespace around the answer.
        if warning.strip().upper() != 'Y':
            print('已经退出,你这个弱者')
            return

        # Start from zero so calling ``run`` again does not double-count.
        self.count = 0
        print('爬虫系统已经启动...正在努力抓取,请稍等....')
        print('+---------------------------------+')
        print('|            name                 |')
        print('+---------------------------------+')
        for page in range(1, total_pages + 1):
            for name in self.parse_html(self.base_url.format(page)):
                self.count += 1
                print('|   ' + name)
        print('+---------------------------------+')
        print('经过您的不懈努力,您一共做了' + str(self.count) + '道题,继续努力!!')


if __name__ == '__main__':
    # Script entry point: build a spider and start the interactive crawl.
    Spider().run()

posted @ 2021-12-28 17:26  哇唔?  阅读(357)  评论(0)  编辑  收藏  举报