Python刷CSDN阅读数(仅供娱乐)

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@File:csdn_reads.py    
@E-mail:364942727@qq.com
@Time:2020/4/16 10:03 下午 
@Author:Nobita   
@Version:1.0   
@Desciption:None
"""

import re
import time
import random
import requests
import urllib.request
from bs4 import BeautifulSoup

user_agent_list = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/61.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
    "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15",
]
firefoxHead = {"User-Agent": user_agent_list}
firefoxHead['User-Agent'] = random.choice(user_agent_list)
IPRegular = r"(([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5]).){3}([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5])"
host = "https://blog.csdn.net"
url = "https://blog.csdn.net/hz90s/article/details/{}"
code = ["105518260"]


def parseIPList(url="http://www.xicidaili.com/"):
    IPs = []
    request = urllib.request.Request(url, headers=firefoxHead)
    response = urllib.request.urlopen(request)
    soup = BeautifulSoup(response, "html.parser")
    tds = soup.find_all("td")
    for td in tds:
        string = str(td.string)
        if re.search(IPRegular, string):
            IPs.append(string)
    return IPs


def PV(code):
    s = requests.Session()
    s.headers = firefoxHead['User-Agent']
    count = 0
    while True:
        count += 1
        print("asking for {} times\t".format(count), end="\t")
        IPs = parseIPList()
        s.proxies = {"http": "{}:8080".format(IPs[random.randint(0, 40)])}
        s.get(host)
        r = s.get(url.format(code))
        html = r.text
        soup = BeautifulSoup(html, "html.parser")
        spans = soup.find_all("span")
        print(spans[2].string)
        time.sleep(random.randint(60, 75))


def main():
    PV(code[0])


if __name__ == "__main__":
    main()

 

posted @ 2020-04-17 08:26  charseki  阅读(313)  评论(0编辑  收藏  举报