按关键字搜索并爬取微信公众号文章

#!/usr/bin/python3
# -*- coding:utf-8 -*-

import re
import json
import time
import requests
from bs4 import BeautifulSoup
from urllib.request import quote


"""
from urllib.request import quote
url = 'http://www.example.com/api.php?text=中文在这里'

# 不带附加参数
print('>>> %s' % quote(url))
>>> http%3A//www.example.com/api.php%3Ftext%3D%E4%B8%AD%E6%96%87%E5%9C%A8%E8%BF%99%E9%87%8C

# 附带不转换字符参数
print('>>> %s' % quote(url, safe='/:?='))
>>> http://www.example.com/api.php?text=%E4%B8%AD%E6%96%87%E5%9C%A8%E8%BF%99%E9%87%8C
"""

"""
https://httpbin.org/ip
"""


class WeixinSpider:
    """Search Sogou's WeChat index for a keyword and list the articles found.

    The flow implemented by :meth:`main` is: fetch one Sogou result page for
    the keyword, collect the link of every result entry, download each linked
    page, pull the embedded ``msgList`` JSON out of it and print one line per
    article.

    Requests that fail or hit the captcha page are retried through a proxy
    obtained from the proxy pool at ``self.proxy_url``.
    """

    def __init__(self, key):
        """Store the search keyword and the fixed request settings.

        :param key: search keyword; substituted into the Sogou query string.
        """
        self.key = key
        # Two placeholders: the (percent-encoded) keyword and the page number.
        self.sougou_search_url = "http://weixin.sogou.com/weixin?type=1&query={}&ie=utf8&s_from=input&page={}&_sug_=n&_sug_type_="
        # Browser-like User-Agent so the request looks like a regular browser.
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'}
        # Proxy-pool endpoint; its response body is used as "host:port".
        self.proxy_url = 'http://10.0.0.9:8000'

    def get_proxy(self):
        """Ask the proxy pool for an address and build a ``requests`` proxy map.

        :return: dict with ``http`` and ``https`` entries pointing at the proxy.
        :raises requests.RequestException: if the proxy pool cannot be reached
            within the timeout.
        """
        # Strip whitespace: a trailing newline in the pool's answer would
        # otherwise end up inside the proxy URL. The timeout keeps a dead
        # proxy pool from hanging the crawler forever.
        text = requests.get(self.proxy_url, timeout=3).text.strip()
        # NOTE(review): the https entry uses an https:// proxy scheme; many
        # HTTP proxies expect http:// here - confirm against the proxy pool.
        proxy = {
            'http':'http://{}'.format(text),
            'https':'https://{}'.format(text)
        }
        print('当前代理IP是:http://{}'.format(text))
        return proxy

    def get_search_response(self, url, proxy=None, total=3):
        """Download ``url``, retrying through fresh proxies on failure.

        An attempt counts as failed when the request raises or when the
        returned page contains the captcha prompt.

        :param url: address to fetch.
        :param proxy: ``requests`` proxy map for the first attempt, or ``None``
            for a direct connection.
        :param total: maximum number of attempts; values <= 0 mean no attempt.
        :return: raw response body as ``bytes``, or ``None`` when every
            attempt failed.
        """
        attempts_left = total
        while attempts_left > 0:
            try:
                content = requests.get(url, headers=self.headers, proxies=proxy, timeout=3).content
            except requests.RequestException as e:
                print('异常:{}'.format(str(e)))
                print('代理异常,重试...')
            else:
                # errors='replace' so a stray invalid byte cannot abort the
                # captcha check with UnicodeDecodeError.
                if '输入验证码' not in content.decode('utf-8', errors='replace'):
                    return content
            attempts_left -= 1
            if attempts_left > 0:
                # Only fetch a new proxy when another attempt will follow.
                proxy = self.get_proxy()
        return None

    def get_wx_hkmovie(self, sougou_response):
        """Extract the result links from a Sogou search result page.

        For every ``div.txt-box`` the ``href`` of the first ``<a>`` inside its
        ``p.tit`` is collected; boxes lacking that structure are skipped.

        :param sougou_response: raw page bytes, or ``None``/empty.
        :return: list of link strings (empty when there is nothing to parse).
        """
        if not sougou_response:
            return []
        soup = BeautifulSoup(sougou_response.decode('utf-8', errors='replace'), 'lxml')
        links = []
        for box in soup.find_all('div', class_='txt-box'):
            title = box.find('p', class_='tit')
            anchor = title.find('a') if title is not None else None
            if anchor is not None and anchor.has_attr('href'):
                links.append(anchor['href'])
        return links

    def get_wx_article(self, response):
        """Parse the ``var msgList = {...}`` JSON embedded in a page.

        :param response: raw page bytes, or ``None``/empty.
        :return: the decoded JSON object; ``{'list': []}`` when the page has
            no ``msgList`` assignment (for example a captcha page) or the
            captured text is not valid JSON.
        """
        empty = {'list': []}
        if not response:
            return empty
        pattern = re.compile(r'var msgList = (.*?}}]})', re.S)
        match = pattern.search(response.decode('utf-8', errors='replace'))
        if match is None:
            return empty
        try:
            return json.loads(match.group(1))
        except ValueError:
            # The non-greedy capture can stop early and yield truncated JSON.
            return empty

    def time_format(self, timestamp):
        """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time."""
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))

    def parse_article(self, response):
        """Print one line per article and return the articles as dicts.

        Entries without ``app_msg_ext_info`` are skipped, since they carry
        none of the fields printed here.

        :param response: object returned by :meth:`get_wx_article`; its
            ``list`` entry holds the per-message records.
        :return: list of dicts with keys ``author``, ``title``, ``datetime``
            (formatted local time, or ``None``) and ``url`` (absolute link,
            or ``None``).
        """
        prefix = "https://mp.weixin.qq.com"
        articles = []
        for article in (response or {}).get('list') or []:
            ext_info = article.get('app_msg_ext_info')
            if not ext_info:
                continue
            comm_info = article.get('comm_msg_info') or {}
            article_author = ext_info.get('author')
            article_url = ext_info.get('content_url')
            article_title = ext_info.get('title')
            article_addtime = comm_info.get('datetime')
            print("作者:{},标题:{},时间:{},链接:{}".format(article_author,article_title,article_addtime,article_url))
            if article_url and not article_url.startswith('http'):
                # content_url is site-relative ("/s?..."); make it absolute.
                full_url = prefix + article_url
            else:
                full_url = article_url or None
            articles.append({
                'author': article_author,
                'title': article_title,
                'datetime': self.time_format(article_addtime) if article_addtime is not None else None,
                'url': full_url,
            })
        return articles

    def main(self, page=10):
        """Run one search and print the articles behind every result link.

        :param page: Sogou result page number to request; defaults to 10,
            the value this script previously hard-coded.
        """
        # Percent-encode the keyword so characters such as '&', '+' or '#'
        # cannot break the query string.
        search_url = self.sougou_search_url.format(quote(self.key, safe=''), page)
        content = self.get_search_response(search_url)
        if content is None:
            print('搜索请求失败,未获取到结果')
            return
        for url in self.get_wx_hkmovie(content):
            print(url)
            html = self.get_search_response(url)
            if html is None:
                # All retries failed (network error or captcha): skip this link.
                print('页面获取失败,跳过')
                continue
            self.parse_article(self.get_wx_article(html))
if __name__ == '__main__':
    # Read the search keyword from the console and start the crawl with it.
    WeixinSpider(input('>>> ')).main()
    # with open('SogouWeixin_python.txt',mode='r',encoding='gb2312') as rf:
    #     for line in rf:
    #         print(line)

  

 

 1 D:\soft\work\python35\python.exe D:/soft/work/work/20170925/sougou.py
 2 >>> python
 3 http://mp.weixin.qq.com/profile?src=3&timestamp=1508164694&ver=1&signature=IstVuOsMvC9JxSgNijn*x0hCsKSj9gxcQUZMYTSLsJ3DmCdT1iL*xhnLEy8kMUsDjAPhuZ1FOmLYm0tB-cUIPQ==
 4 作者:陈章,标题:2017年9月6日,时间:1504691232,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4c0t9dSZy-x7--ObsKVye0p4xpKHPQc5Rmu9Y6BKiGRPq4xK72Rrm-F1uQWc*nQN-iwUn4QC93JQyAYeDvvYka0=
 5 作者:陈章,标题:2017年9月5日,时间:1504602088,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4UzdgZNoGvCUZEA-mb9rhHB3SVBTF0wXcdeKnAqHDvS36jN0ZwHyiQNml-OHtr63Yyg6eJM*Zy9uCtBWUmG74nk=
 6 作者:陈章,标题:2017年9月4日,时间:1504518038,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4f9bHpSovDnfkNgXYqbRiEgyHgP7bqDJy3wK8At*i9FvaNUmiRiMYRQzSLUqefyak-j1dc6Ds3rOI5LSw73A5dE=
 7 作者:陈章,标题:2017年8月28日技术日记,时间:1503978338,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4bdAel5vdI1YrLxks1Gy4I2SH61DPdV11BucznpftjztJwNs3ayroyisMmPAg3zic5Z-MnRD9PyOtxuDgPFNSNg=
 8 作者:陈章,标题:2017年8月26日,时间:1503755794,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4eY0iFeJudhV5TXqM0IRKyKNuCfpNtrvxojLcqmwPAeHpc3fNrR9rLlXizeNsQc8N7rbP4ZDxur5SlIxpCkxsQs=
 9 作者:陈章,标题:2017年8月13日,时间:1502617504,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4fZrEzQRRP*oLpMdmGaumkAxOuwOJlddlONwv63YQ-HIG5q3CojJOKoCxyXoAkknPk65GZfyTygnKuc8YoYRlUE=
10 作者:CheungChan,标题:selenium调用chromedriver禁用flash时遇到的深坑,时间:1500627064,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4UjEBWliLF9GsgOv59rAWexEQjU2jJNXDFUJ6rWDF8nXxkinRHg0Sun8ujpdSvk*3qq*lbyu6JF3-v14fY3xxco=
11 作者:CheungChan,标题:python项目简单实现自定义配置覆盖默认配置,时间:1500448400,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4ezoLAjpY8xq9mXBTbuSJ-gMlCJUD5LqqzAEqJI6KerR8pcn3S9VVYVjH-wuNhqBDYOHFFKvnNOTD1hUJtvIvYo=
12 作者:ChuengChan,标题:配置vim为pythonIDE小结,时间:1492775424,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4YsXsdEBiCcX2wa5VhFHn9Q1Qrv33qzRuIrHr1qXW-UJ8JAz4s3SFveedBlI6I1KdHofhqkw5KxMZ922aErbVZ4=
13 作者:CheungChan,标题:django框架orm层api简单总结,时间:1484032960,链接:/s?timestamp=1508164694&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PLMzzNSuVV3Q6b5Nw6Bo7IjXNSyDo1XVgJk8v8r09UQm6Uu667lTnMKEbeQAJa5k4TNDFYDGyFOOB*SZdRl4DDLseGEEqm1K4a8lDkoaBrARYyxccQurYHD8ks3Lt*7aPuONMMQbE*9Wjl2mwP0BOB8=
14 http://mp.weixin.qq.com/profile?src=3&timestamp=1508164694&ver=1&signature=DGakM4MrDnvd01JM6ApX3Yv*jmKeiIhex*DYA5PVyAcknJYfetZdWu4uD7bKW4Jg3NmhsTMxk3yu4hURxVP9UA==
15 作者:柠檬,标题:Python 运算符,时间:1479204290,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3CkKjbQpKLD9LrI7Zs3hl90UuQo77q5EPfuRKcPtcpT6vn9PfzIsman4DhSHvSPa6HSDNVxZ7ct1vYV-bgxiQJEI=
16 作者:柠檬,标题:Python 变量类型,时间:1479115697,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3Cmg9ce6bARZs-nWzuhz5EujlCnZxQ3hDMFcnsNtMgZUUOUWTDVA83CZRsqUkyIxOqkjiLb4sbwseLOoIkPy-2j8=
17 作者:柠檬,标题:Python 环境搭建,时间:1478769596,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3CkWwl3doZOH1*7N0fAlrlQjh1ACsDT1dxQU5f5T-FBbzpZG7Z6SMyMx5pdIWQNAQluTmxLspgWLK4K3Zcw6Pkw0=
18 作者:柠檬,标题:Python 简介,时间:1478687814,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3CvKG5Dmbi8u0AaNe7tiK544KRYVnEbh4Jk6tTKfBpkVD*ikrK7eXBFH*PzhX7K3MPjYweYNOZtZfmnEn3arfOks=
19 作者:,标题:MySQL NULL 值处理,时间:1472011203,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3Cruu1pv-zT3jYImOcFJ30TyPuzQRcinWOZln6ow6UsPGRUyNQnNrS45LeelGmwy*8o8wvG4hweTnyqxD7iiXN6Q=
20 作者:,标题:Mysql 连接的使用,时间:1471930374,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3CkC9bfF7-9OYosjOt-*q-7Ddb7umB8cPzFqi1ZOQoi6iW9wEqNIeIM1Q6htJVQZSp*ibkwbDgm57NnUKIzvKeSg=
21 作者:,标题:MySQL LIKE 语句,时间:1471829721,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3CkvBo5gNgAP8glRJnkFyMoCgNjnDacndoVYwpNspE4XUgAIRIeA1XvgJRzIm5YTh8EpKlSs3JVhALDaua1vQGhg=
22 作者:,标题:MySQL DELETE 语句,时间:1471492609,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3Cn5RdI-eO450bpGe3AS61y5xIaiQ5DR3dSYLIX2nUNzb*vdyABRmvm9ljNjxUU1nOx4MNjfSpMKiz3eE5MowxhQ=
23 作者:,标题:MySQL UPDATE 语句,时间:1471416786,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3CnxL*5wvdLCQNdbop-csF-jUx6RGBbxqaLZPjrTt8aj0xn*THQBfo2bv75XO5Fnl6JLKAFS2p-fexRRhM8DC2Fs=
24 作者:,标题:MySQL where 子句,时间:1471321727,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMicQqaMwtoB3yG1IgRxJIq2S9*xCw9eG5kVERmL-3jc3CvugD5bCSl54FX2F4jr*bxAkHntPTJ6Qlpq9CYfR1LRlu954d2eCkcd8Qkv*iucuCkEfEA6tOGS6yAzbAm8LDHc=
25 http://mp.weixin.qq.com/profile?src=3&timestamp=1508164694&ver=1&signature=lAKlFlXYoTKA9eFAhmlPH4RWSEJYea9WxpFAzwgjUByqf1jijpjdCsILg8NWliN*w8NYPTX*kx-hBd5gFEZ2wg==
26 作者:小成老师,标题:搞事情,猜猜小成老师最近要发什么大招?,时间:1492007521,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyTy8SUxNynpwaC-SYeiYgG7-A69KDbNXliPTARb5BIZ3JqtNruMpdZoakPfNeBXRuMQ2XHiJ2abXGGSUrA7Kmen0=
27 作者:,标题:每一个中国人,都应该读一下《论持久战》(深度),时间:1491645931,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyT9xnPAwDLTiv9dYovIue3caKvzUGZHn6EZdZnLyDGfV4F9CkwNauojMIShwM5cDQYdCVAQGLDds3*aGSCeqItdY=
28 作者:,标题:最可怕的不是失业,而是你没有考虑过失业!,时间:1487684264,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyT1OhJ90nh6iPbTK59arcaMepvVgc8tmElih1aiEVA1iSLJONWE-4m3qJMRBgu2tCSBV4i4KRYk-3s7QH4uupHJ0=
29 作者:sunedu,标题:别再上“一万小时定律”的当了,科学家说它很不靠谱,时间:1487474925,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyT2ZTaamVZCyq4yS7lGKYD9gDXTwM*dPLWCewge7zXslPPfu7xivwNWa-nKOzj3NTobFn6IPJV01SiO36ZlaZnIM=
30 作者:,标题:“4E ”认证体系与继续教育,时间:1486557161,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyT6XPYd7JBDfgyk07l5ojGTIUve0YJdDVm8ID-IqwnJ368i5H2ML*UGctef1KToN0EtJvC69RA-QBCX5hxg9WmBo=
31 作者:小成老师,标题:金融理财概述及CFP资格认证制度,时间:1486305736,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyT98NAfm5pqQ5GxssI2AZDN*4fUTZmCxudyPsbMNhf0GZhxp1Z*r*KRMRdZQ8TIs*FtwrJ51-yGcqOOg1-gcuBwg=
32 作者:小成老师,标题:小成老师AFP助学计划,时间:1486219932,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyTy7WSNv*rEBQlTAtBYs0yqgWQNwLNhaJeuwSRPgDYpIpJElK-NUpS8jXYJh-E6jvI9cOt77J0*RXVOubaDz*27Y=
33 作者:梦想规划师,标题:说一说理财中风险那些事儿,时间:1486127244,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyT4QZinRr5xTaN0tdINK2uzxZFlV0uuQix3aa6Q4Le*inqOsQ-2BcD0Y4QAHDTK575VucKqDLmqEhN-MPCKhoMzQ=
34 作者:曾成,标题:该不该买万能保险?,时间:1479574339,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyT6DMjqyRnz2vHL-awMOzHzYEQhmWgaZLD*uOEcuop8GS3NV-KZpxgcKojYxmOCOJgiaeV4dhoTg1zVZeSWQvJI4=
35 作者:,标题:你投P2P,还是学点风险管理吧,时间:1479521555,链接:/s?timestamp=1508164695&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PHvZZO8-O9A090T4EnDOMid71vderF-o*M5WZeqTL9xKa9Uv-oZFjGTZNsMui0EyT0ZEQVVASfWg84ZSYQOpm7xdiAFU40Foo17VEDOn7T4XTck1dnL3XhS2pc*NWbrt4ID6B6iB9ARQSoI-yyLm0EA=
36 http://mp.weixin.qq.com/profile?src=3&timestamp=1508164694&ver=1&signature=1aWiNfiMZEaNavn5O-rtgID-YtbD9cnY4x0w1oZaH5GMxtN-TSUduOg6EUI3xmjouGa0g-dZ3ToEnExZsGYf-A==
37 作者:,标题:动态生成网页下载—渲染类,时间:1497412525,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7AIM0G-Kmb47P*BFkGRgsBmxAjvE1LQnsao0NhkptIIBCLRk65XyOiWvJZ3NalWe6iQZ1iFLtSAaXXoHotmF5yo=
38 作者:,标题:多进程并发爬虫,时间:1496979039,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7Ai3OVjtOijUYYDfiItUnxGlFbLzt3N2iDbdNSN2*mks8vmrl0kKv1ndf3tE1EKk9sUstGv4kJlGsrOnF19dBDE=
39 作者:,标题:链接爬虫,时间:1496633905,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7C0dKXO*KdnHmx6ZctteXjRYYH1kRxSD9qssqnCus*H3trP*YkMa532VZ8V00DJVIYeIbW5Wv5zxvYmYOVWqf-k=
40 作者:,标题:堆排序,时间:1496320156,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7PAuhh51XxdAMcsZ4EuqDp7nTH*rgueVWIh3D9xntz9C*nYahdcAP4cz5MaPbynTjZ6hED0fhFTetBO3ZYKPKSs=
41 作者:,标题:糗事百科小爬虫,时间:1496211976,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7Azs0C-5nzr1P-nRr9CV51o3am*cazrOno90q4Dh6jeadJjDeOVVIKOC*ukBAWL14haJBtTMXuv22jRVU*9tlYA=
42 作者:,标题:scrapy框架中的headers类,时间:1495845289,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7Gbq27ajqgH429Bif5nj-wzKaBjSB*QInnwIjCiDcW5707S9q5XvUYPzMGSENDWITwwq77ERgPXEmvYLnfM*eLg=
43 作者:,标题:生成密码文件,时间:1495787373,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7Gp7ocbsrZCHuJbgjursT5*bELP--5G8vBY98RAAEJNWub9YARTILP0fxnOmljg*k58Yx6I6YJdBCcm-6NE3z1M=
44 作者:,标题:scrapy爬虫,时间:1495612119,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7M0yMWUG*nsyDTx*WKfs80icVgDUTuh04Mtc0OnI6MfZcHJHFvQlQwnPLX2tdsuikwfEuwSGvDWo51Zod77w7hI=
45 作者:,标题:猜数字,时间:1495524656,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7ARgprB663iFp1OP7YBS0DOxdEr-YXbeJW3UiAr4fW7p1jy*0YSv3hpyq744Oq4Z58Md2JTKuYhXwbPCpG3BTUM=
46 作者:,标题:测试浏览器速度,时间:1495444483,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfyIB6Zj32AM8mGX*OGODZYfgR1E-ArNpwhQXjCA57P7KjEtUXbP7Yp9Mn-kZWpdEraeYZKbxRkoA3BBksRyxh4*yqWlhWewnWvX1xPfrQVPoqmz-cEl1bjPzDGANZmosM=
47 http://mp.weixin.qq.com/profile?src=3&timestamp=1508164694&ver=1&signature=AcFuowV3ZjTBxLCcXCX7-NBm-D0-YaKnreBiywa*sAnjb*qfdmtNhF-H9UNfcqTclwu5KHP56X3MNkL3f1y11w==
48 作者:贝克田庄,标题:Python爬取贴吧图片,时间:1507790324,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDcc5*NqoM6yTCr4gLxSWKHvu*pmy1ml39cfe4LPTw-3yYuNrpJQnoC9Loy3Lvv2AZHjSoXSbLmS-FhlRd-85vglKg=
49 作者:,标题:编写更好Python程序的5个技巧,时间:1507553279,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDcc3NBP-Hw1NoI8MHbNqfETFqdtzyzeSM3hhMq6JCaAF-tSTHXoG361zV1gIp2zFgFkVdYEhDtwjEqnuG5snFjRUM=
50 作者:冰是睡着的水,标题:500 行 Python 代码构建一个轻量级爬虫框架,时间:1507460022,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDccwLP6o*MQnF*rODy8JWF0lE2ha3dqyFZihjJcy28yYVyrpje-rWYObI4xLiyhAyW7-gc*gvSV1Cq5HO46xQYfXo=
51 作者:凡梦,标题:Android和Python之间的小秘密,时间:1505977153,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDcc7JpMl6NMhfDhyUCzP1oz-93FU363iqTMdyGSGEUBGfomppvTxRK6YBJQFx2N5o-F5vGgEoc5XO-ytYVi4uASHA=
52 作者:,标题:Python多线程学习,时间:1505905972,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDcc-lXkRCCxbqDfC-clFpxCWIfHwFMxLpPIIHADCe-Zf1mRau31gb7U9ihHmi7zZpzXTGzEDy9vQb7-7dWWikoTFE=
53 作者:,标题:十一去哪里?Python来帮你!,时间:1505139904,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDcc*2OpPausstVFAwxXXXvC8OV1ebFmMpJ8*Q7w-0CFWNnffkjCTVbQ8bWLYwyBwGu1-IR4HLCYdl*LQFj7-iMnK8=
54 作者:凡梦,标题:还不会Python正则表达式?看这篇文章试试,时间:1504964495,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDccz6DSCXiqZknbS5BSRtXRLZWqPaPQTZws0q0QI5-RUGm727trbP6sCken8lavDHyFGUIkSO*7E5c2noK9MR1E4s=
55 作者:,标题:Python开发简单爬虫,时间:1504794202,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDcc4Mp3qAW0gTtVCTnWP7aGukR3RTpeZ7DrypIY-efrMcKqfP4e2fsyTvT4hP32i*JMp-GjAB2bLIU296ZR3b8A3I=
56 作者:凡梦,标题:Python模拟登陆12306,时间:1504614650,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDcc0-vdhMcXi5lFDwCICBYako-zP27ZNK7OiBQmyyTP9LOIiqfAAbGwoZ6bCmOG5gLgwD-z*YR-UxAGwuXdz6hrOs=
57 作者:,标题:为什么说Python是伟大的入门语言,时间:1504510781,链接:/s?timestamp=1508164696&src=3&ver=1&signature=iIJXEqJtxS0IG1vEt7P4PPnNGBHjYYzvJr9xPZnOhRfYgAeU2MNQZidKCzP8jXBtl0jT2sVXsh1zP09j8bDcc9LWxZSOiUiU-n*7XW1wnFKzi*vvLrK2luaD*zacqViw2NkSe9IobfvEP-JORVZ087jIKdUhWFzwwdRgGTPixP0=
58 http://mp.weixin.qq.com/profile?src=3&timestamp=1508164694&ver=1&signature=03sGuUKmFI3W4EMT3e3kC-yoRJ6M*dEwlIkVwjz83qgF6kARbIVC*MRgelmDe7UQ5CmjKv*ZmJ**zjOAUTuIdA==
59 Traceback (most recent call last):
60 ...
执行结果

 

由于请求过于频繁,就会出现如下所示验证码:

 

楼主学艺不精,没有做验证码识别  ==''

 

posted @ 2017-10-16 22:43  lixin[at]hitwh  阅读(2102)  评论(0)  编辑  收藏  举报