python3-asyncio异步爬虫测试（无注释demo）

#!/usr/bin/python3
"this is a spider"

import re

import asyncio
import urllib.request, urllib.parse, urllib.error
import http.cookiejar


async def html(urlAddr):
    """Fetch ``urlAddr`` and return the response body decoded as UTF-8.

    ``urllib.request.urlopen`` is a blocking call, so it is executed in the
    event loop's default executor. Awaiting this coroutine therefore does
    not stall other tasks scheduled on the same loop, which lets several
    fetches overlap.

    Args:
        urlAddr: URL string to request.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` when the request
            fails.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """

    def _fetch():
        # Blocking download; runs in a worker thread, not on the event loop.
        req = urllib.request.Request(urlAddr)
        # For HTTP URLs urlopen returns an http.client.HTTPResponse; the
        # with-block closes it even if read()/decode() raises.
        with urllib.request.urlopen(req) as response:
            return response.read().decode("utf-8")

    # Inside a coroutine this is the loop that is currently running us.
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(None, _fetch)


def re_html(html_image):
    """Print the ``data-sa...`` fragments found in a finished fetch.

    Written to be used as a done-callback: ``main`` registers it on each
    task via ``add_done_callback``.

    Args:
        html_image: object exposing ``result()`` that returns the page text
            (despite the name, this is the finished task/future, not the
            text itself).

    Returns:
        None. Output goes to stdout: the ``html_image`` object itself, one
        line per regex match, then a separator line.
    """
    # Echo the future object first, exactly as received.
    print(html_image)
    page_text = html_image.result()

    # Greedy per-line match: from "data-sa" up to the last digit followed
    # by a double quote on that line ('.' does not cross newlines).
    pattern = re.compile(r'data-sa.*\d"')
    for match in pattern.finditer(page_text):
        print(match.group(0))

    print("------------------")


def main():
    """Fetch two Lagou listing pages on one event loop and print the matches.

    One task is created per URL with ``re_html`` attached as its
    done-callback; the loop then runs until every task has finished.
    """
    urls = (
        "http://www.lagou.com/zhaopin/Python/1",
        "http://www.lagou.com/zhaopin/Python/3",
    )

    # Create the loop explicitly *before* scheduling anything: relying on
    # ensure_future() to conjure a loop implicitly is deprecated and fails
    # on recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        tasks = []
        for url in urls:
            task = loop.create_task(html(url))
            task.add_done_callback(re_html)
            tasks.append(task)

        loop.run_until_complete(asyncio.wait(tasks))
    finally:
        # Release the loop's resources even if a fetch or callback failed.
        loop.close()

if __name__ == "__main__":
    # Run the crawl only when executed as a script, so that importing this
    # module does not trigger network requests.
    main()

posted @ 2017-02-26 17:21 sawyer_aquarius 阅读(414) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

Aquarius

https://github.com/AquariusStar

python3-asyncio异步爬虫测试（无注释demo）

公告