爬虫——初级
普通同步代码的耗时
import requests
from functools import wraps
import time
def time_count(func):
    """Decorator that reports how long each call to ``func`` takes.

    After the wrapped function returns, one line of the form
    ``func <name> cost <seconds> s`` is printed (seconds shown with two
    decimals) and the function's own return value is passed back unchanged.
    """
    @wraps(func)
    def timed(*args, **kw):
        began = time.time()
        outcome = func(*args, **kw)
        elapsed = time.time() - began
        print('func {} cost {:.2f} s'.format(func.__name__, elapsed))
        return outcome

    return timed
@time_count
def normal():
    """Fetch the module-level ``URL`` twice, one request after the other.

    Each response's ``url`` attribute is printed. ``URL`` must be defined
    as a global before this function is called.
    """
    for _ in range(2):
        # Without a timeout requests can wait forever on a stalled server.
        r = requests.get(URL, timeout=10)
        print(r.url)
if __name__ == "__main__":
    # normal() looks URL up as a module-level global, so bind it first.
    URL = "https://morvanzhou.github.io"
    normal()
异步 IO(asyncio、aiohttp 等):IO 密集型任务使用异步 IO 来处理;计算密集型任务(即主要依赖 CPU 的任务)则采用多进程。
import asyncio
import aiohttp
from functools import wraps
import time
def time_count(func):
    """Decorator that prints how long ``func`` takes; works for sync and async.

    The printed line is
    ``func <name>-<first positional argument> cost <seconds> s``.

    For a coroutine function the wrapper is itself a coroutine function, so
    the clock covers the awaited body. Timing only the call would measure
    nothing but the creation of the coroutine object.

    Returns a wrapper that yields whatever ``func`` returns (for a coroutine
    function: a coroutine resolving to that value).
    """
    import inspect  # local import: only needed to detect coroutine functions

    def report(args, start):
        # Label with the first positional argument when there is one;
        # indexing args[0] unconditionally raised IndexError on no-arg calls.
        t = args[0] if args else None
        print('func {}-{} cost {:.2f} s'.format(func.__name__, t, time.time() - start))

    if inspect.iscoroutinefunction(func):
        @wraps(func)
        async def inner_func(*args, **kw):
            start = time.time()
            result = await func(*args, **kw)
            report(args, start)
            return result
    else:
        @wraps(func)
        def inner_func(*args, **kw):
            start = time.time()
            result = func(*args, **kw)
            report(args, start)
            return result

    return inner_func
@time_count
async def job(session):
    """Request the module-level ``URL`` via ``session`` and return the response URL as ``str``.

    ``session`` is the client session created by the caller (``main`` passes
    an ``aiohttp.ClientSession``).
    """
    # Entering the request as an async context manager releases the
    # response once its URL has been read, instead of leaving it open.
    async with session.get(URL) as response:
        return str(response.url)
# NOTE: left undecorated; the disabled `@time_count()` (with parentheses)
# would call the decorator without a function to wrap.
async def main(loop):
    """Run two ``job`` requests concurrently on one shared session and print the results.

    ``loop`` is the event loop on which the two tasks are scheduled.
    """
    async with aiohttp.ClientSession() as session:
        tasks = [loop.create_task(job(session)) for _ in range(2)]
        # gather returns results in task order; asyncio.wait hands back
        # unordered sets, so the printed list could come out in any order.
        all_results = await asyncio.gather(*tasks)
        print(all_results)
if __name__ == '__main__':
    URL = 'https://morvanzhou.github.io'
    # Create the loop explicitly: asyncio.get_event_loop() is deprecated when
    # no loop is running, and the original never closed the loop it obtained.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main(loop))
    finally:
        loop.close()
另一个异步示例
import asyncio
from functools import wraps
import time
def time_count(func):
    """Timing decorator usable on both plain functions and coroutine functions.

    Prints ``func <name>-<first positional argument> cost <seconds> s`` once
    the wrapped call has finished. When ``func`` is a coroutine function the
    measurement spans the awaited execution; a plain wrapper would stop the
    clock as soon as the coroutine object was created and always report
    about 0.00 s.

    The wrapped function's return value is passed through unchanged.
    """
    import inspect  # local import: only needed to detect coroutine functions

    def announce(args, started):
        # Fall back to None when there is no positional argument;
        # args[0] on an empty tuple raised IndexError.
        label = args[0] if args else None
        print('func {}-{} cost {:.2f} s'.format(func.__name__, label, time.time() - started))

    @wraps(func)
    async def timed_coro(*args, **kw):
        started = time.time()
        result = await func(*args, **kw)
        announce(args, started)
        return result

    @wraps(func)
    def timed_call(*args, **kw):
        started = time.time()
        result = func(*args, **kw)
        announce(args, started)
        return result

    return timed_coro if inspect.iscoroutinefunction(func) else timed_call
@time_count
async def job(t):
    """Suspend this coroutine for ``t`` seconds via ``asyncio.sleep``."""
    delay = t
    await asyncio.sleep(delay)
@time_count
async def main(loop):
    """Schedule ``job(0)``, ``job(1)`` and ``job(2)`` on ``loop`` and wait for all of them."""
    pending = []
    for seconds in range(3):
        pending.append(loop.create_task(job(seconds)))
    await asyncio.wait(pending)
if __name__ == '__main__':
    # Create the loop explicitly: asyncio.get_event_loop() is deprecated when
    # no loop is running, and the original never closed the loop it obtained.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main(loop))
    finally:
        loop.close()
同步举例
from functools import wraps
import time
def time_count(func):
    """Wrap ``func`` so that every call prints its duration.

    The message ``func <name> cost <seconds> s`` (two decimals) is printed
    after the call completes; the call's return value is returned as-is.
    """
    @wraps(func)
    def inner_func(*args, **kw):
        t0 = time.time()
        value = func(*args, **kw)
        t1 = time.time()
        message = 'func {} cost {:.2f} s'.format(func.__name__, t1 - t0)
        print(message)
        return value

    return inner_func
@time_count
def job(t):
    """Pause for ``t`` seconds using ``time.sleep``."""
    seconds = t
    time.sleep(seconds)
@time_count
def main():
    """Call ``job(0)``, ``job(1)`` and ``job(2)`` one after another."""
    # Plain loop: the calls are made for their side effect, so there is no
    # reason to build a throwaway list of their return values.
    for i in range(3):
        job(i)
if __name__ == "__main__":
    # Script entry point: run the synchronous demo.
    main()
如果有来生,一个人去远行,看不同的风景,感受生命的活力。。。