Python 爬取百度文库课件
依赖库:re、selenium、requests
源码:
from selenium import webdriver
import re
import requests
import re
import requests
def open_img(items):
    """Download each image URL and yield the raw body of its response.

    Args:
        items: Iterable of image URL strings as captured from HTML source.
            URLs may still contain the HTML-escaped ampersand ``&amp;``.

    Yields:
        bytes: ``content`` of the HTTP response for each URL, in input order,
        exactly once per URL.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` (for
            example on connection failure or timeout).
    """
    for item in items:
        # URLs scraped out of markup carry "&" as "&amp;"; restore the literal
        # ampersand so the query string is sent as the page intended.
        link = item.replace('&amp;', '&')
        # A timeout keeps one stalled server from hanging the whole run.
        rsp = requests.get(link, timeout=30)
        # NOTE(review): the HTTP status is not checked, so an error page body
        # would be yielded like any image — confirm whether that is acceptable.
        yield rsp.content
# Open a Baidu Wenku PPT page in Chrome, extract the slide image URLs from the
# rendered page source, and save every image as a numbered JPEG file in the
# current working directory.
url = 'https://wenku.baidu.com/view/4e3d35d969eae009581becd5.html?from=search'  # may be replaced with another PPT page URL
browser = webdriver.Chrome()
try:
    browser.get(url)
    html = browser.page_source
    # Non-greedy match from each "ppt-page-item" container to the first src
    # attribute that follows it; re.S lets ".*?" span line breaks.
    pattern = re.compile('<div class="ppt-page-item.*?src="(.*?)".*?>', re.S)
    items = re.findall(pattern, html)
    n = 0
    for i in open_img(items):
        # Files are numbered from 0 (0.jpeg, 1.jpeg, ...).
        with open('%d.jpeg' % n, 'wb') as file:
            file.write(i)
        n += 1
        # The progress message counts from 1, hence it is printed after the
        # increment.
        print('第%d张图片下载完成' % n)
finally:
    # quit() ends the whole WebDriver session (browser and driver), whereas
    # close() only closes the current window.
    browser.quit()
# Block until the user presses Enter — presumably to keep the console window
# open when the script is launched by double-click.
input()