解决网站需要cookies登录和内容需要动态加载问题

PhantomJSMiddleware

 1 class PhantomJSMiddleware(object):
 2     @classmethod
 3     def process_request(cls, request, spider):
 4         from selenium import webdriver
 5         from scrapy.http import HtmlResponse
 6 
 7         driver = webdriver.PhantomJS(r'C:\InstallFile\Phantomjs\bin\phantomjs.exe')
 8         driver.get(request.url)
 9         driver.implicitly_wait(1)
10 
11         saved_cookies = driver.get_cookies()
12         driver2 = webdriver.PhantomJS(r'C:\InstallFile\Phantomjs\bin\phantomjs.exe')
13         driver2.get(request.url)
14         driver2.implicitly_wait(1)
15         driver2.delete_all_cookies()
16 
17         for cookie in saved_cookies:
18             for k in ('name','value','domain','path','expiry'):
19                 if k not in list(cookie.keys()):
20                     if k == 'expiry':
21                         cookie[k] = 1475825481
22 
23             driver2.add_cookie({k:cookie[k] for k in ('name', 'value', 'domain', 'path', 'expiry') if k in cookie})
24             print(cookie)
25         driver2.get(request.url)
26         driver2.implicitly_wait(1)
27 
28         content = driver.page_source.encode('utf-8')
29         driver.quit()
30 
31         return HtmlResponse(request.url, encoding='utf-8', body=content, request=request)

 

posted @ 2017-11-28 11:37  人微言轻1  阅读(716)  评论(0)  编辑  收藏  举报