爬取天猫超市的牛奶信息时,遇到重定向问题:
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
# Python 2 script: fetch a Tmall supermarket listing page with a
# cookie-aware urllib2 opener and dump the final URL and the response body.
import sys

# Python 2-only hack that forces the process-wide default str<->unicode
# codec to UTF-8; kept as in the original script.
reload(sys)
sys.setdefaultencoding('utf-8')

import urllib2
import urllib
import re
from bs4 import BeautifulSoup
import cookielib

url = 'https://list.tmall.com/search_product.htm?spm=a3204.7084717.1996500281.2.EUMiGi&cat=51462017'

# Browser-like request headers; the listing URL itself is sent as the referer.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  ' Chrome/45.0.2454.101 Safari/537.36',
    'referer': url,
}

# Keep cookies across the redirect chain by attaching a cookie processor
# backed by an in-memory jar to the opener.
cookie = cookielib.CookieJar()
handler2 = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener()
opener.add_handler(handler=handler2)
opener.addheaders = headers.items()
urllib2.install_opener(opener=opener)

page = opener.open(url)
try:
    # Observed by the author: the final URL printed here is the login page,
    # i.e. the request was redirected.
    # NOTE(review): presumably the site requires a logged-in session for
    # this listing -- confirm before relying on the returned HTML.
    print(page.url)
    html = page.read()
finally:
    # Release the underlying connection even if reading fails.
    page.close()

print(html)