# Mimic a browser when requesting a web page (based on Python 2).
# -*- coding: utf-8 -*-
import urllib2
import random
# User-Agent大全
# http://www.360doc.com/content/12/1012/21/7662927_241124973.shtml
# Pool of browser User-Agent strings; one is chosen at random per run.
# Each value follows the "product/version (comment)" form with no spaces
# around the slash, as real browsers send it.
user_agents = [
    # Safari 5.1 - Mac
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    # Safari 5.1 - Windows
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    # IE 9.0
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
    # IE 8.0
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
    # IE 7.0
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
    # IE 6.0
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    # Firefox 4.0.1 - Mac
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    # Firefox 4.0.1 - Windows
    'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    # Opera 11.11 - Mac
    'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
    # Opera 11.11 - Windows
    'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
    # Chrome 17.0 - Mac
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    # Maxthon
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',
    # Tencent TT
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',
    # The World 2.x
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
    # The World 3.x
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)',
    # Sogou browser 1.x
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
    # 360 browser
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
    # Avant
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)',
    # Green Browser
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
]
# Page to fetch; the '****' looks like a placeholder for the real blog
# path - TODO confirm before running.
url = 'http://blog.csdn.net/****'

# Headers that make the request look like it comes from a regular browser.
# The HTTP method and target belong to the request line, which urllib2
# derives from the URL, so no 'GET' pseudo-header is included here.
my_headers = {
    # A different browser identity is picked each time the script runs.
    'User-Agent': random.choice(user_agents),
    'Host': 'blog.csdn.net',
    'Referer': 'http://blog.csdn.net/',
}
def get_content(url_add, headers):
    """Fetch ``url_add`` with the given headers and return the response body.

    Args:
        url_add: URL to request.
        headers: Mapping of HTTP header names to values sent with the request.

    Returns:
        The raw response body as returned by the response's ``read()``.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            request cannot be completed.
    """
    # Request the URL that was passed in, not the module-level ``url``.
    req = urllib2.Request(url_add, headers=headers)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Always release the underlying connection, even if read() fails.
        response.close()
# Download the page with the browser-like headers and write the HTML to stdout.
print(get_content(url, my_headers))