requests-get请求
# requests GET basics: issue a GET request and inspect the Response object.
import requests

response = requests.get('http://www.baidu.com')  # send a GET request to the URL
print(response)                # the Response object itself
print(response.status_code)   # HTTP status code
print(response.text)          # response body as text
print(response.cookies)       # cookies set by the server
另外还有response.url,response.history历史记录
# The various request methods supported by requests.
# NOTE: httpbin.org exposes paths for GET/POST/DELETE, but HEAD and
# OPTIONS are HTTP *methods*, not paths — http://httpbin.org/head and
# http://httpbin.org/options return 404, so those verbs are sent to /get.
import requests

requests.get('http://httpbin.org/get')
requests.post('http://httpbin.org/post')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/get')      # was '/head', which 404s
requests.options('http://httpbin.org/get')   # was '/options', which 404s
# A simple GET request; response.text is the response body.
import requests

response = requests.get('http://httpbin.org/get')
print(response.text)

# GET with query parameters: pass a dict through params=,
# which is the equivalent of urllib.parse.urlencode.
data = {
    'name': 'germy',
    'age': 22,
}
response = requests.get('http://httpbin.org/get', params=data)
print(response.text)
# Parsing JSON: response.json() is equivalent to json.loads(response.text).
import requests
import json

response = requests.get('http://httpbin.org/get')
print(response.json())
print('*' * 100)
print(json.loads(response.text))
# Fetch and save binary data; response.content holds the raw bytes.
import requests

response = requests.get('http://inews.gtimg.com/newsapp_ls/0/1531939223/0')
# print(response.content)
with open('D://tomas.jpg', 'wb') as f:
    f.write(response.content)
# Adding headers to a request.
import requests

response = requests.get('https://www.zhihu.com/explore')
# print(response.text)  # without headers the server blocks the scraper:
# result: <html><body><h1>500 Server Error</h1>

# Fix: supply a browser-like User-Agent header.
# The value is built with implicit string concatenation — the original
# used a backslash line continuation *inside* the quoted string, which
# leaked the source indentation whitespace into the header value.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 LBBROWSER'
}
response = requests.get('https://www.zhihu.com/explore', headers=headers)
print(response.text)