![](https://img2020.cnblogs.com/blog/1637570/202003/1637570-20200315164030522-2056604365.jpg)
![](https://img2020.cnblogs.com/blog/1637570/202003/1637570-20200315172016208-89731437.jpg)
![](https://img2020.cnblogs.com/blog/1637570/202003/1637570-20200315165447525-1840917538.jpg)
![](https://img2020.cnblogs.com/blog/1637570/202003/1637570-20200315164600229-812770005.jpg)
![](https://img2020.cnblogs.com/blog/1637570/202003/1637570-20200315170240667-297994824.jpg)
import requests
def getHTMLText(url):
    """Fetch *url* and return the first 1000 characters of the response text.

    The request is sent with a ``user-agent`` header of ``Mozilla/5.0`` and a
    30-second timeout. On success the headers that were actually sent and a
    separator line are printed to stdout before the text is returned.

    Args:
        url: Address to request.

    Returns:
        The first 1000 characters of the decoded body, or the string
        '产生异常' when the request fails or the server answers with an
        HTTP error status.
    """
    kv = {'user-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, timeout=30, headers=kv)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx status codes
    except requests.RequestException:
        # Only request-related failures are turned into the sentinel string;
        # KeyboardInterrupt and programming errors are no longer swallowed.
        return '产生异常'
    # Decode using the encoding guessed from the body rather than the one
    # taken from the response headers.
    r.encoding = r.apparent_encoding
    print(r.request.headers)
    print('---------------')
    return r.text[:1000]
if __name__ == '__main__':
    # Demo: fetch a page and print the truncated text (or the failure string).
    print(getHTMLText('http://www.baidu.com'))
import requests
def search(url, keyword):
    """Request *url* with *keyword* as the ``wd`` query parameter.

    On success the final request URL (including the encoded query string) and
    a separator line are printed to stdout before the text is returned.

    Args:
        url: Address of the search endpoint.
        keyword: Value sent as the ``wd`` query parameter.

    Returns:
        The first 1000 characters of the decoded body, or the string
        '爬取失败' when the request fails or the server answers with an
        HTTP error status.
    """
    kv = {'wd': keyword}
    try:
        # A timeout keeps an unresponsive server from blocking forever.
        r = requests.get(url, params=kv, timeout=30)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx status codes
    except requests.RequestException:
        # Only request-related failures are turned into the sentinel string;
        # KeyboardInterrupt and programming errors are no longer swallowed.
        return '爬取失败'
    # Decode using the encoding guessed from the body rather than the one
    # taken from the response headers.
    r.encoding = r.apparent_encoding
    print(r.request.url)
    print('---------------')
    return r.text[:1000]
if __name__ == '__main__':
    # Demo: run one keyword query and print the truncated result page
    # (or the failure string).
    print(search('http://www.baidu.com/s', '搜索的内容'))
import requests
def saveImg(url, path):
    """Download *url* and write the raw response bytes to *path*.

    Args:
        url: Address of the resource to download.
        path: Destination file; opened in binary write mode, so an existing
            file is overwritten.

    Returns:
        '保存成功' once the bytes have been written, or '爬取失败' when the
        request fails, the server answers with an HTTP error status, or the
        file cannot be written.
    """
    try:
        # A timeout keeps an unresponsive server from blocking forever.
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx status codes
        # r.content is the undecoded body, so no text encoding is set here.
        # The with-block closes the file; no explicit close() is needed.
        with open(path, 'wb') as f:
            f.write(r.content)
    except (requests.RequestException, OSError):
        # Network and filesystem failures are reported with the same string
        # as before; unrelated exceptions are no longer swallowed.
        return '爬取失败'
    return '保存成功'
if __name__ == '__main__':
    # Demo: download one URL to a local file and print the status string.
    print(saveImg('https://www.baidu.com/', 'C://za//temp.jpg'))