import requests
def getHTMLText(url):
    """Fetch *url* and return the first 1000 characters of the response body.

    Sends a GET request with a browser-like ``user-agent`` header and a
    30-second timeout. On success it prints the headers of the request that
    was sent, followed by a separator line, and decodes the body using the
    encoding detected from the content.

    Args:
        url: Address of the page to fetch.

    Returns:
        At most the first 1000 characters of the decoded body, or the string
        '产生异常' when the request fails.
    """
    kv = {'user-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, timeout=30, headers=kv)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        # Use the encoding guessed from the body rather than the one
        # announced in the response headers.
        r.encoding = r.apparent_encoding
        print(r.request.headers)
        print('---------------')
        return r.text[:1000]
    except requests.RequestException:
        # Only request-related failures are turned into the sentinel string;
        # KeyboardInterrupt, SystemExit and programming errors propagate.
        return '产生异常'
if __name__ == '__main__':
    # Demo run: fetch the Baidu home page and show the start of its HTML.
    url = 'http://www.baidu.com'
    print(getHTMLText(url=url))
import requests
def search(url, keyword):
    """Send *keyword* as the ``wd`` query parameter to *url* and return the result.

    On success the final request URL (including the encoded query string) is
    printed, followed by a separator line, and the body is decoded using the
    encoding detected from the content.

    Args:
        url: Search endpoint to query.
        keyword: Search term, passed as the ``wd`` query parameter.

    Returns:
        At most the first 1000 characters of the decoded body, or the string
        '爬取失败' when the request fails.
    """
    kv = {'wd': keyword}
    try:
        # A timeout keeps an unresponsive server from blocking forever.
        r = requests.get(url, params=kv, timeout=30)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        r.encoding = r.apparent_encoding
        print(r.request.url)
        print('---------------')
        return r.text[:1000]
    except requests.RequestException:
        # Only request-related failures are turned into the sentinel string;
        # KeyboardInterrupt, SystemExit and programming errors propagate.
        return '爬取失败'
if __name__ == '__main__':
    # Demo run: query the Baidu search endpoint with a sample keyword.
    url = 'http://www.baidu.com/s'
    keyword = '搜索的内容'
    print(search(url=url, keyword=keyword))
import requests
def saveImg(url, path):
    """Download the resource at *url* and write its raw bytes to *path*.

    Args:
        url: Address of the resource to download.
        path: Destination file path; the file is opened in binary write
            mode, so an existing file is overwritten.

    Returns:
        '保存成功' when the bytes were written, or '爬取失败' when the request
        or the file write fails.
    """
    try:
        # A timeout keeps an unresponsive server from blocking forever.
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        # The body is written as raw bytes, so no text encoding is set and
        # no charset detection is run on the content.
        with open(path, 'wb') as f:
            f.write(r.content)  # the with-block closes the file on exit
        return '保存成功'
    except (requests.RequestException, OSError):
        # Covers network/HTTP failures and file-system errors (missing
        # directory, permission denied); other exceptions propagate.
        return '爬取失败'
if __name__ == '__main__':
    # Demo run: download the configured URL and store it at the given path.
    url = 'https://www.baidu.com/'
    path = 'C://za//temp.jpg'
    print(saveImg(url=url, path=path))