爬虫之Urllib

import socket
import urllib.error
import urllib.parse
import urllib.request

# Fetch a page with a plain GET request. The context manager closes the
# HTTP response (and its socket) as soon as the body has been consumed.
with urllib.request.urlopen("http://www.baidu.com") as response:
    # read() returns bytes; decode the page source as UTF-8 before printing.
    print(response.read().decode('utf-8'))

# Send a POST request: supplying `data` to urlopen() makes it issue a POST.
# The form fields are URL-encoded into a query string and then encoded to
# bytes, because urlopen() only accepts a bytes payload.
data = urllib.parse.urlencode({"hello": "world"}).encode("utf-8")
with urllib.request.urlopen("http://httpbin.org/post", data=data) as response:
    print(response.read().decode("utf-8"))

# Send a GET request with a deliberately short timeout to demonstrate
# timeout handling.
try:
    with urllib.request.urlopen("http://httpbin.org/get", timeout=0.1) as response:
        print(response.read().decode("utf-8"))
except socket.timeout:
    # A timeout while reading the body is raised directly and is not
    # wrapped in URLError, so it needs its own handler.
    print("time out!")
except urllib.error.URLError as e:
    # A timeout while connecting is wrapped in URLError with the original
    # exception stored in `reason`; any other failure (DNS, refused
    # connection, HTTP error status) is reported as such, not as a timeout.
    if isinstance(e.reason, socket.timeout):
        print("time out!")
    else:
        print("request failed:", e.reason)


# Inspect response metadata instead of the body: the HTTP status code and
# the value of the "Server" response header. The context manager closes the
# connection afterwards.
with urllib.request.urlopen("http://www.baidu.com") as response:
    print(response.status)
    print(response.getheader("Server"))

# POST form data with a browser-like User-Agent by building an explicit
# Request object (urlopen() alone cannot set custom headers).
url = "http://httpbin.org/post"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"
}
# URL-encode the form fields first, then encode the resulting string to
# bytes. (Passing `encoding` to urlencode() and calling bytes() on the
# returned str without an encoding raises TypeError.)
data = urllib.parse.urlencode({'name': 'eric'}).encode("utf-8")
req = urllib.request.Request(url=url, data=data, headers=headers, method="POST")
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))

# GET a page while sending a browser-like User-Agent header, via an
# explicit Request object. No `data` is supplied, so the request is a GET.
url = "https://www.douban.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"
}
req = urllib.request.Request(url=url, headers=headers)
# The context manager closes the response once the body has been printed.
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))
posted @ 2020-06-08 17:39 Eliphaz 阅读(77) 评论(0) 收藏举报
刷新页面返回顶部
Eliphaz

爬虫之Urllib

公告