# Source: https://www.bilibili.com/video/av19956343?p=143

#POST请求

import urllib.request
import urllib.parse

url = "http://www.sunck.wang:8085/form"
# Collect the form fields to submit in a dict.
# The keys are taken from the target page; they are usually the values of
# the `name` attributes of its <input> tags.
data = {
    "username": "sunck",
    "passwd": "666",
}
# Package the fields: URL-encode them into a query string, then encode that
# string to UTF-8 bytes for use as the request body.
postData = urllib.parse.urlencode(data).encode("utf-8")
# Build the request object; passing a body makes urllib issue a POST.
req = urllib.request.Request(url, postData)
# Attach a browser-like User-Agent header to the request.
req.add_header(
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
)
# Send the request. The `with` block closes the response (and its socket)
# as soon as the body has been read, even if reading or decoding fails.
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))

# Source: https://www.bilibili.com/video/av19956343?p=144

#抓取网页动态Ajax请求的数据

import urllib.request
import json

# Disable HTTPS certificate verification: replace the factory that builds the
# default HTTPS context with the one that builds an unverified context.
# NOTE(review): both names are private `ssl` attributes, and the override is
# process-wide, so presumably every later HTTPS request made through urllib
# skips certificate checks -- acceptable for a demo, confirm before reuse.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

def ajaxCrawler(url):
    """Fetch ``url`` and return its response body parsed as JSON.

    The request is sent with a browser-like ``User-Agent`` header. The body
    is decoded as UTF-8 before being handed to ``json.loads``.

    Args:
        url: Address of the resource to fetch; it is passed unchanged to
            ``urllib.request.Request``.

    Returns:
        The Python object produced by ``json.loads`` for the response body.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
        json.JSONDecodeError: If the body is not valid JSON.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"}
    req = urllib.request.Request(url, headers=headers)

    # The context manager closes the response even when reading or decoding
    # raises, so the underlying connection is never leaked.
    with urllib.request.urlopen(req) as response:
        jsonStr = response.read().decode("utf-8")

    return json.loads(jsonStr)
# Request ten consecutive pages of the chart (start offsets 20, 40, ..., 200,
# 20 entries per page) and print how many entries each page returned.
# The original iterated over the tuple (1, 11) and performed the fetch after
# the loop had finished, so only a single page was ever requested.
for i in range(1, 11):
    url = "https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=" + str(i * 20) + "&limit=20"
    info = ajaxCrawler(url)
    print(len(info))

 

# posted on 2020-02-07 23:35  雨过山  阅读(181)  评论(0)  编辑  收藏  举报