import urllib
import urllib.request
import urllib.parse
import json
# Fetch one page of a Douban movie chart (start=0, limit=20) and save the raw
# JSON text to douban1.json.
url = 'https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=0&limit=20'
# Send a browser-like User-Agent instead of urllib's default one
# (presumably the endpoint rejects the default -- TODO confirm).
heeards = {
    'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
}
# Build a customised request object that carries the headers.
request = urllib.request.Request(url=url, headers=heeards)
# Fetch the response; the context manager closes the connection as soon as
# the body has been read, so it is not left open until interpreter exit.
with urllib.request.urlopen(request) as response:
    context = response.read().decode('utf-8')
# Save locally. Without an explicit encoding, open() uses the platform's
# locale encoding (GBK according to the original note), so UTF-8 is
# requested explicitly.
with open('douban1.json', 'w', encoding='utf-8') as fp:
    fp.write(context)
# Refactored version: the same download logic wrapped in reusable functions.
import urllib
import urllib.request
import urllib.parse
import json
def request(page, page_size=20):
    """Build the request for one page of the Douban chart listing.

    Args:
        page: 1-based page number; page 1 maps to offset ``start=0``.
        page_size: Number of entries per page. It is sent as ``limit`` and
            also used to compute the ``start`` offset. Defaults to 20.

    Returns:
        A ``urllib.request.Request`` for the page URL that carries a
        browser-like ``user-agent`` header.
    """
    headers = {
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
    }
    # Paging parameters: skip the entries of all previous pages.
    query = urllib.parse.urlencode({
        'start': (page - 1) * page_size,
        'limit': page_size,
    })
    # The base URL already ends with '&', so the encoded query is appended as is.
    base_url = 'https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&'
    return urllib.request.Request(url=base_url + query, headers=headers)
def get_response(reqest_data):
    """Send a prepared request and return the response body as text.

    Args:
        reqest_data: A ``urllib.request.Request`` (or URL string) to pass to
            ``urllib.request.urlopen``. The parameter keeps its original
            spelling so existing callers are unaffected.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Open the argument itself rather than a same-looking global name, so the
    # function works no matter what the caller's variables are called.
    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(reqest_data) as response:
        return response.read().decode('utf-8')
def download_data(context_data, page):
    """Write one page of response text to ``<page>_douban1.json``.

    The file is created (or overwritten) under a relative path, i.e. in the
    current working directory.

    Args:
        context_data: Text to store; written unchanged.
        page: Page identifier; its ``str()`` form prefixes the file name.
    """
    target = f'{page!s}_douban1.json'
    # The encoding is given explicitly because open() otherwise uses the
    # platform's locale encoding (GBK according to the original note).
    out = open(target, mode='w', encoding='utf-8')
    with out:
        out.write(context_data)
if __name__ == '__main__':
    # Ask for an inclusive range of page numbers; int() raises ValueError on
    # non-numeric input.
    start_page = int(input('请输入起始页'))
    end_page = int(input('请输入结束页'))
    # range() advances `page` by itself, so no manual increment is needed.
    for page in range(start_page, end_page + 1):
        # Build the request for this page.
        request_data = request(page)
        # Fetch the response body.
        context_data = get_response(request_data)
        # Save the body to its own per-page file.
        download_data(context_data, page)