python urllib 基础4

import  urllib
import urllib.request
import urllib.parse
import json
# Fetch the first page of Douban's movie chart (a JSON endpoint) and save the
# raw response text to a local file.

url = 'https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=0&limit=20'

# Request headers: a browser-like User-Agent string is sent with the request.
heeards = {
    'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
}

# Build a customized request object that carries the headers.
request = urllib.request.Request(url=url, headers=heeards)

# Fetch the response. The context manager closes the connection even when
# reading or decoding raises.
with urllib.request.urlopen(request) as response:
    context = response.read().decode('utf-8')

# Save to disk. Without an explicit encoding, open() uses the locale's default
# (for example GBK on a Chinese-locale Windows), so UTF-8 is requested here.
with open('douban1.json', 'w', encoding='utf-8') as fp:
    fp.write(context)

封装函数

import  urllib
import urllib.request
import urllib.parse
import json

def request(page):
    """Build the urllib Request for one page of the Douban chart listing.

    Every page holds 20 entries, so page ``n`` starts at offset
    ``(n - 1) * 20``.

    Args:
        page: 1-based page number.

    Returns:
        A ``urllib.request.Request`` for the page URL that carries a
        browser-like ``user-agent`` header.
    """
    page_size = 20
    endpoint = 'https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&'

    # Paging parameters are appended to the fixed part of the query string.
    query = urllib.parse.urlencode([
        ('start', (page - 1) * page_size),
        ('limit', page_size),
    ])

    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
    return urllib.request.Request(
        url=endpoint + query,
        headers={'user-agent': user_agent},
    )
def get_response(reqest_data):
    """Open the given request and return the response body as UTF-8 text.

    Args:
        reqest_data: A ``urllib.request.Request`` (or a URL string) to open.
            The misspelled parameter name is kept so that existing callers
            passing it by keyword keep working.

    Returns:
        The complete response body decoded as ``str``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Open the argument that was actually passed in; the function must not
    # depend on a module-level variable happening to exist. The context
    # manager closes the response even if reading or decoding fails.
    with urllib.request.urlopen(reqest_data) as response:
        return response.read().decode('utf-8')
# obj = json.loads(context)
# print(context)
def download_data(context_data,page):
    """Write one page of response text to ``<page>_douban1.json``.

    The file is created (or overwritten) relative to the current working
    directory.

    Args:
        context_data: Text to write.
        page: Page number; its ``str()`` form becomes the file-name prefix.
    """
    target_name = str(page) + '_douban1.json'
    # The encoding is given explicitly because open() otherwise uses the
    # locale's default encoding (for example GBK).
    with open(target_name, 'w', encoding='utf-8') as out_file:
        out_file.write(context_data)

if __name__ == '__main__':
    # Ask for the inclusive range of pages to download.
    start_page = int(input('请输入起始页'))
    end_page = int(input('请输入结束页'))

    # range() already advances `page`; no manual increment is needed.
    for page in range(start_page, end_page + 1):
        # Build the request for this page.
        request_data = request(page)
        # Fetch the response body.
        context_data = get_response(request_data)
        # Save the page to disk.
        download_data(context_data, page)
posted @ 2024-07-11 10:38  donghongchao  阅读(2)  评论(0)  编辑  收藏  举报