request的基本用法

urllib.request的基本用法

(1)基本用法

    url = "http://www.baidu.com/"
    # Open the URL; the with-block closes the HTTP response as soon as the
    # body has been read instead of leaving the connection open.
    with urllib.request.urlopen(url) as response:
        # read() returns the raw body as bytes
        data = response.read()
    # bytes -> str
    str_data = data.decode("utf-8")
    # str -> bytes
    str_name = "baidu"
    bytes_name = str_name.encode("utf-8")

(2)URL参数中含有汉字时需要进行转义(百分号编码)

    url = "http://www.baidu.com/s?wd="
    name = "python中含有汉字"
    # A URL may only contain ASCII, so the non-ASCII query value has to be
    # percent-encoded. Quote just the value rather than the whole URL with a
    # wide `safe` set: that way spaces, '&', '#' and similar characters in the
    # value are escaped too.
    encode_new_url = url + urllib.parse.quote(name)
    # Send the request; the with-block closes the response afterwards
    with urllib.request.urlopen(encode_new_url) as response:
        print(response)
        # Read the body and decode it (decode() defaults to UTF-8)
        data = response.read().decode()
    # Save the page to a local file
    with open("02-encode.html", "w", encoding="utf-8") as f:
        f.write(data)

(3)传入字典类型的参数

    url = "http://www.baidu.com/s?"

    params = {
        "wd": "中文",
        "key": "zhang",
        "value": "san",
    }

    # urlencode() joins the dict into "k=v&k=v" and percent-encodes every key
    # and value, so the result is already pure ASCII and needs no further
    # quoting before it is appended to the URL.
    str_params = urllib.parse.urlencode(params)
    final_url = url + str_params

    # The with-block closes the response once the body has been read
    with urllib.request.urlopen(final_url) as response:
        data = response.read().decode("utf-8")
    print(data)

(4)添加header

第一种添加header的方式

    # Page to request
    url = "https://www.baidu.com"
    # Request headers, filled in one entry at a time
    headers = {}
    # Browser identification string
    headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    # A custom header
    headers['name'] = 'chen'
    # Build the request object with the headers attached at construction time
    request = urllib.request.Request(url=url, headers=headers)

第二种添加header的方式:动态添加

    url = "https://www.baidu.com"
    # Create the request object
    request = urllib.request.Request(url)
    # Attach a request header after construction
    request.add_header("User-Agent",
                       "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36")
    # Send the request; the with-block closes the response once it is read
    with urllib.request.urlopen(request) as response:
        data = response.read().decode("utf-8")
    # Full URL the request was built for
    final_url = request.get_full_url()
    print(final_url)
    with open("baidu.html", "w", encoding="utf-8") as f:
        f.write(data)

    # Headers attached to the request
    request_headers = request.headers
    print(request_headers)
    # Request stores header names via str.capitalize(), so the lookup key
    # is 'User-agent' rather than 'User-Agent'.
    user_agent = request.get_header('User-agent')
    print(user_agent)
    # Response headers (the attribute stays available after closing)
    print(response.headers)

(5)使用代理

    url = 'https://www.cnblogs.com/chenshy'
    # ProxyHandler selects the proxy by the scheme of the requested URL, so an
    # https:// URL needs an 'https' entry; with only 'http' configured this
    # request would go out directly, bypassing the proxy.
    proxy = {
        'http': '119.102.25.91:9999',
        'https': '119.102.25.91:9999',
    }
    # Handler that routes requests through the proxy
    proxy_handler = urllib.request.ProxyHandler(proxy)
    # Build a custom opener that uses the handler
    opener = urllib.request.build_opener(proxy_handler)
    # Send the request through the proxy; the with-block closes the response
    with opener.open(url) as response:
        data = response.read().decode("utf-8")
    print(data)

(6) cookie

a.在头部添加cookie

    url = 'https://www.yaozh.com/member/'
    headers = {
        # The header name is 'User-Agent' (with a hyphen). With 'User_Agent'
        # urllib does not recognise it and still sends its own default
        # User-Agent alongside the misspelled header.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        # Cookie string sent verbatim as a request header
        'Cookie': 'acw_tc=707c9fd115550786016938465e492bb70702a65dacd78c0969a841171ddf8d; PHPSESSID=t4rb1af4vmks8gete5oqfd6ub7; _ga=GA1.2.521923122.1555078606; _gid=GA1.2.523976398.1555078606; Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94=1555078606; MEIQIA_VISIT_ID=1JlnZOkmbJhJwfRjhyv0gTMf14i; MEIQIA_EXTRA_TRACK_ID=1JlnZL47AayFGs373mZAapsuPKv; yaozh_logintime=1555078687; yaozh_user=729821%09lifelover; yaozh_userId=729821; _gat=1; Hm_lpvt_65968db3ac154c3089d7f9a4cbb98c94=1555078691; yaozh_uidhas=1; yaozh_mylogin=1555078693; acw_tc=707c9fd115550786016938465e492bb70702a65dacd78c0969a841171ddf8d; MEIQIA_VISIT_ID=1JlnZOkmbJhJwfRjhyv0gTMf14i; MEIQIA_EXTRA_TRACK_ID=1JlnZL47AayFGs373mZAapsuPKv',
    }
    request = urllib.request.Request(url, headers=headers)
    # The with-block closes the response once the body has been read
    with urllib.request.urlopen(request) as response:
        data = response.read().decode('utf-8')
    print(data)

b.登录之后获取cookie ,cookiejar的使用

import urllib.request
from http import cookiejar
from urllib import parse

def login():
    """Log in to yaozh.com and save the member-centre page to ``test.html``.

    The login POST is sent through an opener backed by a ``CookieJar``, so
    cookies set by the server are stored in the jar and sent again
    automatically when the member page is requested with the same opener.

    Returns:
        None. The decoded member page is written to ``test.html`` in the
        current working directory.
    """
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration instead of keeping them in source.
    url = 'https://www.yaozh.com/login'
    # Login form fields
    login_form_data = {
        'username': 'lifelover',
        'pwd': 'chen19960319',
        'formhash': 'F456373F7B',
        # Plain URL: urlencode() below performs the percent-encoding, so a
        # pre-encoded value would end up encoded twice ('%' -> '%25').
        'backurl': 'https://www.yaozh.com/',
    }
    # Opener whose cookie processor records cookies into the jar
    cook_jar = cookiejar.CookieJar()
    cookie_handler = urllib.request.HTTPCookieProcessor(cook_jar)
    opener = urllib.request.build_opener(cookie_handler)

    # The header name is 'User-Agent' (hyphen); with 'User_Agent' urllib
    # would still send its own default User-Agent.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
    # A POST body must be URL-encoded and passed as bytes
    login_str = parse.urlencode(login_form_data).encode('utf-8')
    request = urllib.request.Request(url, headers=headers, data=login_str)
    # Only the cookies captured by the jar matter here, so the login
    # response is closed without reading its body.
    with opener.open(request):
        pass

    # Request the member centre with the same opener (and thus the cookies)
    center = 'https://www.yaozh.com/member/'
    center_request = urllib.request.Request(center, headers=headers)
    with opener.open(center_request) as response:
        data = response.read().decode('utf-8')
    with open('test.html', 'w', encoding='utf-8') as f:
        f.write(data)
posted @ 2019-04-14 10:48  折花载酒少年事  阅读(4349)  评论(0)  编辑  收藏  举报