import requests
import os

# HTTP headers copied from a logged-in browser session on kuwo.cn.
# NOTE(review): the Cookie / csrf values expire when the page is refreshed —
# they must be refreshed before each run (see the notes at the bottom of the file).
headers = {
    'Cookie': '_ga=GA1.2.701818100.1612092981; _gid=GA1.2.748589379.1612092981; Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1612092982; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1612094717; kw_token=ZALW965FXG',
    'csrf': 'ZALW965FXG',
    'Host': 'www.kuwo.cn',
    'Referer': 'https://www.kuwo.cn/singer_detail/1600',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'
}

# Output directory for downloaded tracks; makedirs(exist_ok=True) is the
# idempotent, race-free equivalent of the exists()/mkdir() pair.
os.makedirs('mics', exist_ok=True)
def Index(page):
    """Fetch one page (30 tracks) of artist 1600's song list and download each track.

    Args:
        page: 1-based page number forwarded to the kuwo artistMusic API
              via the ``pn`` query parameter.
    """
    url = ('https://www.kuwo.cn/api/www/artist/artistMusic?artistid=1600'
           '&pn=' + str(page) +
           '&rn=30&httpsStatus=1&reqId=50b03180-63ca-11eb-b714-332080487537')

    response = requests.get(url=url, headers=headers).json()
    musicList = response['data']['list']
    print(musicList)
    for music in musicList:
        # Each entry carries the track id ('rid') and title ('name').
        musicSave(music['rid'], music['name'])


def musicSave(rid, name):
    """Resolve the playable mp3 URL for track *rid* and save it under mics/.

    Args:
        rid: kuwo track id taken from the artistMusic listing.
        name: track title, used as the output file name.
    """
    url = ('https://www.kuwo.cn/url?format=mp3&rid=' + str(rid) +
           '&response=url&type=convert_url3&br=128kmp3&from=web'
           '&t=1612100615341&httpsStatus=1&reqId=50b38ce1-63ca-11eb-b714-332080487537')
    response = requests.get(url=url, headers=headers).json()
    mp3path = response['url']
    print(mp3path)
    # Deliberately sent WITHOUT the site headers: the media host rejects the
    # request when they are attached (see the notes at the bottom of the file).
    data = requests.get(url=mp3path).content

    # 'wb' (not 'ab'): re-running the script must overwrite an existing file —
    # appending raw bytes produces a corrupt mp3.
    # os.path.join replaces the literal 'mics\{}' whose backslash is not a
    # path separator on non-Windows systems.
    with open(os.path.join('mics', '{}.mp3'.format(name)), 'wb') as f:
        f.write(data)
    # Original used ',format(name)' (comma, not dot) so the title was never
    # interpolated into the completion message.
    print('{}.mp3已经下载完成'.format(name))



# Crawl pages 1 through 10 of the artist's catalogue.
for page_no in range(1, 11):
    Index(page_no)

1.地址：歌曲列表地址和播放地址需要分别抓取。

2.'Cookie' 和 'csrf' 在网页刷新后需要更新；大量爬取可以使用代理 IP 和伪造 User-Agent，或者做 js 逆向（后续更新）。

出现错误：`data = requests.get(url=mp3path).content`（五颗红星处）。原来那里我添加 headers 后 get 不了导致失败，后面把 headers 去掉后就能用了。

  data = requests.get(mp3path,headers=headers).content

 

 

1.演示一下用免费代理ip爬虫

import urllib.request

def creat_proxy_handler():
    """Probe a list of free HTTP proxies against baidu.com, one at a time."""
    target = "https://www.baidu.com"
    # Free proxy pool; each entry maps scheme -> "host:port".
    candidates = [
        {"http": "60.168.207.219:9999"},
        {"http": "58.23.67.208:9999"},
        {"http": "42.7.28.217:9999"},
        {"http": "61.145.49.177:9999"},
        {"http": "36.250.156.78:9999"},
        {"http": "36.248.133.145:9999"},
        {"http": "42.56.238.117:9999"},
        {"http": "36.249.119.34:9999"},
        {"http": "58.22.177.60:9999"},
    ]
    for candidate in candidates:
        print(candidate)
        # Build an opener that routes traffic through this proxy.
        handler = urllib.request.ProxyHandler(candidate)
        opener = urllib.request.build_opener(handler)
        try:
            # Issue the request through the proxy; 1s timeout keeps dead
            # proxies from stalling the loop.
            data = opener.open(target, timeout=1).read()
            print("haha")
        except Exception as e:
            print(e)

# Demo entry point: try each free proxy once (requires network access).
creat_proxy_handler()

带着cookie去自动登录

import urllib.request
from http import cookiejar
from urllib import parse
"""
直接获取 个人中心
1代码登录
2.自动带着cookies

1.代码登录 
    1.1登录的网址
    login_url ='https://www.yaozh.com/login'
    1.2登录的参数
    1.3发送登录请求

2.代码带着cookes 访问   
"""
login_url ='https://www.yaozh.com/login'
login_from_data={
    "username":"xiaomaoera12",
    "pwd":"lina081012",
    "formhash":"89B42EA5FF",
    "backurl":"https%3A%2F%2Fjob.yaozh.com%2FtopicComp%2F14"
}
# 1.3发送登录请求POST
cook_jar = cookiejar.CookieJar()
# 定义有添加cook功能的处理器
cook_hanlder = urllib.request.HTTPCookieProcessor(cook_jar)
# 根据处理器生成opener
opener = urllib.request.build_opener(cook_hanlder)
headers={
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
}
login_str = parse.urlencode(login_from_data).encode("utf-8")
login_request= urllib.request.Request(login_url,headers=headers,data=login_str)
opener.open(login_request)

center_url="https://www.yaozh.com/member/"
center_request = urllib.request.Request(center_url,headers=headers)
response = opener.open(center_url)

data=response.read()
print(data)
with open('02cook.html','wb') as f:
    f.write(data)