Live2D

qq音乐付费爬取

使用selenium+Browsermob-Proxy实现付费爬取。(付费爬取需要有vip账号)

selenium负责实现自动化模拟点击,Browsermob-Proxy抓取请求。

分析

首先打开qq音乐网址并搜索歌曲(URL中的歌曲名需要经过UrlEncode编码),例如:https://y.qq.com/n/ryqq/search?w=美人鱼&t=song&remoteplace=txt.yqq.top

image

点击播放,然后按F12打开开发者工具,在Network面板中筛选media,圈起来的地址就是我们要下载的音频地址

image

将地址粘贴出来,并点击下载

image

会得到一个.m4a的文件,我们只需要使用第三方工具ffmpeg将其转换为mp3格式即可。

源代码

爬取源代码如下

package com.watchmen.selenium;

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.List;
import java.util.stream.Collectors;

import io.netty.handler.codec.http.HttpHeaders;
import org.openqa.selenium.*;

import io.netty.handler.codec.http.HttpRequest;
import io.netty.handler.codec.http.HttpResponse;
import net.lightbody.bmp.BrowserMobProxy;
import net.lightbody.bmp.BrowserMobProxyServer;
import net.lightbody.bmp.client.ClientUtil;
import net.lightbody.bmp.filters.RequestFilter;
import net.lightbody.bmp.proxy.CaptureType;
import net.lightbody.bmp.util.HttpMessageContents;
import net.lightbody.bmp.util.HttpMessageInfo;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.FirefoxOptions;
import org.openqa.selenium.firefox.FirefoxProfile;
import org.openqa.selenium.interactions.Actions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

/**
 * Drives Firefox through a BrowserMob proxy to search QQ Music for each title
 * listed in {@code music.txt}, start playback of the first search result, and
 * download the audio request observed by the proxy as an {@code .m4a} file.
 *
 * <p>Loading this class starts the proxy and launches Firefox (see the static
 * initializer). Neither is shut down by this class, and {@link #start()}
 * returns right after the last play click without waiting for the download.
 */
public class SeleniumBrowserMobProxy {
    /** Host prepended to the request URI seen by the proxy filter. */
    private static final String AUDIO_HOST = "https://dl.stream.qqmusic.qq.com";
    /** Directory (with trailing separator) that downloaded files are written to. */
    private static final String SAVE_DIR = "D:\\app\\qqmusic\\localMusic\\";
    /** Text file holding the song titles to search for. */
    private static final String MUSIC_LIST_PATH = "src/main/resources/music.txt";

    public static BrowserMobProxy browserMobProxy;
    public static FirefoxDriver driver;
    /**
     * Title of the song whose playback was triggered most recently. Written by
     * {@link #catchMusic(String)} and read inside the proxy request filter, so it
     * is volatile to make the write visible across threads.
     */
    public volatile String songName;

    static {
        browserMobProxy = new BrowserMobProxyServer();
        browserMobProxy.start();
        // setHarCaptureTypes replaces the whole capture set, so a preceding
        // enableHarCaptureTypes call would have no lasting effect; only the
        // effective configuration is kept.
        browserMobProxy.setHarCaptureTypes(CaptureType.RESPONSE_CONTENT);
        browserMobProxy.newHar("kk");
        driver = catchConfig();
    }

    public static void main(String[] args) throws Exception {
        new SeleniumBrowserMobProxy().start();
    }

    /**
     * Registers the audio-capturing request filter and then processes every
     * title from the music list in order.
     *
     * @throws Exception if the list cannot be read or a browser step fails
     */
    private void start() throws Exception {
        browserMobProxy.addRequestFilter(new RequestFilter() {
            @Override
            public HttpResponse filterRequest(HttpRequest request, HttpMessageContents contents,
                                              HttpMessageInfo messageInfo) {
                HttpHeaders headers = request.headers();
                String dest = headers.get("Sec-Fetch-Dest");
                // Only requests the browser marks as audio fetches are of interest.
                if (dest == null || !dest.contains("audio")) {
                    return null;
                }
                // Snapshot the title once so the file name and the log line agree
                // even if the main thread moves on to the next song meanwhile.
                final String currentSong = songName;
                System.out.println("请求:" + request.uri());
                boolean saved = downloadFile(AUDIO_HOST + request.uri(), SAVE_DIR + currentSong + ".m4a");
                if (saved) {
                    System.out.println(currentSong + "下载完成");
                }
                // Returning null lets the original request continue unchanged.
                return null;
            }
        });

        for (String music : readMusicList()) {
            catchMusic(music);
        }
    }

    /**
     * Reads the song titles, one per line, skipping blank lines.
     *
     * <p>The file is decoded as UTF-8 explicitly instead of the platform default
     * so that non-ASCII titles decode the same way on every machine; the file
     * must therefore be saved as UTF-8.
     *
     * @return the non-blank, trimmed lines of the music list
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> readMusicList() throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(MUSIC_LIST_PATH), StandardCharsets.UTF_8))) {
            return reader.lines()
                    .map(String::trim)
                    .filter(line -> !line.isEmpty())
                    .collect(Collectors.toList());
        }
    }

    /**
     * Searches for {@code name}, hovers the first result's menu and clicks its
     * play icon so that the browser issues the audio request.
     *
     * @param name song title to search for
     * @throws InterruptedException if the pause after hovering is interrupted
     */
    private  void catchMusic(String name) throws InterruptedException {
        WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30));

        driver.get("https://y.qq.com/n/ryqq/search?w=" + URLEncoder.encode(name, StandardCharsets.UTF_8) + "&t=song&remoteplace=txt.yqq.top");

        WebElement songList = wait.until(ExpectedConditions.visibilityOfElementLocated(By.className("songlist__list")));
        List<WebElement> rows = songList.findElements(By.tagName("li"));
        if (rows.isEmpty()) {
            // Nothing to play; skip this title instead of failing on get(0).
            System.err.println("No search result for: " + name);
            return;
        }
        WebElement openDiv = rows.get(0).findElement(By.className("mod_list_menu"));

        new Actions(driver).moveToElement(openDiv).perform();

        // Fixed pause after hovering; presumably gives the menu time to render.
        Thread.sleep(1500);

        WebElement open = openDiv.findElement(By.className("list_menu__icon_play"));
        // Publish the title BEFORE the click: the click triggers the audio request,
        // and the proxy filter names the file from this field.
        songName = name;
        // Click through JavaScript rather than WebElement.click().
        JavascriptExecutor js = (JavascriptExecutor) driver;
        js.executeScript("arguments[0].click();", open);
    }

    /**
     * Downloads {@code fileURL} over a new connection and writes it to
     * {@code saveDir}, creating the parent directory when it is missing.
     *
     * @param fileURL absolute URL of the resource to fetch
     * @param saveDir full path of the destination file (not a directory)
     * @return {@code true} if the whole body was written, {@code false} if an
     *         I/O error occurred (the error is reported on stderr)
     */
    private static boolean downloadFile(String fileURL, String saveDir) {
        File target = new File(saveDir);
        File parent = target.getParentFile();
        if (parent != null) {
            // Result ignored on purpose: if the directory is still missing,
            // opening the output stream below fails and is reported.
            parent.mkdirs();
        }
        try {
            URLConnection connection = new URL(fileURL).openConnection();
            // Both streams are closed even when the copy fails midway.
            try (InputStream inputStream = new BufferedInputStream(connection.getInputStream());
                 OutputStream outputStream = new FileOutputStream(target)) {
                byte[] buffer = new byte[8192];
                int bytesRead;
                while ((bytesRead = inputStream.read(buffer)) != -1) {
                    outputStream.write(buffer, 0, bytesRead);
                }
            }
            System.out.println("File downloaded to: " + saveDir);
            return true;
        } catch (IOException e) {
            System.err.println("Download failed: " + fileURL);
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Walks through the password-login form inside the nested login iframes and
     * submits it. Not called anywhere in this class; the account and password
     * strings are empty placeholders.
     *
     * @param wait wait helper used for the visibility/presence checks
     */
    private static void login(WebDriverWait wait) {
        wait.until(ExpectedConditions.visibilityOfElementLocated(By.className("login-box-tit")));
        // Enter the outer login frame.
        driver.switchTo().frame("login_frame");

        wait.until(ExpectedConditions.visibilityOfElementLocated(By.className("lay_login_form")));
        driver.switchTo().frame("ptlogin_iframe");
        // Wait until the ptlogin_iframe content has loaded.
        wait.until(ExpectedConditions.presenceOfElementLocated(By.id("u")));

        // Switch to password login.
        WebElement passLogin = driver.findElement(By.id("switcher_plogin"));
        Actions actions = new Actions(driver);
        actions.moveToElement(passLogin).perform();

        passLogin.click();

        // Wait for the password field to become visible.
        wait.until(ExpectedConditions.visibilityOfElementLocated(By.id("p")));
        // Account.
        WebElement username = wait.until(ExpectedConditions.visibilityOfElementLocated(By.id("u")));
        username.sendKeys("");
        // Password.
        driver.findElement(By.id("p")).sendKeys("");
        // Submit: the button was previously only located, never clicked.
        driver.findElement(By.id("login_button")).click();
    }

    /**
     * Builds the Firefox driver routed through {@link #browserMobProxy}.
     *
     * @return a started {@link FirefoxDriver}
     */
    private static FirefoxDriver catchConfig() {
        System.setProperty("webdriver.gecko.driver", "D:\\app\\WebDriver\\geckodriver-v0.31.0-win64\\geckodriver.exe");
        FirefoxOptions options = new FirefoxOptions();
        FirefoxProfile profile = new FirefoxProfile();
        Proxy seleniumProxy = ClientUtil.createSeleniumProxy(browserMobProxy);
        options.setProxy(seleniumProxy);
        // Accept insecure certificates for the proxied session.
        options.setAcceptInsecureCerts(true);
        // Path of the Firefox binary.
        options.setBinary("D:\\app\\firefox\\firefox.exe");
        // Disable GPU rendering.
        options.addArguments("--disable-gpu");
        // Headless mode is intentionally left off.
        // Ignore certificate errors.
        options.addArguments("ignore-certificate-errors");
        // Suppress the "controlled by automation" info bar.
        options.addArguments("--disable-infobars");
        // Anti-bot measure: intended to make window.navigator.webdriver report false.
        options.addPreference("dom.webdriver.enabled", false);
        // NOTE(review): this profile is never attached to the options
        // (no options.setProfile(profile)), so the user-agent override below is
        // not applied to the launched browser. Attaching it would switch the
        // session to a mobile user agent, which the selectors in catchMusic have
        // not been verified against; confirm the intent before wiring it in.
        profile.setPreference(
                "general.useragent.override",
                "Mozilla/5.0(iPhone;CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML,like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
        );

        return new FirefoxDriver(options);
    }
}

music.txt文件的内容就是要搜索的歌曲名,每行一个(对应代码中的src/main/resources/music.txt)

maven依赖如下

   <!-- Compiler level and library versions referenced by the dependency entries. -->
   <!-- NOTE(review): the crawler class in this article calls
        URLEncoder.encode(String, Charset) (added in Java 10) and
        new WebDriverWait(driver, Duration) (a Selenium 4 constructor), so
        compiler level 8 and selenium 3.141.0 look too old for that code.
        Confirm the versions actually used to build it. -->
   <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <guava.version>25.0-jre</guava.version>
        <maven.compiler.target>8</maven.compiler.target>
        <browsermob.version>2.1.0</browsermob.version>
        <selenium.version>3.141.0</selenium.version>
        <hutool.version>5.8.2</hutool.version>
        <jsoup.version>1.5.2</jsoup.version>
    </properties>

    <dependencies>
        <!-- BrowserMob Proxy core: the crawler imports its classes from net.lightbody.bmp. -->
        <dependency>
            <groupId>net.lightbody.bmp</groupId>
            <artifactId>browsermob-core</artifactId>
            <version>${browsermob.version}</version>
        </dependency>
        <!-- NOTE(review): hutool is not imported by the crawler class shown in this article; presumably used elsewhere in the project. -->
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>${hutool.version}</version>
        </dependency>
        <!-- NOTE(review): jsoup is not imported by the crawler class shown in this article; presumably used elsewhere in the project. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>${jsoup.version}</version>
        </dependency>
        <!-- Guava pinned to an explicit version; presumably to settle a transitive version conflict (confirm). -->
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>
        <!-- NOTE(review): whether the legacy BrowserMob module is needed next to browsermob-core cannot be told from the class shown; confirm. -->
        <dependency>
            <groupId>net.lightbody.bmp</groupId>
            <artifactId>browsermob-legacy</artifactId>
            <version>${browsermob.version}</version>
        </dependency>

        <!-- Selenium: the crawler imports its WebDriver, Firefox and wait classes from org.openqa.selenium. -->
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-java</artifactId>
            <version>${selenium.version}</version>
        </dependency>
    </dependencies>
posted @ 2024-01-17 16:49  没有梦想的java菜鸟  阅读(431)  评论(0编辑  收藏  举报