1.使用Selenium爬取信息
import com.oasis.mdata.entities.GameInfo
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.select.Elements
import org.openqa.selenium.By
import org.openqa.selenium.firefox.FirefoxDriver
import org.openqa.selenium.firefox.FirefoxOptions
import org.openqa.selenium.firefox.FirefoxProfile
/**
 * Scrapes game names from a qimai.cn ranking page using a headless Firefox session.
 *
 * The ranking page is addressed as [url] followed by the keyword passed to
 * [gameInformation]. When the page shows no result entries, the class follows the
 * page's login link, signs in, and reads the page again.
 *
 * NOTE(review): the default credentials below were hard-coded in the original
 * source. Secrets should not live in version control; callers should pass their
 * own values (e.g. loaded from configuration) and the defaults should be removed
 * once all call sites do so.
 *
 * @param loginUsername account name typed into the login form when sign-in is required.
 * @param loginPassword password typed into the login form when sign-in is required.
 */
class Selenium(
    private val loginUsername: String = "13037117092",
    private val loginPassword: String = "wl990922"
) {
    /** Base address of the ranking page; the keyword is appended verbatim. */
    var url = "https://www.qimai.cn/rank/index/brand/grossing/device/iphone/country/us/genre/6014/date/"

    /**
     * Loads the ranking page for [keyword] and returns one [GameInfo] per result entry.
     *
     * The browser session is always shut down before this function returns or throws.
     *
     * @param keyword suffix appended to [url]; also stored as `dateTime` on every result.
     * @return the entries found on the page in page order, or an empty list when none are found.
     */
    fun gameInformation(keyword: String): MutableList<GameInfo> {
        System.setProperty("webdriver.gecko.driver", "/usr/local/bin/geckodriver")
        val driver = FirefoxDriver(buildOptions())
        try {
            driver.executeScript("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
            driver.get("$url$keyword")
            // Scroll far down and wait a fixed time — presumably so lazily loaded rows render.
            driver.executeScript("window.scrollTo(0,100000)")
            Thread.sleep(4000)

            val dom = Jsoup.parse(driver.pageSource)
            val entries = dom.getElementsByClass("info")
            if (!exist(entries, dom, driver)) {
                return getGameInfo(driver, keyword, entries)
            }

            // A login was just submitted: scroll again and re-read whatever page the browser shows now.
            driver.executeScript("window.scrollTo(0,100000)")
            Thread.sleep(2000)
            val entriesAfterLogin = Jsoup.parse(driver.pageSource).getElementsByClass("info")
            return getGameInfo(driver, keyword, entriesAfterLogin)
        } finally {
            // quit() ends the whole session (browser and driver process), including when an exception is thrown.
            driver.quit()
        }
    }

    /**
     * Checks whether the page had to be unlocked by logging in.
     *
     * When [div] is empty, looks for the first link inside the first `login-tip`
     * element of [dom], opens it with [driver], fills in the credentials and clicks
     * the sign-in button.
     *
     * @param div result entries already extracted from [dom].
     * @param dom parsed snapshot of the current page.
     * @param driver live browser session used to perform the login.
     * @return `true` if a login was submitted; `false` if entries were already
     *   present or the page offered no login link.
     */
    fun exist(div: Elements, dom: Document, driver: FirefoxDriver): Boolean {
        if (div.isNotEmpty()) {
            return false
        }
        // Neither results nor a login link: nothing can be done here, so report "no login performed".
        val loginPath = dom.getElementsByClass("login-tip")
            .firstOrNull()
            ?.select("a")
            ?.firstOrNull()
            ?.attr("href")
            ?: return false

        Thread.sleep(2000)
        driver.get("https://www.qimai.cn$loginPath")
        Thread.sleep(2000)

        val usernameField = driver.findElement(By.xpath("//input[@placeholder='请输入手机号/邮箱']"))
        val passwordField = driver.findElement(By.xpath("//input[@placeholder='请输入密码']"))
        val loginButton = driver.findElement(By.xpath("//div[@class='signin-btn']"))
        usernameField.sendKeys(loginUsername)
        passwordField.sendKeys(loginPassword)
        loginButton.click()
        return true
    }

    /**
     * Converts result entries into [GameInfo] records.
     *
     * The name is the text of the first `<p>` inside each entry, and `sort` is the
     * entry's 1-based position in [div].
     *
     * @param driver unused; kept so existing callers keep compiling.
     * @param keyword value stored as `dateTime` on every record.
     * @param div result entries in page order.
     * @return one record per entry, in the same order.
     * @throws IndexOutOfBoundsException if an entry contains no `<p>` element.
     */
    fun getGameInfo(driver: FirefoxDriver, keyword: String, div: Elements): MutableList<GameInfo> =
        div.mapIndexedTo(ArrayList<GameInfo>()) { position, entry ->
            GameInfo(sort = position + 1, name = entry.select("p")[0].text(), dateTime = keyword)
        }

    /** Builds the headless Firefox configuration (arguments, user-agent override, preferences). */
    private fun buildOptions(): FirefoxOptions {
        val options = FirefoxOptions()
        val profile = FirefoxProfile()
        options.addArguments("--disable-gpu")
        options.addArguments("--headless")
        options.addArguments("ignore-certificate-errors")
        options.addArguments("--disable-infobars")
        options.addPreference("dom.webdriver.enabled", false)
        profile.setPreference(
            "general.useragent.override",
            "Mozilla/5.0(iPhone;CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML,like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
        )
        options.profile = profile
        options.addPreference("network.http.use-cache", false)
        options.addPreference("browser.cache.memory.enable", false)
        options.addPreference("browser.cache.disk.enable", false)
        options.addPreference("browser.sessionhistory.max_total_viewers", 3)
        options.addPreference("network.dns.disableIPv6", true)
        options.addPreference("Content.notify.interval", 750000)
        options.addPreference("content.notify.backoffcount", 3)
        options.addPreference("network.http.pipelining", true)
        options.addPreference("network.http.proxy.pipelining", true)
        options.addPreference("network.http.pipelining.maxrequests", 32)
        return options
    }
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)