// 1. Scraping information with Selenium
import com.oasis.mdata.entities.GameInfo
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.select.Elements
import org.openqa.selenium.By
import org.openqa.selenium.firefox.FirefoxDriver
import org.openqa.selenium.firefox.FirefoxOptions
import org.openqa.selenium.firefox.FirefoxProfile
/**
 * Scrapes a qimai.cn ranking page with a headless Firefox driven by Selenium.
 *
 * The rendered page is parsed with Jsoup and one [GameInfo] is produced for every element that
 * carries the `info` CSS class. When the page contains no such element but shows a `login-tip`
 * block, the scraper signs in with [loginUser] / [loginPassword] and reads the page again.
 *
 * @property loginUser account (phone number or e-mail) typed into the login form.
 * @property loginPassword password typed into the login form.
 * @author 没有梦想的java菜鸟
 * @date 2022/03/02 11:48 AM
 */
class Selenium(
    // NOTE(review): these defaults are real-looking credentials committed to source control.
    // They are kept only so existing `Selenium()` callers behave as before; prefer passing
    // the values in from configuration and rotating this password.
    private val loginUser: String = "13037117092",
    private val loginPassword: String = "wl990922"
) {
    /**
     * Base URL of the ranking page. The `keyword` given to [gameInformation] is appended
     * verbatim (the path ends in `/date/`, so the keyword is presumably a date — confirm with callers).
     */
    var url = "https://www.qimai.cn/rank/index/brand/grossing/device/iphone/country/us/genre/6014/date/"

    /**
     * Loads `url + keyword` in a fresh headless Firefox session and returns the ranking entries found.
     *
     * The browser session is always terminated before this function returns, whether it
     * succeeds or throws.
     *
     * @param keyword suffix appended to [url]; also stored as `dateTime` on every returned [GameInfo].
     * @return one [GameInfo] per `info` element on the page, in page order; empty when none is found.
     */
    fun gameInformation(keyword: String): MutableList<GameInfo> {
        // Must be set before the driver is constructed so Selenium can locate geckodriver.
        System.setProperty("webdriver.gecko.driver", "/usr/local/bin/geckodriver")
        val driver = FirefoxDriver(buildOptions())
        try {
            // NOTE(review): this runs on the initial blank page, before navigation; verify that
            // the override is still in effect once the target page has loaded.
            driver.executeScript("Object.defineProperty(navigator, 'webdriver', {get: () => false})")
            driver.get("$url$keyword")

            val firstDom = scrollAndParse(driver, FIRST_RENDER_WAIT_MS)
            val firstEntries = firstDom.getElementsByClass("info")

            // exist() returns true only when it had to log in; in that case the page is read again.
            val entries = if (exist(firstEntries, firstDom, driver)) {
                scrollAndParse(driver, RETRY_RENDER_WAIT_MS).getElementsByClass("info")
            } else {
                firstEntries
            }
            return getGameInfo(driver, keyword, entries)
        } finally {
            // The original close() call sat after the return statement and never ran, leaking a
            // browser per call. quit() ends the whole session, not just the current window.
            driver.quit()
        }
    }

    /**
     * Signs in to qimai.cn when the ranking is hidden behind a login prompt.
     *
     * @param div the `info` elements already extracted from [dom].
     * @param dom the parsed page the elements came from.
     * @param driver the live browser session used to open and submit the login form.
     * @return `true` when a login was submitted (the caller should read the page again);
     *   `false` when [div] is non-empty or when the page offers no `login-tip` link.
     */
    fun exist(div: Elements, dom: Document, driver: FirefoxDriver): Boolean {
        if (div.isNotEmpty()) return false

        // A page with neither ranking entries nor a login link used to crash with
        // IndexOutOfBoundsException; it is now reported as "no login performed".
        val loginHref = dom.getElementsByClass("login-tip").firstOrNull()
            ?.select("a")?.firstOrNull()
            ?.attr("href")
            ?: return false

        Thread.sleep(LOGIN_STEP_WAIT_MS)
        driver.get("https://www.qimai.cn$loginHref")
        Thread.sleep(LOGIN_STEP_WAIT_MS)

        // The form fields are located by their (Chinese) placeholder text.
        val username = driver.findElement(By.xpath("//input[@placeholder='请输入手机号/邮箱']"))
        val password = driver.findElement(By.xpath("//input[@placeholder='请输入密码']"))
        val loginButton = driver.findElement(By.xpath("//div[@class='signin-btn']"))
        username.sendKeys(loginUser)
        password.sendKeys(loginPassword)
        loginButton.click()
        return true
    }

    /**
     * Converts ranking elements into [GameInfo] records.
     *
     * @param driver unused; kept so existing callers keep compiling.
     * @param keyword stored as `dateTime` on every record.
     * @param div the ranking elements; the text of each element's first `<p>` becomes the name.
     * @return records numbered from 1 in the order of [div].
     */
    fun getGameInfo(driver: FirefoxDriver, keyword: String, div: Elements): MutableList<GameInfo> =
        div.mapIndexedTo(ArrayList<GameInfo>()) { position, element ->
            GameInfo(sort = position + 1, name = element.select("p")[0].text(), dateTime = keyword)
        }

    /** Scrolls far down the page, waits [waitMillis] for it to render, and parses the resulting HTML. */
    private fun scrollAndParse(driver: FirefoxDriver, waitMillis: Long): Document {
        driver.executeScript("window.scrollTo(0,100000)")
        Thread.sleep(waitMillis)
        return Jsoup.parse(driver.pageSource)
    }

    /** Builds the headless Firefox configuration (anti-detection flag, mobile user agent, cache settings). */
    private fun buildOptions(): FirefoxOptions {
        val options = FirefoxOptions()
        val profile = FirefoxProfile()

        // Disable GPU rendering and run without a visible window.
        options.addArguments("--disable-gpu")
        options.addArguments("--headless")
        // Ignore certificate errors.
        // NOTE(review): this argument has no leading dashes, unlike the others — confirm Firefox accepts it.
        options.addArguments("ignore-certificate-errors")
        // Suppress the "browser is being controlled by automation" banner.
        options.addArguments("--disable-infobars")
        // Anti-scraping countermeasure: make window.navigator.webdriver report false.
        options.addPreference("dom.webdriver.enabled", false)

        // Present the browser as an iPhone via the User-Agent header.
        profile.setPreference(
            "general.useragent.override",
            "Mozilla/5.0(iPhone;CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML,like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
        )
        options.profile = profile

        // Disable caching.
        options.addPreference("network.http.use-cache", false)
        options.addPreference("browser.cache.memory.enable", false)
        options.addPreference("browser.cache.disk.enable", false)
        options.addPreference("browser.sessionhistory.max_total_viewers", 3)
        options.addPreference("network.dns.disableIPv6", true)
        // NOTE(review): this key starts with a capital "C" while the next one is lower-case —
        // looks like a typo for "content.notify.interval"; confirm before changing.
        options.addPreference("Content.notify.interval", 750000)
        options.addPreference("content.notify.backoffcount", 3)
        options.addPreference("network.http.pipelining", true)
        options.addPreference("network.http.proxy.pipelining", true)
        options.addPreference("network.http.pipelining.maxrequests", 32)
        return options
    }

    private companion object {
        /** Wait after the first scroll, giving the ranking time to render. */
        const val FIRST_RENDER_WAIT_MS = 4000L

        /** Wait after the scroll that follows a login. */
        const val RETRY_RENDER_WAIT_MS = 2000L

        /** Pause before and after opening the login page. */
        const val LOGIN_STEP_WAIT_MS = 2000L
    }
}