java selenium
Selenium 是一种 Web 自动化测试工具,可以通过代码控制浏览器,模拟用户进行网页操作。
准备
首先,下载 Chrome 浏览器驱动(ChromeDriver),下载地址:
http://npm.taobao.org/mirrors/chromedriver/
查看自己的 Chrome 浏览器版本,选择与浏览器版本最接近的驱动版本下载。
示例
import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.openqa.selenium.By; import org.openqa.selenium.Cookie; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeOptions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import java.io.BufferedWriter; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.Iterator; import java.util.Set; import java.util.concurrent.TimeUnit; /** * create by fzg * 2022/6/15 10:45 */ @SpringBootTest public class ServerWebTests { @Autowired private ChromeDriver driver; @Test public void getContent() throws IOException { System.setProperty("webdriver.chrome.driver", "D:/Program Files/chrome-driver/chromedriver.exe"); ChromeOptions chromeOptions = new ChromeOptions(); driver = new ChromeDriver(chromeOptions); driver.get("https://so.gushiwen.cn/shiwenv_45c396367f59.aspx"); System.out.println("fzg1===>" + driver.getTitle()); String title = driver.findElement(By.xpath("//*[@id=\"sonsyuanwen\"]/div[1]/h1")).getText().toString(); // 古诗的标题 System.out.println("fzg===>" + driver.findElement(By.xpath("//*[@id=\"sonsyuanwen\"]/div[1]/h1")).getText()); // 古诗内容 WebElement cont = driver.findElement(By.id("contson45c396367f59")); System.out.println(cont.getText()); String text = cont.getText(); driver.close(); String fileName = "C:\\Users\\Asus\\Desktop\\temp\\poetry.txt"; Path path = Paths.get(fileName); try(BufferedWriter writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) { writer.write(title); } try(BufferedWriter writer = 
Files.newBufferedWriter(path, StandardCharsets.UTF_8, StandardOpenOption.APPEND)) { writer.write("\n" + text); } } }
利用selenium可以爬取网页元素
import lombok.extern.slf4j.Slf4j; import org.junit.Test; import org.openqa.selenium.By; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeOptions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.List; import java.util.UUID; /** * create by fzg * 2022/11/29 14:11 */ @Slf4j @SpringBootTest public class SeleniumTest { @Autowired private ChromeDriver driver; @Test public void getImages(){ System.setProperty("webdriver.chrome.driver", "D:/Program Files/chrome-driver/chromedriver.exe"); ChromeOptions chromeOptions = new ChromeOptions(); driver = new ChromeDriver(chromeOptions); driver.get("https://picsum.photos/images"); List<WebElement> elements = driver.findElements(By.className("download-url")); log.info("大小:" + elements.size()); for (WebElement element : elements) { String imgUrl = element.getAttribute("href"); downImage(imgUrl, UUID.randomUUID().toString()); } driver.close(); } public void downImage(String imageUrl,String fileName) { String file = "E:\\pictures\\java-repile-images"; File files = new File(file); if (!files.exists()) { files.mkdirs(); } InputStream is; FileOutputStream out; try { URL url = new URL(imageUrl); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); is = connection.getInputStream(); // 创建文件 File fileOfImg = new File(file + "/" + fileName + ".jpg"); out = new FileOutputStream(fileOfImg); int i = 0; while ((i = is.read()) != -1) { out.write(i); } is.close(); out.close(); log.info(fileName + "下载成功"); } catch (MalformedURLException e) { log.info("图片地址解析失败"); e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 分享4款.NET开源、免费、实用的商城系统
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了