java selenium

Selenium 是一种 Web 自动化测试工具,可以通过代码控制浏览器,执行各种网页操作。

准备

首先,下载谷歌浏览器驱动(ChromeDriver),下载地址:
http://npm.taobao.org/mirrors/chromedriver/
先查看自己的谷歌浏览器版本,然后选择与该版本最接近的驱动版本下载。

示例

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.openqa.selenium.By;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.TimeUnit;
/**
 * Selenium example: opens a poem page in Chrome, reads the poem's title and body,
 * and saves both to a local UTF-8 text file.
 *
 * <p>create by fzg, 2022/6/15 10:45
 */
@SpringBootTest
public class ServerWebTests {

    /** Name of the system property that holds the chromedriver executable path. */
    private static final String DRIVER_PROPERTY = "webdriver.chrome.driver";

    /** Driver location used when the property has not been set externally. */
    private static final String DEFAULT_DRIVER_PATH = "D:/Program Files/chrome-driver/chromedriver.exe";

    /** Page that is scraped by {@link #getContent()}. */
    private static final String POEM_URL = "https://so.gushiwen.cn/shiwenv_45c396367f59.aspx";

    /** File that receives the title on the first line followed by the poem body. */
    private static final String OUTPUT_FILE = "C:\\Users\\Asus\\Desktop\\temp\\poetry.txt";

    /**
     * Scrapes the poem title and body from {@link #POEM_URL} and writes them to
     * {@link #OUTPUT_FILE}, replacing any previous content of that file.
     *
     * @throws IOException if the output directory or file cannot be created or written
     */
    @Test
    public void getContent() throws IOException {
        // Respect a driver path supplied via -Dwebdriver.chrome.driver=...; fall back to the default.
        if (System.getProperty(DRIVER_PROPERTY) == null) {
            System.setProperty(DRIVER_PROPERTY, DEFAULT_DRIVER_PATH);
        }

        String title;
        String text;
        // The driver is created here rather than injected: the previously @Autowired field
        // was overwritten immediately, so the injected value was never used.
        WebDriver driver = new ChromeDriver(new ChromeOptions());
        try {
            driver.get(POEM_URL);
            System.out.println("fzg1===>" + driver.getTitle());

            // Poem title
            title = driver.findElement(By.xpath("//*[@id=\"sonsyuanwen\"]/div[1]/h1")).getText();
            System.out.println("fzg===>" + title);

            // Poem body
            WebElement cont = driver.findElement(By.id("contson45c396367f59"));
            text = cont.getText();
            System.out.println(text);
        } finally {
            // quit() ends the whole browser session; running it in finally ensures the
            // browser is shut down even when an element lookup throws.
            driver.quit();
        }

        Path path = Paths.get(OUTPUT_FILE);
        Path parent = path.getParent();
        if (parent != null) {
            // Avoid a NoSuchFileException when the target directory does not exist yet.
            Files.createDirectories(parent);
        }
        // A single writer (create/truncate) produces the same file as the former
        // "write title, reopen in append mode, write body" sequence.
        try (BufferedWriter writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) {
            writer.write(title);
            writer.write("\n");
            writer.write(text);
        }
    }
}

利用 Selenium 还可以爬取网页元素,例如下面的示例会找出页面上所有的图片下载链接,并把图片逐个保存到本地:

import lombok.extern.slf4j.Slf4j;
import org.junit.Test;
import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;
import java.util.UUID;
/**
* create by fzg
* 2022/11/29 14:11
*/
@Slf4j
@SpringBootTest
public class SeleniumTest {
@Autowired
private ChromeDriver driver;
@Test
public void getImages(){
System.setProperty("webdriver.chrome.driver", "D:/Program Files/chrome-driver/chromedriver.exe");
ChromeOptions chromeOptions = new ChromeOptions();
driver = new ChromeDriver(chromeOptions);
driver.get("https://picsum.photos/images");
List<WebElement> elements = driver.findElements(By.className("download-url"));
log.info("大小:" + elements.size());
for (WebElement element : elements) {
String imgUrl = element.getAttribute("href");
downImage(imgUrl, UUID.randomUUID().toString());
}
driver.close();
}
public void downImage(String imageUrl,String fileName) {
String file = "E:\\pictures\\java-repile-images";
File files = new File(file);
if (!files.exists()) {
files.mkdirs();
}
InputStream is;
FileOutputStream out;
try {
URL url = new URL(imageUrl);
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
is = connection.getInputStream();
// 创建文件
File fileOfImg = new File(file + "/" + fileName + ".jpg");
out = new FileOutputStream(fileOfImg);
int i = 0;
while ((i = is.read()) != -1) {
out.write(i);
}
is.close();
out.close();
log.info(fileName + "下载成功");
} catch (MalformedURLException e) {
log.info("图片地址解析失败");
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}

posted @   合起来的彳亍  阅读(203)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 分享4款.NET开源、免费、实用的商城系统
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
点击右上角即可分享
微信分享提示