java-selenium下载百度图片

package download;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.input.ReaderInputStream;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;


/**
 * Scrapes image URLs from a web page with Selenium/ChromeDriver and saves the
 * images to the local disk.
 */
public class Geturl {
	
	/** Time to wait after navigation before reading the page's img tags, in milliseconds. */
	private static final long PAGE_SETTLE_MILLIS = 10000;
	
	/**
	 * Opens {@code baseUrl} in Chrome, waits for the page to settle and returns
	 * the {@code src} of every {@code img} element that points at a JPEG.
	 *
	 * @param baseUrl address of the page to scan
	 * @return the collected image URLs (those containing ".jpg"); never {@code null},
	 *         possibly empty
	 */
	public static List<String> geturls(String baseUrl){
		List<String> urllist=new ArrayList<String>();
		System.setProperty("webdriver.chrome.driver", "E:\\\\webDriver\\\\chromedriverV2.28.exe");
		
		WebDriver driver = new ChromeDriver();
		try {
			driver.get(baseUrl);
			
			// Wait BEFORE looking up the elements: images added while the page settles
			// would otherwise be missed, and handles fetched too early can go stale.
			try {
				Thread.sleep(PAGE_SETTLE_MILLIS);
			} catch (InterruptedException e) {
				// Keep the interrupt visible to callers instead of swallowing it.
				Thread.currentThread().interrupt();
			}
			
			// Fetch every img tag on the page.
			List<WebElement> imgList = driver.findElements(By.tagName("img"));
			System.out.println(imgList.size());
			
			// Walk all tags and keep the usable src values.
			for (WebElement img : imgList) {
				try {
					String urlStr = img.getAttribute("src");
					System.out.println(urlStr);
					// src may be absent; only JPEG links are kept because
					// downloadImg stores every file with a .jpg extension.
					if (urlStr != null && urlStr.contains(".jpg")) {
						urllist.add(urlStr);
					}
				} catch (RuntimeException e) {
					// One unreadable element must not discard the remaining ones.
					e.printStackTrace();
				}
			}
		} finally {
			// Always shut the browser down, otherwise each call leaks a
			// Chrome and a chromedriver process.
			driver.quit();
		}
		
		return urllist;
		
	}
	

	/**
	 * Downloads every URL in {@code urllist} to a sequentially numbered .jpg file
	 * (photos0.jpg, photos1.jpg, ...) under the hard-coded pictures directory.
	 *
	 * @param urllist image URLs to download, in order
	 * @throws Exception if a URL is malformed or a download/write fails; the
	 *         remaining URLs are not processed
	 */
	public static void downloadImg(List<String> urllist) throws Exception{
		int imageNumber = 0;
		for(String urlString:urllist) {
			URL url = new URL(urlString);
			String imageName ="C:\\Users\\0\\Pictures\\插画\\photos"+ imageNumber +".jpg";
			
			// try-with-resources closes both streams even when a read or write fails.
			try (InputStream in = url.openStream();
					FileOutputStream fos = new FileOutputStream(new File(imageName))) {
				byte[] buffer = new byte[1024];
				int length;
				// read() signals end of stream with -1.
				while((length = in.read(buffer)) != -1) {
					fos.write(buffer, 0, length);
				}
			}
			imageNumber++;
		}
	}
	
	
	
	/**
	 * Entry point: collects the image URLs from a Baidu image search result page
	 * and downloads them.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		
		List<String> urllist=geturls("https://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk&word=%B2%E5%BB%AD&fr=ala&ala=1&alatpl=adress&pos=0&hs=2&xthttps=111111");
		System.out.println(urllist.size());
		
		try {
			downloadImg(urllist);
		} catch (Exception e) {
			// Report the failure; there is nothing to recover at this level.
			e.printStackTrace();
		}
		
	}
}

  

posted @ 2017-10-16 14:51  sincoolvip  阅读(1490)  评论(0编辑  收藏  举报