抓取页面图片元素并保存到本机电脑

这里主要使用流来处理:先用 Java 模拟浏览器访问页面,获取页面的 HTML 内容,再用 jsoup 解析 HTML 并提取其中的图片元素,

最后通过输入/输出流把图片保存到本机。

package getpicture;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Scanner;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
 

/**
 * Program entry point for the image crawler.
 */
public class getPicture {

    /**
     * Runs a {@link Spider} on its own thread and returns immediately.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Runnable crawler = new Spider();
        Thread worker = new Thread(crawler);
        worker.start();
    }
}
 
/**
 * Fetches one listing page and starts an image download for every picture found on it.
 *
 * <p>The page address is {@code firstUrl + beginIndex + connUrl}. The returned HTML is parsed
 * with jsoup, and the {@code src} of every element matching {@code ".row img"} is handed to a
 * new {@link DownloadImage} thread.
 */
class Spider implements Runnable {
    private final String firstUrl = "http://jandan.net/ooxx/page-"; // e.g. ...page-1111#comments
    private final String connUrl = "#comments";
    private final int beginIndex = 1115;

    public Spider() {}

    /**
     * Downloads the page, prints its HTML, and spawns one download thread per image.
     * Any failure is reported on stderr; nothing is thrown to the caller.
     */
    @Override
    public void run() {
        HttpURLConnection conn = null;
        try {
            URL newURL = new URL(firstUrl + beginIndex + connUrl);
            conn = (HttpURLConnection) newURL.openConnection();
            conn.setRequestProperty("Connection","keep-alive");
            conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36");
            // Deliberately no setDoOutput(true)/getOutputStream(): opening the output stream
            // makes HttpURLConnection send a POST, and this page must be fetched with a GET.

            // Check the status before touching the body so a failed request is reported clearly.
            int status = conn.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                System.err.println("Unexpected HTTP status " + status + " for " + newURL);
                return;
            }

            String html = readBody(conn);
            System.out.println(html);

            // Parse the page and start one download per image element.
            Document doc = Jsoup.parse(html);
            Elements elements = doc.select(".row img");
            for (Element e : elements) {
                String imgSrc = e.attr("src");
                if (imgSrc.isEmpty()) {
                    continue; // an <img> without a src has nothing to download
                }
                new Thread(new DownloadImage(imgSrc)).start();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Reads the whole response body as UTF-8 text.
     *
     * @param conn a connection whose response is ready to be read
     * @return the body, with each line terminated by {@code '\n'}
     * @throws IOException if reading the response fails
     */
    private String readBody(HttpURLConnection conn) throws IOException {
        // Start from an empty builder: appending to a null String would prefix the text "null".
        StringBuilder html = new StringBuilder();
        try (InputStream inputStream = conn.getInputStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "utf-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Keep the line break so tokens on adjacent lines are not fused together.
                html.append(line).append('\n');
            }
        }
        return html.toString();
    }
}
 

/**
 * Downloads a single image and stores it under {@code E:\picture\<yyyyMMdd>\<file name>}.
 *
 * <p>The file name is the last {@code '/'}-separated segment of the image source. A source
 * without a scheme (for example {@code //host/a.jpg}) is fetched over {@code http:}.
 */
class DownloadImage implements Runnable {
    /** Root folder that receives one sub-folder per day. */
    private static final String SAVE_ROOT = "E:\\picture\\";

    private String imageSrc;
    private String imageName;

    /**
     * @param imageSrc the value of the image's {@code src} attribute
     */
    public DownloadImage(String imageSrc) {
        this.imageSrc = imageSrc;
    }

    /**
     * Fetches the image and writes it to disk. Failures are reported on stderr and any
     * partially written file is removed; nothing is thrown to the caller.
     */
    @Override
    public void run() {
        if (imageSrc == null) {
            System.err.println("Skipping image with no source");
            return;
        }
        // split() drops trailing empty segments, so a source such as "/" yields an empty array.
        String[] splits = imageSrc.split("/");
        if (splits.length == 0 || splits[splits.length - 1].isEmpty()) {
            System.err.println("Skipping image with no file name: " + imageSrc);
            return;
        }
        imageName = splits[splits.length - 1];

        // Use the formatted date, not the formatter object, as the folder name.
        String day = new SimpleDateFormat("yyyyMMdd").format(new Date());
        File file = new File(SAVE_ROOT + day + "\\" + imageName);

        // Create the target folder if it is missing; the file itself is created by the stream.
        File folder = file.getParentFile();
        if (folder != null && !folder.exists()) {
            folder.mkdirs();
        }

        System.out.println("开始下载图片:" + imageName);
        boolean outputOpened = false;
        try {
            URL newURL = new URL(toAbsoluteUrl(imageSrc));
            HttpURLConnection conn = (HttpURLConnection) newURL.openConnection();
            conn.setRequestProperty("Connection","keep-alive");
            conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36");

            // Open the network stream first so an existing file is not truncated when the
            // request itself fails. Both streams are closed even if copying throws.
            try (InputStream inputStream = conn.getInputStream();
                 FileOutputStream fos = new FileOutputStream(file)) {
                outputOpened = true;
                byte[] data = new byte[1024];
                int len;
                while ((len = inputStream.read(data)) != -1) {
                    // Write only the bytes actually read in this pass.
                    fos.write(data, 0, len);
                }
                fos.flush();
            }
            System.out.println("下载完成:" + imageName);
        } catch (Exception e) {
            System.err.println(" 这个图片下载不了哇!\n删除妹子" + imageName);
            System.err.println(e);
            // Remove the incomplete file so the message above is true and no junk is left.
            if (outputOpened && file.exists() && !file.delete()) {
                System.err.println("Could not delete incomplete file: " + file);
            }
        }
    }

    /**
     * Returns a URL string that can be opened directly.
     *
     * @param src the raw image source
     * @return {@code src} unchanged when it already contains a scheme, otherwise
     *         {@code "http:" + src}
     */
    private static String toAbsoluteUrl(String src) {
        return src.contains("://") ? src : "http:" + src;
    }
}
View Code

 

posted @ 2017-03-21 17:04  一念花开满天下  阅读(365)  评论(0)  编辑  收藏  举报