java 下载网络图片 20251682编辑

Heaven helps those who help themselves
资深码农+深耕理财=财富自由
欢迎关注

java 下载网络图片

Created by Marydon on 2017-09-30 11:25

说明：根据网络URL获取该网页上面所有的img标签并下载符合要求的所有图片

所需jar包：jsoup.jar

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Utility class for batch-downloading images.
 * <p>
 * Fetches a web page with jsoup, collects the {@code src} attributes of its
 * {@code <img>} tags and downloads files by URL. All failures are reported on
 * standard output instead of being thrown to the caller.
 *
 * @author Marydon
 * @create time 2016-09-03 14:01:03
 * @update time 2017-09-30 11:07:02
 * @E-mail:dellshouji@163.com
 */
public class ImgDownloadUtil {

    /** Timeout, in milliseconds, passed to jsoup when fetching and parsing a page. */
    private static final int PAGE_TIMEOUT_MILLIS = 5000;

    /** Connect and read timeout, in milliseconds, used when downloading a file. */
    private static final int DOWNLOAD_TIMEOUT_MILLIS = 3 * 1000;

    /**
     * Fetches the page at the given URL and parses it into a DOM document.
     *
     * @param url
     *            address of the web page
     * @return the parsed document, or {@code null} when the URL is malformed
     *         or the page cannot be fetched (the error is printed)
     */
    public static Document getHtmlDocument(String url) {
        Document document = null;
        try {
            // Open the URL and let jsoup fetch + parse it in one step
            document = Jsoup.parse(new URL(url), PAGE_TIMEOUT_MILLIS);
        } catch (MalformedURLException e) {
            System.out.println("世界上最遥远的距离就是没有网，检查设置！");
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("您的网络连接打开失败，请稍后重试！");
            e.printStackTrace();
        }

        return document;
    }

    /**
     * Fetches the page at the given URL and returns its HTML source.
     *
     * @param url
     *            address of the web page
     * @return the HTML of the parsed document, or an empty string when the
     *         page could not be fetched
     */
    public static String getHtmlText(String url) {
        // Reuse getHtmlDocument so fetching and error reporting live in one place
        Document document = getHtmlDocument(url);
        return document == null ? "" : document.html();
    }

    /**
     * Collects the image addresses found in a DOM document.
     * <p>
     * Only {@code src} values that start with {@code http://} or
     * {@code https://} are returned; empty and relative values are skipped.
     *
     * @param document
     *            DOM document to scan; may be {@code null}
     * @return list of image addresses, empty when {@code document} is
     *         {@code null} or no image qualifies
     */
    public static List<String> getImgAddressByDom(Document document) {
        List<String> imgAddress = new ArrayList<String>();
        if (null == document) {
            return imgAddress;
        }

        // <img src="" alt="" width="" height=""/>
        Elements elements = document.getElementsByTag("img");
        for (Element el : elements) {
            String imgSrc = el.attr("src");
            // Keep absolute web addresses only; https is accepted as well as http
            // because most sites now serve their images over TLS
            if (imgSrc.startsWith("http://") || imgSrc.startsWith("https://")) {
                imgAddress.add(imgSrc);
            }
        }

        return imgAddress;
    }

    /**
     * Downloads the resource at the given URL into {@code savePath/fileName}.
     * <p>
     * The target directory is created when it does not exist. Success or
     * failure is printed to standard output; no exception is propagated for
     * malformed URLs or I/O errors.
     *
     * @param url
     *            address of the file to download
     * @param fileName
     *            name to give the downloaded file
     * @param savePath
     *            directory in which the file is stored
     */
    public static void downloadFileByUrl(String url, String fileName, String savePath) {
        BufferedInputStream bis = null;
        BufferedOutputStream bos = null;
        try {
            // 1. Open the connection
            URLConnection conn = new URL(url).openConnection();
            conn.setConnectTimeout(DOWNLOAD_TIMEOUT_MILLIS);
            // Without a read timeout a stalled server would block the copy loop forever
            conn.setReadTimeout(DOWNLOAD_TIMEOUT_MILLIS);
            // Send a browser-like User-Agent so servers that reject crawlers do not answer 403
            conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");

            // 2. Obtain the input stream
            bis = new BufferedInputStream(conn.getInputStream());

            // 3. Make sure the target directory exists; if it cannot be created,
            //    opening the output file below fails and is reported as an I/O error
            File saveDir = new File(savePath);
            if (!saveDir.exists()) {
                saveDir.mkdirs();
            }
            File file = new File(savePath + File.separator + fileName);

            // 4. Copy the response body to the file
            bos = new BufferedOutputStream(new FileOutputStream(file));
            byte[] buffer = new byte[1024];
            int len;
            while ((len = bis.read(buffer)) != -1) {
                bos.write(buffer, 0, len);
            }
            // Flush before reporting success so a write failure is not reported as a success
            bos.flush();
            System.out.println("info:" + url + " download success,fileRename=" + fileName);
        } catch (MalformedURLException e) {
            System.out.println("世界上最遥远的距离就是没有网，检查设置");
            System.out.println("info:" + url + " download failure");
            e.printStackTrace();
        } catch (IOException e) {
            System.out.println("您的网络连接打开失败，请稍后重试！");
            System.out.println("info:" + url + " download failure");
            e.printStackTrace();
        } finally {
            // Close each stream in its own try block: a failure while closing the
            // input must not prevent the output from being closed (and flushed).
            // Closing a buffered stream also closes the stream it wraps.
            if (bis != null) {
                try {
                    bis.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (bos != null) {
                try {
                    bos.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

}

测试（将下面的 main 方法放入 ImgDownloadUtil 类中运行）

/**
 * Demo entry point: downloads every qualifying image of one web page.
 * <p>
 * Each image is stored in the save directory under a random 32-character
 * name followed by the extension taken from the image URL.
 *
 * @param args
 *            command-line arguments (unused)
 */
public static void main(String[] args) {
    // 1. Page to scan
    String url = "http://www.cnblogs.com/Marydon20170307/p/7402871.html";
    // 2. Fetch the DOM of that page
    Document document = getHtmlDocument(url);
    // 3. Collect the addresses of all qualifying images
    List<String> imgAddresses = getImgAddressByDom(document);
    // 4. Directory the images are saved to
    String savePath = "C:/Users/Marydon/Desktop";
    // 5. Download the images one by one
    for (String imgSrc : imgAddresses) {
        // 5.1 File name: a 32-character unique identifier plus the image extension
        String imgName = UUID.randomUUID().toString().replace("-", "") + getImgType(imgSrc);
        // 5.2 Download the image
        downloadFileByUrl(imgSrc, imgName, savePath);
    }
}

/**
 * Extracts the file extension (including the leading dot) from an image URL.
 * <p>
 * Only the last path segment is inspected, so dots in the host name and
 * anything in the query string or fragment never end up in the file name.
 *
 * @param imgSrc
 *            absolute image URL
 * @return the extension such as {@code ".png"}, or an empty string when the
 *         URL has no path or its last segment has no extension
 */
private static String getImgType(String imgSrc) {
    String path = imgSrc;
    // Drop the fragment and the query string; they are not part of the file name
    int cut = path.indexOf('#');
    if (cut >= 0) {
        path = path.substring(0, cut);
    }
    cut = path.indexOf('?');
    if (cut >= 0) {
        path = path.substring(0, cut);
    }

    // Skip "scheme://host"; without a path there is no file name to inspect
    int schemeEnd = path.indexOf("://");
    int pathStart = path.indexOf('/', schemeEnd < 0 ? 0 : schemeEnd + 3);
    if (pathStart < 0) {
        return "";
    }

    String lastSegment = path.substring(path.lastIndexOf('/') + 1);
    int dot = lastSegment.lastIndexOf('.');
    // No dot, or a trailing dot, means there is no usable extension
    if (dot < 0 || dot == lastSegment.length() - 1) {
        return "";
    }
    return lastSegment.substring(dot);
}

与君共勉：最实用的自律是攒钱，最养眼的自律是健身，最健康的自律是早睡，最改变气质的自律是看书，最好的自律是经济独立。

您的一个点赞，一句留言，一次打赏，就是博主创作的动力源泉！

↓↓↓↓↓↓写的不错，对你有帮助？赏博主一口饭吧↓↓↓↓↓↓

posted @ 2017-09-30 11:25 Marydon 阅读(1682) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

阅读排行：
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型，支持深度思考和联网搜索！
· 基于 Docker 搭建 FRP 内网穿透开源项目（很简单哒）
· ollama系列01：轻松3步本地部署deepseek，普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现

java 下载网络图片 20251682编辑

Heaven helps those who help themselves
资深码农+深耕理财=财富自由
欢迎关注

java 下载网络图片

Created by Marydon on 2017-09-30 11:25

公告

搜索

常用链接

最新随笔

我的标签

随笔分类

阅读排行榜

java 下载网络图片 20251682编辑

Heaven helps those who help themselves
资深码农+深耕理财=财富自由
欢迎关注

java 下载网络图片

Created by Marydon on 2017-09-30 11:25

相关推荐：

公告

搜索

常用链接

最新随笔

我的标签

随笔分类

阅读排行榜

Heaven helps those who help themselves
资深码农+深耕理财=财富自由
欢迎关注