jsoup之下载图片和小说
一:下载图片
public static void main(String[] args) { try { //属性 Properties p = System.getProperties() ; p.getProperty("proxy","true" ) ;//代理 p.getProperty("http.proxyHost", "118.254.147.6") ;//代理ip p.getProperty("http.proxyPort","3128" ) ;//代理端口 Map<String,String> map = new HashMap<String,String>() ; //模拟浏览器访问 map.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8") ; map.put("Accept-Encoding", "gzip, deflate") ; map.put("Accept-Language", "zh-CN,zh;q=0.9") ; map.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3253.3 Safari/537.36") ; //根据地址,得到整个页面 Document d = Jsoup.connect("http://www.ivsky.com/tupian/ziranfengguang").headers(map).get() ; //获取需要的元素 Elements e = d.select(".ali li img") ; //遍历元素,获取需要的数据 for (Element element : e) { //得到图片地址 String path = element.attr("src") ; //创建URL对象 URL url = new URL(path) ; //打开链接 HttpURLConnection h = (HttpURLConnection) url.openConnection() ; //设置请求方式为"GET" h.setRequestMethod("GET"); //超时响应时间为5秒 h.setConnectTimeout(5000); //通过输入流获取图片数据 InputStream in = h.getInputStream() ; //得到图片的二进制数据,以二进制封装得到数据,具有通用性 byte[] b = getInputStream(in) ; //保存位置,图片名字 File f = new File("f:\\小说下载\\图片.jpg") ; //创建输出流 OutputStream out = new FileOutputStream(f) ; //写入数据 out.write(b); //关闭输出流 out.close(); } } catch (IOException e) { e.printStackTrace(); } } //得到图片的二进制数据 public static byte[] getInputStream(InputStream in){ //创建缓冲区 byte[] b = new byte[1024] ; //创建输出流 ByteArrayOutputStream out = new ByteArrayOutputStream() ; int n = 0 ; try { //循环读取数据 while((n=in.read(b))!=-1){ //写入数据到byte[]中 out.write(b, 0, n); } } catch (IOException e) { e.printStackTrace(); }finally{ try { in.close(); } catch (IOException e) { e.printStackTrace(); } } return out.toByteArray() ; }
二:下载小说
// Downloads every chapter linked from the novel's index page and stores each
// chapter as a UTF-8 text file named after its title.

// Proxy settings. These must be *set*: getProperty(key, default) only reads a
// value and would leave the JVM without any proxy configured.
// NOTE(review): the address below is a sample public proxy; it has to be
// reachable or every request will fail - adjust or remove as needed.
Properties props = System.getProperties();
// NOTE(review): "proxy" is not a documented JDK networking property; kept only
// to preserve the author's intended settings.
props.setProperty("proxy", "true");
props.setProperty("http.proxyHost", "118.254.147.6");
props.setProperty("http.proxyPort", "3128");
try {
    // Request headers that imitate a desktop browser.
    Map<String, String> headers = new HashMap<String, String>();
    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
    headers.put("Accept-Encoding", "gzip, deflate");
    headers.put("Accept-Language", "zh-CN,zh;q=0.9");
    headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3253.3 Safari/537.36");

    // Index page: each element matching ".L" is expected to hold one chapter link.
    Document index = Jsoup.connect("http://www.23us.so/files/article/html/17/17157/index.html")
            .headers(headers)
            .get();
    Elements cells = index.select(".L");

    int chapterNo = 0;
    for (Element cell : cells) {
        // "abs:" makes jsoup resolve the href against the index page URL;
        // the result is "" when the cell has no usable link.
        String url = cell.select("a").attr("abs:href");
        if (url.isEmpty()) {
            continue;
        }
        chapterNo++;

        // Fetch the chapter with the same browser-like headers as the index.
        Document chapter = Jsoup.connect(url).headers(headers).get();
        String content = chapter.select("#contents").text();
        String title = chapter.select("#a_main h1").text();

        // Titles can contain characters Windows forbids in file names
        // (\ / : * ? " < > |); replace them so one odd title cannot abort the run.
        String fileName = title.replaceAll("[\\\\/:*?\"<>|]", "_").trim();
        if (fileName.isEmpty()) {
            fileName = "chapter-" + chapterNo;
        }

        // Save location for this chapter.
        File file = new File("f:\\小说下载\\" + fileName + ".txt");
        // Create the target directory if needed (no-op when it already exists).
        file.getParentFile().mkdirs();

        // FileOutputStream creates the file itself; try-with-resources
        // guarantees the handle is released for every chapter.
        try (OutputStream out = new FileOutputStream(file)) {
            // Explicit UTF-8 so the output does not depend on the platform charset.
            out.write(title.getBytes("UTF-8"));
            // Put the title on its own line instead of fusing it with the text.
            out.write(System.getProperty("line.separator").getBytes("UTF-8"));
            out.write(content.getBytes("UTF-8"));
        }
    }
} catch (IOException e) {
    e.printStackTrace();
}