【Java】通用版URLConnection 带cookie下载PDF等资源文件

/**
	 * Downloads a remote resource (for example a PDF) over a {@link URLConnection},
	 * sending a fixed set of browser-like request headers plus a cookie, and writes
	 * the raw response bytes to {@code savePath + fileName}.
	 *
	 * <p>I/O failures (connection errors, an unwritable target file, read errors) are
	 * not propagated: the stack trace is printed and the method returns. A partially
	 * written file may be left behind in that case. Both streams are always closed,
	 * whether or not the transfer succeeds.
	 *
	 * @param urlStr   absolute URL of the resource to download
	 * @param fileName name of the target file; appended to {@code savePath} as-is
	 * @param savePath target directory prefix; no separator is inserted between it and
	 *                 {@code fileName}, so it must already end with one
	 * @throws MalformedURLException if {@code urlStr} cannot be parsed as a URL
	 */
	public static void downloadNet(String urlStr, String fileName, String savePath) throws MalformedURLException {
		URL url = new URL(urlStr);

		try {
			URLConnection conn = url.openConnection();
			conn.setConnectTimeout(30000);
			conn.setReadTimeout(30000);
			// NOTE(review): Host is hard-coded regardless of urlStr — confirm this method
			// is only meant for paper.cnstock.com.
			conn.setRequestProperty("Host", "paper.cnstock.com");
			conn.setRequestProperty("User-Agent",
					"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0");
			conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
			conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
			// gzip is deliberately not advertised: the body is written to disk without any
			// decoding, so a gzip-compressed response would be saved as garbage.
			// NOTE(review): "deflate" is still advertised and would be stored undecoded as
			// well; "identity" may be the safer value — confirm against the server.
			conn.setRequestProperty("Accept-Encoding", "utf8, deflate");
			conn.setRequestProperty("Content-Encoding", "utf8");
			conn.setRequestProperty("Connection", "keep-alive");
			conn.setRequestProperty("Upgrade-Insecure-Requests", "1");
			// `cookie` is not declared in this method; it must be provided by the enclosing class.
			conn.setRequestProperty("Cookie", cookie);
			conn.setRequestProperty("Cache-Control", "max-age=0");
			conn.setRequestProperty("Content-Type", "application/pdf");

			// try-with-resources closes both streams even when read/write throws.
			// The input stream is opened first so that no empty file is created
			// when the request itself fails.
			try (InputStream inStream = conn.getInputStream();
					FileOutputStream fs = new FileOutputStream(savePath + fileName)) {
				byte[] buffer = new byte[8192];
				int byteread;
				while ((byteread = inStream.read(buffer)) != -1) {
					fs.write(buffer, 0, byteread);
				}
			}
		} catch (IOException e) {
			// Best-effort download: report the failure and return (also covers
			// FileNotFoundException, which is an IOException).
			e.printStackTrace();
		}
	}

  conn.setRequestProperty("Accept-Encoding", "utf8, deflate");//注意编码,gzip可能会乱码

  总结:

采集到的文件出现异常时,先用记事本打开文件查看数据,并与实际页面作对比,判断是否为乱码。如果是乱码,很可能是请求头中的编码设置(例如 Accept-Encoding)有问题。

posted @ 2017-06-14 08:53  陈泽泽  阅读(1622)  评论(0)  编辑  收藏  举报