【Java】通用版URLConnection 带cookie下载PDF等资源文件
/**** * 下载pdf文件 */ public static void downloadNet(String urlStr, String fileName, String savePath) throws MalformedURLException { // 下载网络文件 int bytesum = 0; int byteread = 0; // System.out.println(fileName); URL url = new URL(urlStr); try { URLConnection conn = url.openConnection(); conn.setConnectTimeout(30000); conn.setReadTimeout(30000); conn.setRequestProperty("Host", "paper.cnstock.com"); conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0"); conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3"); conn.setRequestProperty("Accept-Encoding", "utf8, deflate");//注意编码,gzip可能会乱码 conn.setRequestProperty("Content-Encoding", "utf8"); conn.setRequestProperty("Connection", "keep-alive"); conn.setRequestProperty("Upgrade-Insecure-Requests", "1"); conn.setRequestProperty("Cookie", cookie); conn.setRequestProperty("Cache-Control", "max-age=0"); conn.setRequestProperty("Content-Type", "application/pdf"); // savePage(page,savePath,fileName); InputStream inStream = conn.getInputStream(); FileOutputStream fs = new FileOutputStream(savePath + fileName); byte[] buffer = new byte[1204]; int length; while ((byteread = inStream.read(buffer)) != -1) { bytesum += byteread; // System.out.println(bytesum); fs.write(buffer, 0, byteread); } inStream.close(); fs.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }
conn.setRequestProperty("Accept-Encoding", "utf8, deflate");// Mind the encoding: gzip may produce garbled output
总结:
采集到的文件出现异常时,先用记事本打开文件查看数据,并与实际页面作对比,判断是否为乱码。如果是乱码,很可能是请求头中的编码设置(如 Accept-Encoding)有问题。