Java下载网页HTML代码 - 高飞航的日志 - 网易博客
标签:java 下载 获取 html 代码
通过URL获取网页的HTML代码。
import java.io.BufferedReader;import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Utility for downloading the text of a web page over HTTP.
 */
public class HTMLSpirit {

    /**
     * Downloads the resource at {@code pageURL} and returns its body as text.
     *
     * <p>The response is decoded with {@code encoding} and read line by line; every line
     * (including the last one) is terminated with {@code "\r\n"} in the result, regardless
     * of the line terminator the server used.
     *
     * <p>This method is best-effort: any exception (malformed URL, non-HTTP URL, unsupported
     * charset, I/O failure, {@code null} argument) is printed to standard error and whatever
     * was read up to that point is returned, which is the empty string if nothing was read.
     *
     * @param pageURL  absolute HTTP URL of the page to fetch
     * @param encoding name of the charset used to decode the response body, e.g. {@code "UTF-8"}
     * @return the decoded page text with CRLF line endings; never {@code null}
     */
    public static String getHTML(String pageURL, String encoding) {
        StringBuilder pageHTML = new StringBuilder();
        HttpURLConnection connection = null;
        BufferedReader reader = null;
        try {
            URL url = new URL(pageURL);
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestProperty("User-Agent", "MSIE 7.0");
            reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), encoding));
            String line;
            while ((line = reader.readLine()) != null) {
                pageHTML.append(line);
                pageHTML.append("\r\n");
            }
        } catch (Exception e) {
            // Deliberate best-effort: report the problem and fall through to return
            // whatever has been accumulated so far.
            e.printStackTrace();
        } finally {
            // Cleanup lives in finally so the reader and the connection are released
            // on the failure paths too, not only after a fully successful read.
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done if close() itself fails.
                }
            }
            if (connection != null) {
                connection.disconnect();
            }
        }
        return pageHTML.toString();
    }

    /**
     * Demo entry point: fetches the Baidu home page decoded as GB2312 and prints it
     * to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println(getHTML("http://www.baidu.com", "GB2312"));
    }
}