使用HttpClient抓取网站首页
HttpClient是Apache开发的第三方Java库,可以用来进行网络爬虫的开发,相关API的可以在http://hc.apache.org/httpcomponents-client-ga/httpclient/apidocs/查看。
import java.io.BufferedReader; import java.io.InputStreamReader; import org.apache.http.client.methods.*; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; public class pachong { public static void main(String[] args) throws Exception { String url = "http://www.baidu.com"; CloseableHttpClient client = HttpClients.createDefault(); HttpGet request = new HttpGet(url); CloseableHttpResponse response = client.execute(request); System.out.println("Response Code: " + response.getStatusLine().getStatusCode()); BufferedReader rd = new BufferedReader( new InputStreamReader(response.getEntity().getContent())); String line = ""; while((line = rd.readLine()) != null) { System.out.println(line); } response.close(); client.close(); } }