爬虫小例子
package com.textPa.two;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Minimal crawler example: issues a single HTTP GET for a fixed URL, prints
 * the response body to standard output and saves it to a fixed local file.
 *
 * <p>Requires Apache HttpClient 4.3+ (and its dependencies) on the classpath.
 */
public class RetrivePage {

    /** Page fetched by {@link #main(String[])}. */
    private static final String PAGE_URL = "http://club.news.sohu.com/zz0578/thread/4bqnexpi3no";

    /** Local file the fetched page is written to. */
    private static final String OUTPUT_PATH = "d:\\baidu.html";

    /** Name of the charset passed to the body decoder and used to encode the saved file. */
    private static final String PAGE_CHARSET_NAME = "GBK";

    /**
     * Fetches {@link #PAGE_URL}, echoes the body to standard output and writes
     * it to {@link #OUTPUT_PATH}. Any failure is reported on standard error
     * instead of being silently discarded.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        HttpGet getHttp = new HttpGet(PAGE_URL);

        // try-with-resources guarantees that the client and the response (and
        // therefore the underlying connection) are released on every path.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
                CloseableHttpResponse response = httpClient.execute(getHttp)) {

            HttpEntity entity = response.getEntity();
            if (entity == null) {
                System.err.println("Response for " + PAGE_URL + " has no body; nothing saved.");
                return;
            }

            // Resolved here (not in a static initializer) so that an unsupported
            // charset is reported by the catch below rather than at class load.
            Charset pageCharset = Charset.forName(PAGE_CHARSET_NAME);

            String content = EntityUtils.toString(entity, pageCharset);
            System.out.println(content);

            // Encode the file with the same charset that was handed to the
            // decoder instead of the platform default used by FileWriter, so the
            // saved bytes do not depend on the machine the program runs on.
            try (BufferedWriter writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(new File(OUTPUT_PATH)), pageCharset))) {
                writer.write(content);
            }
            System.out.println("创建成功");
        } catch (Exception e) {
            // Top-level boundary of the program: report the failure rather than
            // swallowing it, but do not let it escape main.
            System.err.println("Failed to fetch or save " + PAGE_URL + ": " + e);
        }
    }
}
运行所需的两个 jar 包可从下面的链接下载:
http://pan.baidu.com/s/1nuFuDUL