crawler_基础之_httpclient 访问网络资源
先贴一个简单版的，后期再修改。
pom文件
<dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpasyncclient</artifactId> <version>4.0-alpha3</version> <scope>compile</scope> </dependency>
package com.cph.utils;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;

/**
 * Helper class for HttpClient.<br>
 * 4.1.2 test version (per the original author's note).
 *
 * @author cphmvp
 */
public class HttpClientUtil {
    private static final Logger LOG = Logger.getLogger(HttpClientUtil.class.getName());

    /** Charset used when the response does not declare a usable one. */
    private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");

    /** Browser identification sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; MyIE 2.0 Beta 2; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.3)";

    // NOTE(review): this looks like a captured login session and it is attached to
    // every request regardless of the target host. Kept unchanged so existing
    // behavior is preserved; consider removing it or loading it from configuration.
    private static final String COOKIE =
            "RK=hki7lw6qHP; wbilang_821910184=zh_CN; wbilang_10000=zh_CN; dm_login_weixin_rem=; dm_login_weixin_scan=; wb_regf=%3B0%3B%3Bwww.baidu.com%3B0; mb_reg_from=8; ts_last=t.qq.com/; ts_refer=search.t.qq.com/index.php; ts_uid=7492426386; wbilang_384871492=zh_CN; ts_last=1.t.qq.com/wolongxian; ts_refer=www.baidu.com/s; ts_uid=7492426386; pgv_pvid=1942759996; pgv_info=ssid=s5111200112; o_cookie=384871492; ptui_loginuin=821910184; ptisp=cnc; ptcz=9c03596fa66d550bcd5c8cd812f16ad5d6c2074604285851a218c478774eb6bb; luin=o0821910184; lskey=00010000b43bed256a14b910da63ac03a1c1a042994fea6a8a7078dcb2ea566d5dc09188883ddddd1f7feadb; pt2gguin=o0821910184; uin=o0821910184; skey=@xObtCqUUW; p_uin=o0821910184; p_skey=swqZymgXczQrTdTin9Qe44jMT5cTNoTeSzaXrxDjs3k_; pt4_token=OlMTg1UJSdPz-VzgfdEgFQ__; p_luin=o0821910184; p_lskey=000400001663db9b9783c84586b6d929044d17e291916d1cfcfb93c0f520f05e8c85adc89dffc94e52b1325e";

    /**
     * Downloads a page with an HTTP GET request and returns its body as text.
     *
     * <p>The body is decoded with the charset named in the response's
     * {@code Content-Type} header; UTF-8 is used when the header is missing or
     * names a charset this JVM does not support. The status code is printed to
     * standard output.
     *
     * @param url the address to fetch; must not be {@code null}
     * @return the response body, or {@code null} when the status is not 200, the
     *         response carries no entity, or the request fails with an I/O or
     *         protocol error (the error is logged)
     * @throws IllegalArgumentException if {@code url} is {@code null} or is not a
     *         syntactically valid URI
     */
    public static String downloadGet(String url) {
        if (url == null) {
            throw new IllegalArgumentException("url must not be null");
        }
        DefaultHttpClient client = new DefaultHttpClient();
        try {
            // Build the request inside the try so the client is shut down even
            // when the URL is rejected as malformed.
            HttpUriRequest request = new HttpGet(url);
            pageRequest(request);

            HttpResponse response = client.execute(request);
            int statusCode = response.getStatusLine().getStatusCode();
            System.out.println("响应码: " + statusCode);
            if (statusCode != HttpStatus.SC_OK) {
                return null;
            }

            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // A 200 response without a body has nothing to decode.
                return null;
            }
            byte[] content = EntityUtils.toByteArray(entity);
            return new String(content, resolveCharset(entity));
        } catch (ClientProtocolException e) {
            LOG.log(Level.WARNING, "HTTP protocol error while fetching " + url, e);
        } catch (IOException e) {
            LOG.log(Level.WARNING, "I/O error while fetching " + url, e);
        } finally {
            // Shut the client down; it is created per call and not reused.
            client.getConnectionManager().shutdown();
        }
        return null;
    }

    /**
     * Picks the charset for decoding an entity: the {@code charset} parameter of
     * its {@code Content-Type} header when present and supported, otherwise
     * {@link #DEFAULT_CHARSET}.
     *
     * @param entity the response entity whose header is inspected
     * @return the charset to decode the body with; never {@code null}
     */
    private static Charset resolveCharset(HttpEntity entity) {
        Header contentType = entity.getContentType();
        if (contentType == null) {
            return DEFAULT_CHARSET;
        }
        for (HeaderElement element : contentType.getElements()) {
            NameValuePair charsetParam = element.getParameterByName("charset");
            if (charsetParam == null || charsetParam.getValue() == null) {
                continue;
            }
            String declared = charsetParam.getValue().trim();
            try {
                return Charset.forName(declared);
            } catch (IllegalArgumentException e) {
                // Covers both illegal and unsupported charset names; fall back
                // instead of failing the whole download.
                LOG.log(Level.FINE, "Unusable charset '" + declared + "', using " + DEFAULT_CHARSET, e);
                return DEFAULT_CHARSET;
            }
        }
        return DEFAULT_CHARSET;
    }

    /**
     * Sets the request headers (User-Agent and Cookie) on the given request.
     *
     * @param request the request to decorate
     */
    private static void pageRequest(HttpUriRequest request) {
        // Browser version
        request.setHeader("User-Agent", USER_AGENT);
        // Cookie, referer, etc.
        request.setHeader("Cookie", COOKIE);
    }

    public static void main(String[] args) {
        String url = "http://www.baidu.com/";
        System.out.println(downloadGet(url));
    }
}
created by cphmvp
email:cphmvp@163.com
爬虫技术交流_crawler QQ群 :167047843