利用HttpClient4进行网络通讯

一、HttpClient介绍

虽然在 JDK 的 java.net 包中已经提供了访问 HTTP 协议的基本功能,但是对于大部分应用程序来说,它不够灵活,功能也不够丰富。HttpClient 最初是 Apache Jakarta Commons 下的子项目(现属于 Apache HttpComponents 项目),用来提供高效的、最新的、功能丰富的支持 HTTP 协议的客户端编程工具包,并且它支持 HTTP 协议最新的版本和建议。

 

二、使用范例 

4.3版本

1 ,通过get方式,请求网页内容。我们首先创建httpclient对象,然后通过httpclient来执行http get方法,httpresponse获得服务端响应的所有内容,httpentity为获取的网页消息体。

// Build a client with the library's default configuration.
CloseableHttpClient httpclient = HttpClients.createDefault();
try {
    // Issue the request with the GET method.
    HttpGet httpGet = new HttpGet("http://localhost/");
    // The response carries the status line, the headers and (optionally) a body.
    CloseableHttpResponse responseGet = httpclient.execute(httpGet);
    try {
        System.out.println(responseGet.getStatusLine());
        // The entity is the message body only (HTTP headers are not part of it).
        HttpEntity entity = responseGet.getEntity();

        if (entity != null) {
            // Charset declared by the Content-Type header, if any.
            ContentType contentType = ContentType.getOrDefault(entity);
            Charset charset = contentType.getCharset();
            if (charset == null) {
                // A Content-Type without a charset parameter yields null here, and
                // InputStreamReader rejects a null charset; fall back to ISO-8859-1,
                // the default HttpClient itself applies to text content.
                charset = Charset.forName("ISO-8859-1");
            }
            // Wrap the body stream in a buffered reader so it can be read line by line.
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(entity.getContent(), charset));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            } finally {
                // Closing the reader also closes the underlying content stream.
                br.close();
            }
        }
    } finally {
        responseGet.close();
    }

} finally {
    httpclient.close();
}

 

2 ,通过post方式提交表单。浏览器可将登录后的会话信息存储到本地,登录之后的每次请求都会自动向服务器发送cookie信息。幸运的是,httpclient亦可自动处理cookie信息:只要后续请求复用同一个httpclient对象,登录时得到的cookie就会被自动带上。

CloseableHttpClient httpclient = HttpClients.createDefault();
try {
    // Send the login request with the POST method.
    String urlString = "http://localhost/llogin.do";
    HttpPost httpPost = new HttpPost(urlString);
    List<NameValuePair> nvps = new ArrayList<NameValuePair>();
    nvps.add(new BasicNameValuePair("username", "admin"));
    nvps.add(new BasicNameValuePair("password", "admin"));
    // Attach the form fields as a URL-encoded request body.
    httpPost.setEntity(new UrlEncodedFormEntity(nvps));
    CloseableHttpResponse response = httpclient.execute(httpPost);

    try {
        // A 302 here means the server answered with a redirect, in which case
        // there is no page body to read from this response.
        System.out.println(response.getStatusLine());
        // The entity is the message body only (HTTP headers are not part of it).
        HttpEntity entity = response.getEntity();

        if (entity != null) {
            // Charset declared by the Content-Type header, if any.
            ContentType contentType = ContentType.getOrDefault(entity);
            Charset charset = contentType.getCharset();
            if (charset == null) {
                // A Content-Type without a charset parameter yields null here, and
                // InputStreamReader rejects a null charset; fall back to ISO-8859-1,
                // the default HttpClient itself applies to text content.
                charset = Charset.forName("ISO-8859-1");
            }
            // Wrap the body stream in a buffered reader so it can be read line by line.
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(entity.getContent(), charset));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            } finally {
                // Closing the reader also closes the underlying content stream.
                br.close();
            }
        }

    } finally {
        response.close();
    }

    // Requests that rely on the login cookies must be issued here, with this
    // same client instance, before it is closed below.
} finally {
    // Release the connection pool held by the client.
    httpclient.close();
}

 

3 ,重定向。httpclient默认可自动处理重定向请求,但是post方式需另外设置。

// LaxRedirectStrategy also follows redirects issued in response to POST,
// which the default strategy does not do.
LaxRedirectStrategy redirectStrategy = new LaxRedirectStrategy();
CloseableHttpClient httpclient = HttpClients.custom()
        .setRedirectStrategy(redirectStrategy)
        .build();
// The context records the redirect chain so it can be inspected afterwards.
HttpClientContext context = HttpClientContext.create();
try {
    // Send the login request with the POST method.
    String urlString = "http://localhost/llogin.do";
    HttpPost httpPost = new HttpPost(urlString);
    List<NameValuePair> nvps = new ArrayList<NameValuePair>();
    nvps.add(new BasicNameValuePair("username", "admin"));
    nvps.add(new BasicNameValuePair("password", "admin"));
    // Attach the form fields as a URL-encoded request body.
    httpPost.setEntity(new UrlEncodedFormEntity(nvps));
    CloseableHttpResponse response = httpclient.execute(httpPost, context);

    try {
        // Redirects are followed automatically, so this is the status line of
        // the last response in the chain.
        System.out.println(response.getStatusLine());
        // The entity is the message body only (HTTP headers are not part of it).
        HttpEntity entity = response.getEntity();

        // Print the address that was finally reached.
        HttpHost targetHost = context.getTargetHost();
        System.out.println(targetHost);
        List<URI> redirectLocations = context.getRedirectLocations();
        URI location = URIUtils.resolve(httpPost.getURI(), targetHost, redirectLocations);
        System.out.println("Final HTTP location: " + location.toASCIIString());

        if (entity != null) {
            // Charset declared by the Content-Type header, if any.
            ContentType contentType = ContentType.getOrDefault(entity);
            Charset charset = contentType.getCharset();
            if (charset == null) {
                // A Content-Type without a charset parameter yields null here, and
                // InputStreamReader rejects a null charset; fall back to ISO-8859-1,
                // the default HttpClient itself applies to text content.
                charset = Charset.forName("ISO-8859-1");
            }
            // Wrap the body stream in a buffered reader so it can be read line by line.
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(entity.getContent(), charset));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            } finally {
                // Closing the reader also closes the underlying content stream.
                br.close();
            }
        }

    } finally {
        response.close();
    }

} finally {
    httpclient.close();
}

 

三、 网页抓取(web蜘蛛)实例 抓取腾讯新闻头条

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.SocketTimeoutException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.zip.GZIPInputStream;

import javax.net.ssl.SSLHandshakeException;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.ParseException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpParams;

/**
 * Small page fetcher built on {@link DefaultHttpClient}.
 *
 * <p>{@link #getHtml(String)} downloads a page and retries a few times when
 * nothing could be fetched; {@link #getHtml(String, HttpHost)} performs a
 * single attempt. Bodies sent with {@code Content-Encoding: gzip} are
 * decompressed before being decoded to text.
 */
public class httpTest {
    /**
     * Charset used when the response's Content-Type declares none
     * (ISO-8859-1 is the default HttpClient itself applies to text content).
     */
    private static final String DEFAULT_CHARSET = "ISO-8859-1";

    /** Number of additional attempts made after the first one fails. */
    private static final int MAX_RETRIES = 3;

    private static DefaultHttpClient httpClient;

    static {
        httpClient = new DefaultHttpClient();
    }

    /**
     * Fetches a page, retrying when an attempt yields no content.
     *
     * @param url address of the page to download
     * @return the page text, or {@code null} when every attempt failed
     */
    public String getHtml(String url) {
        HttpHost proxyHost = new HttpHost("211.142.236.137", 8080);// proxy
        String html = getHtml(url, proxyHost);

        // Every retry result is inspected before giving up, so a successful
        // last attempt is returned instead of being thrown away.
        for (int retry = 0; isEmpty(html) && retry < MAX_RETRIES; retry++) {
            proxyHost = new HttpHost("211.142.236.137", 80);// switch proxy
            html = getHtml(url, proxyHost);
        }

        if (isEmpty(html)) {
            System.out.println("抓取失败");
            return null;
        }
        return html;
    }

    /**
     * Performs one GET request and returns the response body as text.
     *
     * @param url       address of the page to download
     * @param proxyHost proxy intended for the request; currently not applied
     *                  because the proxy configuration below is commented out
     * @return the page text, or an empty string when the status is not 200,
     *         the response has no body, or an exception occurred
     */
    public String getHtml(String url, HttpHost proxyHost) {
        String html = "";

        HttpGet get = new HttpGet(url);
        get.addHeader("Accept", "text/html, application/xhtml+xml, */*");
        get.addHeader("Accept-Language", "zh-CN,en-US;q=0.5");
        get.addHeader("User-Agent",
                "Mozilla/27.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)");
        // Only advertise the encoding this class is able to decode.
        get.addHeader("Accept-Encoding", "gzip");
        get.addHeader("Connection", "Keep-alive");

//        HttpParams setParameter = get.getParams().setParameter(
//                ConnRoutePNames.DEFAULT_PROXY, proxyHost);// configure the proxy

        try {
            HttpResponse httpResponse = httpClient.execute(get);
            int statusCode = httpResponse.getStatusLine().getStatusCode();
            if (200 != statusCode) {
                return html;
            }
            HttpEntity httpEntity = httpResponse.getEntity();
            if (httpEntity != null) {
                html = readHtmlContentFromEntity(httpEntity);
            }
        } catch (Exception e) {
            // Best effort: a failed attempt is reported and yields "".
            e.printStackTrace();
        } finally {
            // Hand the connection back to the shared client on every path.
            get.releaseConnection();
        }
        return html;
    }

    /** Tells whether a fetch attempt produced no usable content. */
    private static boolean isEmpty(String html) {
        return html == null || "".equals(html);
    }

    /**
     * Decodes the entity body to text, transparently un-gzipping it when the
     * Content-Encoding header says so.
     */
    private String readHtmlContentFromEntity(HttpEntity httpEntity)
            throws ParseException, IOException {
        // getCharset() is null when the Content-Type has no charset parameter.
        String charSet = DEFAULT_CHARSET;
        ContentType contentType = ContentType.getOrDefault(httpEntity);
        if (contentType.getCharset() != null) {
            charSet = contentType.getCharset().name();
        }

        Header encoding = httpEntity.getContentEncoding();
        InputStream in = httpEntity.getContent();
        if (in == null) {
            return "";
        }
        try {
            // Content-coding tokens are case-insensitive.
            if (encoding != null && "gzip".equalsIgnoreCase(encoding.getValue())) {
                return unZip(in, charSet);
            }
            return readInStreamToString(in, charSet);
        } finally {
            in.close();
        }
    }

    /** Inflates a gzip stream fully and decodes the bytes with the given charset. */
    private String unZip(InputStream in, String charSet) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        GZIPInputStream gis = new GZIPInputStream(in);
        try {
            byte[] buffer = new byte[1024];
            int len;
            while ((len = gis.read(buffer)) != -1) {
                baos.write(buffer, 0, len);
            }
            return new String(baos.toByteArray(), charSet);
        } finally {
            gis.close();
        }
    }

    /** Reads the stream line by line, terminating every line with {@code "\n"}. */
    private String readInStreamToString(InputStream in, String charSet)
            throws IOException {
        StringBuilder str = new StringBuilder();
        BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(in, charSet));
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                str.append(line);
                str.append("\n");
            }
        } finally {
            bufferedReader.close();
        }
        return str.toString();
    }
}

主函数

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Entry point of the crawler example: downloads the qq.com front page and
 * prints the text and target of every headline link found in it.
 */
public class GetNews {
    public static void main(String[] argu) {
        httpTest fetcher = new httpTest();
        String page = fetcher.getHtml("http://www.qq.com");

        // Nothing to parse when the download failed or returned no content.
        if (page == null || page.equals("")) {
            return;
        }

        Document parsed = Jsoup.parse(page);
        // Narrow down step by step: headline container -> list items -> anchors.
        Elements container = parsed.select("div[class=ft fl]");
        Elements entries = container.select("li");
        Elements anchors = entries.select("a");

        for (Element anchor : anchors) {
            String title = anchor.text();
            String target = anchor.attr("href");
            System.out.println(title + "    链接" + target);
        }
    }
}

实例见: http://www.cnblogs.com/updateofsimon/category/550506.html .

 

四、 文件上传

    文件上传可以使用两种方式实现,一种是PostMethod方式(HttpClient 3.x),一种是HttpPost方式(HttpClient 4.x)。两者的处理大同小异:PostMethod是使用FilePart将文件流包装起来,HttpPost是使用FileBody将文件流包装起来。在传递文件流给服务端的时候,都可以同时传递其他的参数。

客户端处理

HttpPost方式

     这种方式与PostMethod方式类似,只不过FilePart变成了FileBody,PostMethod方式中的Part数组在这里对应HttpEntity。此处的HttpPost是org.apache.http.client.methods包下的类。

复制代码
 1 public void upload(String localFile){
 2         CloseableHttpClient httpClient = null;
 3         CloseableHttpResponse response = null;
 4         try {
 5             httpClient = HttpClients.createDefault();
 6             
 7             // 把一个普通参数和文件上传给下面这个地址 是一个servlet
 8             HttpPost httpPost = new HttpPost(URL_STR);
 9             
10             // 把文件转换成流对象FileBody
11             FileBody bin = new FileBody(new File(localFile));
12 
13             StringBody userName = new StringBody("Scott", ContentType.create(
14                     "text/plain", Consts.UTF_8));
15             StringBody password = new StringBody("123456", ContentType.create(
16                     "text/plain", Consts.UTF_8));
17 
18             HttpEntity reqEntity = MultipartEntityBuilder.create()
19                     // 相当于<input type="file" name="file"/>
20                     .addPart("file", bin)
21                     
22                     // 相当于<input type="text" name="userName" value=userName>
23                     .addPart("userName", userName)
24                     .addPart("pass", password)
25                     .build();
26 
27             httpPost.setEntity(reqEntity);
28 
29             // 发起请求 并返回请求的响应
30             response = httpClient.execute(httpPost);
31             
32             System.out.println("The response value of token:" + response.getFirstHeader("token"));
33                 
34             // 获取响应对象
35             HttpEntity resEntity = response.getEntity();
36             if (resEntity != null) {
37                 // 打印响应长度
38                 System.out.println("Response content length: " + resEntity.getContentLength());
39                 // 打印响应内容
40                 System.out.println(EntityUtils.toString(resEntity, Charset.forName("UTF-8")));
41             }
42             
43             // 销毁
44             EntityUtils.consume(resEntity);
45         }catch (Exception e){
46             e.printStackTrace();
47         }finally {
48             try {
49                 if(response != null){
50                     response.close();
51                 }
52             } catch (IOException e) {
53                 e.printStackTrace();
54             }
55             
56             try {
57                 if(httpClient != null){
58                     httpClient.close();
59                 }
60             } catch (IOException e) {
61                 e.printStackTrace();
62             }
63         }
64     }
复制代码

 

服务端处理

     无论客户端是哪种上传方式,服务端的处理都是一样的。在通过HttpServletRequest获得参数之后,把得到的Item进行分类,分为普通的表单和File表单。    

     通过ServletFileUpload 可以设置上传文件的大小及编码格式等。

     总之,服务端的处理是把得到的参数当做HTML表单进行处理的。     

复制代码
 1 public void processUpload(HttpServletRequest request, HttpServletResponse response){
 2         File uploadFile = new File(uploadPath);
 3         if (!uploadFile.exists()) {
 4             uploadFile.mkdirs();
 5         }
 6 
 7         System.out.println("Come on, baby .......");
 8         
 9         request.setCharacterEncoding("utf-8");  
10         response.setCharacterEncoding("utf-8");  
11           
12         //检测是不是存在上传文件  
13         boolean isMultipart = ServletFileUpload.isMultipartContent(request);  
14           
15         if(isMultipart){  
16             DiskFileItemFactory factory = new DiskFileItemFactory();  
17             
18             //指定在内存中缓存数据大小,单位为byte,这里设为1Mb  
19             factory.setSizeThreshold(1024*1024);  
20            
21             //设置一旦文件大小超过getSizeThreshold()的值时数据存放在硬盘的目录   
22             factory.setRepository(new File("D:\\temp"));  
23             
24             // Create a new file upload handler  
25             ServletFileUpload upload = new ServletFileUpload(factory);  
26            
27             // 指定单个上传文件的最大尺寸,单位:字节,这里设为50Mb    
28             upload.setFileSizeMax(50 * 1024 * 1024);    
29             
30             //指定一次上传多个文件的总尺寸,单位:字节,这里设为50Mb  
31             upload.setSizeMax(50 * 1024 * 1024);     
32             upload.setHeaderEncoding("UTF-8");
33               
34             List<FileItem> items = null;  
35               
36             try {  
37                 // 解析request请求  
38                 items = upload.parseRequest(request);  
39             } catch (FileUploadException e) {  
40                 e.printStackTrace();  
41             }  
42             
43             if(items!=null){  
44                 //解析表单项目  
45                 Iterator<FileItem> iter = items.iterator();  
46                 while (iter.hasNext()) {  
47                     FileItem item = iter.next(); 
48                     
49                     //如果是普通表单属性  
50                     if (item.isFormField()) {  
51                         //相当于input的name属性   <input type="text" name="content">  
52                         String name = item.getFieldName();
53                         
54                         //input的value属性  
55                         String value = item.getString();
56                         
57                         System.out.println("属性:" + name + " 属性值:" + value);  
58                     }  
59                     //如果是上传文件  
60                     else {  
61                         //属性名  
62                         String fieldName = item.getFieldName();  
63                         
64                         //上传文件路径  
65                         String fileName = item.getName();  
66                         fileName = fileName.substring(fileName.lastIndexOf("/") + 1);// 获得上传文件的文件名  
67                         
68                         try {  
69                             item.write(new File(uploadPath, fileName));  
70                         } catch (Exception e) {  
71                             e.printStackTrace();  
72                         }  
73                     } 
74                 }  
75             }  
76         }  
77         
78         response.addHeader("token", "hello");
79     }
复制代码

    服务端在处理之后,可以在Header中设置返回给客户端的简单信息。如果返回客户端是一个流的话,流的大小必须提前设置!

    response.setContentLength((int) file.length());

posted @ 2014-03-16 16:27  匡时@下一站.info  阅读(585)  评论(0编辑  收藏  举报