// URLConnection 简单使用 (basic usage of URLConnection)

import com.google.common.collect.Lists;
import com.meiyunji.spider.contant.HttpConnectContant;
import com.meiyunji.spider.crawl.response.HeaderVo;
import com.meiyunji.spider.crawl.response.ResponseObj;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.net.ssl.*;
import java.io.*;
import java.net.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;

/**
 * Static helpers that issue HTTP/HTTPS GET and POST requests through
 * {@link HttpURLConnection}, either directly or via an HTTP proxy.
 * <p>
 * Every public method returns a {@link ResponseObj}. I/O failures and malformed
 * URLs are reported as status code {@code -1} plus a reason phrase rather than
 * as an exception.
 * <p>
 * NOTE(security): HTTPS requests deliberately skip certificate and host-name
 * verification (see {@code trustAllHosts}). That is convenient for crawling but
 * leaves the traffic open to interception; do not reuse this class for
 * requests that carry secrets.
 *
 * @author ldj
 * @date 2018/6/25.
 */
public class HttpConnectUtil {

    private static final Logger logger = LoggerFactory.getLogger(HttpConnectUtil.class);

    /** The only Content-Encoding this class is able to decode. */
    private static final String GZIP = "gzip";

    /** Reason phrase reported when the request fails with an I/O error. */
    private static final String IO_ERROR_REASON = "Crawl Error : HttpsUrlConnection IOException .";

    /** Reason phrase reported when the URL cannot be parsed. */
    private static final String URL_ERROR_REASON = "Crawl Error : URL DealHtml error, the url is not support request.";

    /**
     * Sends a GET request without a proxy.
     *
     * @param url        request URL
     * @param ip         recorded on the returned response; no proxy is used because no port is given
     * @param headers    request headers applied on top of the default headers; may be null or empty
     * @param parameters query parameters appended to {@code url}; may be null
     * @return the response, or a response with status code -1 on failure
     **/
    public static ResponseObj urlGet(String url, String ip, Map<String, String> headers, Map<String, String> parameters) {
        return urlBaseGet(url, ip, null, headers, parameters);
    }

    /**
     * Sends a GET request through an HTTP proxy.
     *
     * @param url        request URL
     * @param ip         proxy host
     * @param port       proxy port; the proxy is only used when this is greater than 0 and {@code ip} is not blank
     * @param headers    request headers applied on top of the default headers; may be null or empty
     * @param parameters query parameters appended to {@code url}; may be null
     * @return the response, or a response with status code -1 on failure
     */
    public static ResponseObj urlPorxyGet(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> parameters) {
        return urlBaseGet(url, ip, port, headers, parameters);
    }

    /**
     * Builds the final GET URL, performs the request and records ip/url on the result.
     */
    private static ResponseObj urlBaseGet(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> parameters) {
        // A null url is passed on to new URL(...), which rejects it like any other malformed URL.
        String fullUrl = url == null ? null : appendQueryParameters(url.trim(), parameters);
        ResponseObj responseObj;
        try {
            responseObj = execute(new URL(fullUrl), ip, port, headers, HttpConnectContant.GET, null, null);
        } catch (MalformedURLException e) {
            responseObj = invalidUrlResponse(fullUrl, e);
        }
        return finish(responseObj, ip, fullUrl);
    }

    /**
     * Appends {@code parameters} to {@code url} as a query string: the first
     * parameter is introduced by {@code ?} unless the URL already contains one,
     * every further parameter by {@code &}.
     * <p>
     * NOTE(review): keys and values are appended verbatim (not URL-encoded), as
     * before; callers are presumably expected to pass encoded values - confirm.
     */
    private static String appendQueryParameters(String url, Map<String, String> parameters) {
        if (parameters == null || parameters.isEmpty()) {
            return url;
        }
        StringBuilder sbd = new StringBuilder(url);
        boolean hasQuery = url.indexOf('?') >= 0;
        for (Map.Entry<String, String> entry : parameters.entrySet()) {
            sbd.append(hasQuery ? '&' : '?').append(entry.getKey()).append('=').append(entry.getValue());
            hasQuery = true;
        }
        return sbd.toString();
    }

    /**
     * Sends a POST request without a proxy; the body is serialized as JSON.
     *
     * @param url     request URL
     * @param ip      recorded on the returned response
     * @param headers request headers applied on top of the default headers; may be null or empty
     * @param bodyMap request body
     * @return the response, or a response with status code -1 on failure
     */
    public static ResponseObj urlPost(String url, String ip, Map<String, String> headers, Map<String, String> bodyMap) {
        return urlAdvancedPost(url, ip, headers, bodyMap, HttpConnectContant.POST_BODY_TYPE_JSON);
    }

    /**
     * Sends a POST request through an HTTP proxy; the body is serialized as JSON.
     *
     * @param url     request URL
     * @param ip      proxy host
     * @param port    proxy port
     * @param headers request headers applied on top of the default headers; may be null or empty
     * @param bodyMap request body
     * @return the response, or a response with status code -1 on failure
     */
    public static ResponseObj urlProxyPost(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> bodyMap) {
        // Forward the port: it used to be dropped here, so the proxy was never used.
        return urlBasePost(url, ip, port, headers, bodyMap, HttpConnectContant.POST_BODY_TYPE_JSON);
    }

    /**
     * Sends a POST request without a proxy.
     *
     * @param url       request URL
     * @param ip        recorded on the returned response
     * @param headers   request headers applied on top of the default headers; may be null or empty
     * @param bodyMap   request body
     * @param submiType how the body is serialized: format / json
     * @return the response, or a response with status code -1 on failure
     */
    public static ResponseObj urlAdvancedPost(String url, String ip, Map<String, String> headers, Map<String, String> bodyMap, String submiType) {
        return urlBasePost(url, ip, null, headers, bodyMap, submiType);
    }

    /**
     * Sends a POST request through an HTTP proxy.
     *
     * @param url        request URL
     * @param ip         proxy host
     * @param port       proxy port
     * @param headers    request headers applied on top of the default headers; may be null or empty
     * @param bodyMap    request body
     * @param submitType how the body is serialized: format / json
     * @return the response, or a response with status code -1 on failure
     */
    public static ResponseObj urlAdvancedProxyPost(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> bodyMap, String submitType) {
        return urlBasePost(url, ip, port, headers, bodyMap, submitType);
    }

    /**
     * Serializes the body, performs the POST and records ip/url on the result.
     */
    private static ResponseObj urlBasePost(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> bodyMap, String submitType) {
        ResponseObj responseObj;
        try {
            URL requestUrl = new URL(url);
            // Encode explicitly as UTF-8 instead of relying on the platform default charset.
            byte[] body = getPostBody(bodyMap, submitType).getBytes(StandardCharsets.UTF_8);
            String contentType = isFormBody(bodyMap, submitType)
                    ? "application/x-www-form-urlencoded"
                    : "application/json; charset=UTF-8";
            responseObj = execute(requestUrl, ip, port, headers, HttpConnectContant.POST, body, contentType);
        } catch (MalformedURLException e) {
            responseObj = invalidUrlResponse(url, e);
        }
        return finish(responseObj, ip, url);
    }

    /**
     * Tells whether the body is sent as {@code key=value} pairs (true) or as JSON (false).
     */
    private static boolean isFormBody(Map<String, String> bodyMap, String submitType) {
        return bodyMap != null && !bodyMap.isEmpty() && HttpConnectContant.POST_BODY_TYPE_FORMAT.equals(submitType);
    }

    /**
     * Builds the POST request body, never returning null.
     * <p>
     * NOTE(review): form keys and values are written verbatim (not URL-encoded),
     * as before. TODO from the original author: array-typed values are not
     * handled well by the form variant.
     */
    private static String getPostBody(Map<String, String> bodyMap, String submitType) {
        String parameterStr;
        if (isFormBody(bodyMap, submitType)) {
            StringBuilder sbd = new StringBuilder();
            for (Map.Entry<String, String> entry : bodyMap.entrySet()) {
                // Separator goes between pairs only, so the body no longer ends with a dangling '&'.
                if (sbd.length() > 0) {
                    sbd.append('&');
                }
                sbd.append(entry.getKey()).append('=').append(entry.getValue());
            }
            parameterStr = sbd.toString();
        } else {
            parameterStr = JsonUtil.objectToJson(bodyMap);
        }
        return StringUtils.isBlank(parameterStr) ? "" : parameterStr;
    }

    /**
     * Performs one request and converts the outcome into a {@link ResponseObj}.
     *
     * @param requestUrl      target URL (http or https)
     * @param ip              proxy host, see {@code getUrlConnection}
     * @param port            proxy port, see {@code getUrlConnection}
     * @param headers         caller headers applied on top of the defaults; may be null
     * @param method          HTTP method name
     * @param body            request body, or null when nothing is sent
     * @param bodyContentType Content-Type used for {@code body} when the caller's headers do not set one
     * @return the response; status code -1 and a reason phrase when an {@link IOException} occurs
     */
    private static ResponseObj execute(URL requestUrl, String ip, Integer port, Map<String, String> headers,
                                       String method, byte[] body, String bodyContentType) {
        ResponseObj responseObj = new ResponseObj();
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) getUrlConnection(requestUrl, ip, port);
            connection.setRequestMethod(method);
            setDefaultProperties(connection);
            applyHeaders(connection, headers);

            if (connection instanceof HttpsURLConnection) {
                // Bypass https verification.
                HttpsURLConnection httpsConnection = (HttpsURLConnection) connection;
                trustAllHosts(httpsConnection);
                httpsConnection.setHostnameVerifier(DO_NOT_VERIFY);
            }

            // Read the effective user agent before connecting: URLConnection#getRequestProperty
            // is specified to throw IllegalStateException once the connection is open.
            String userAgent = connection.getRequestProperty("user-agent");

            if (body == null) {
                connection.connect();
            } else {
                sendBody(connection, body, bodyContentType);
            }

            int statusCode = connection.getResponseCode();
            // getErrorStream() is null unless the server answered with an error, so it is
            // only consulted for 4xx/5xx; every other status is read from the input stream.
            InputStream responseStream = statusCode < HttpStatus.SC_BAD_REQUEST
                    ? connection.getInputStream()
                    : connection.getErrorStream();
            String content = readBody(responseStream, connection.getContentEncoding(),
                    resolveCharset(connection.getContentType()));

            List<HeaderVo> headerList = Lists.newArrayList();
            Map<String, List<String>> responseHeaderFields = connection.getHeaderFields();
            if (responseHeaderFields != null) {
                responseHeaderFields.forEach((key, value) -> headerList.add(new HeaderVo(key, JsonUtil.objectToJson(value))));
            }

            responseObj.setStatusCode(statusCode);
            responseObj.setContent(content);
            responseObj.setResponseHeader(headerList);
            responseObj.setUserAgent(userAgent);
        } catch (IOException e) {
            logger.error("Request to {} failed", requestUrl, e);
            responseObj.setStatusCode(-1);
            responseObj.setReasonPhrase(IO_ERROR_REASON);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return responseObj;
    }

    /**
     * Writes the request body, which also opens the connection.
     */
    private static void sendBody(HttpURLConnection connection, byte[] body, String contentType) throws IOException {
        connection.setUseCaches(false);
        // Without an explicit Content-Type the JDK's HTTP client labels a POST body as
        // form data, which is wrong for JSON. Headers supplied by the caller win.
        if (connection.getRequestProperty("Content-Type") == null) {
            connection.setRequestProperty("Content-Type", contentType);
        }
        connection.setRequestProperty("Content-Length", String.valueOf(body.length));
        try (OutputStream outputStream = connection.getOutputStream()) {
            outputStream.write(body);
            outputStream.flush();
        }
    }

    /**
     * Reads the whole response body as text, transparently un-gzipping it.
     *
     * @param rawStream       response stream; null is treated as an empty body
     * @param contentEncoding value of the Content-Encoding response header, may be null
     * @param charset         charset used to decode the bytes
     * @return the decoded body, never null
     */
    private static String readBody(InputStream rawStream, String contentEncoding, Charset charset) throws IOException {
        if (rawStream == null) {
            return "";
        }
        // equalsIgnoreCase avoids the locale-dependent toLowerCase() comparison.
        boolean gzipped = contentEncoding != null && GZIP.equalsIgnoreCase(contentEncoding.trim());
        try (InputStream raw = rawStream;
             InputStream decoded = gzipped ? new GZIPInputStream(raw) : raw;
             Reader reader = new InputStreamReader(decoded, charset)) {
            StringBuilder content = new StringBuilder();
            char[] buffer = new char[1024];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
            return content.toString();
        }
    }

    /**
     * Extracts the charset from a Content-Type header value such as
     * {@code text/html; charset=GBK}; falls back to UTF-8 when the header is
     * missing, names no charset, or names one this JVM does not know.
     */
    private static Charset resolveCharset(String contentType) {
        if (contentType != null) {
            String prefix = "charset=";
            for (String part : contentType.split(";")) {
                String token = part.trim();
                if (token.regionMatches(true, 0, prefix, 0, prefix.length())) {
                    String name = token.substring(prefix.length()).replace("\"", "").trim();
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException e) {
                        // Illegal or unsupported charset name: use the fallback below.
                        logger.warn("Unsupported response charset '{}', falling back to UTF-8", name);
                        break;
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    /**
     * Creates the error response used when the URL cannot be parsed.
     */
    private static ResponseObj invalidUrlResponse(String url, MalformedURLException cause) {
        logger.warn("Malformed url: {}", url, cause);
        ResponseObj responseObj = new ResponseObj();
        responseObj.setStatusCode(-1);
        responseObj.setReasonPhrase(URL_ERROR_REASON);
        return responseObj;
    }

    /**
     * Records ip and url on the response, logs the outcome and returns the response.
     */
    private static ResponseObj finish(ResponseObj responseObj, String ip, String url) {
        responseObj.setIp(ip);
        responseObj.setUrl(url);

        logger.info("url: {}", url);
        logger.info("statusCode: {}", responseObj.getStatusCode());
        logger.info("message: {}", responseObj.getReasonPhrase());
        return responseObj;
    }

//region--------------------------------------- request headers and connection setup ---------------------------------

    /**
     * Sets the default headers first, then lets the caller's headers override them.
     */
    private static void applyHeaders(URLConnection urlConnection, Map<String, String> headerMap) {
        setDefaultHeader(urlConnection);
        if (headerMap == null) {
            return;
        }
        for (Map.Entry<String, String> entry : headerMap.entrySet()) {
            urlConnection.setRequestProperty(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Sets the default, browser-like request headers.
     */
    private static void setDefaultHeader(URLConnection urlConnection) {
        urlConnection.setRequestProperty("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        // Only advertise gzip: it is the sole encoding readBody can decode. Offering
        // deflate/br as well lets servers answer with a body that cannot be read.
        urlConnection.setRequestProperty("accept-encoding", "gzip");
        urlConnection.setRequestProperty("accept-language", "en-US,en;q=0.9,fr-FR;q=0.8,fr-CA;q=0.7,fr;q=0.6,de;q=0.5,zh-CN;q=0.4,zh;q=0.3,en-AU;q=0.2");
        urlConnection.setRequestProperty("cache-control", "max-age=0");
        urlConnection.setRequestProperty("user-agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36");
        urlConnection.setRequestProperty("connection", "keep-alive");
    }

    /**
     * Sets the default connection properties, including connect and read timeouts.
     */
    private static void setDefaultProperties(URLConnection urlConnection) {
        urlConnection.setDoOutput(true);
        urlConnection.setDoInput(true);
        urlConnection.setConnectTimeout(HttpConnectContant.CONNECT_TIME_OUT_TIME);
        // Per-connection read timeout, so a stalled server cannot block the caller forever.
        urlConnection.setReadTimeout(HttpConnectContant.CONNECT_TIME_OUT_TIME);

        // JVM-wide fallback timeouts, kept from the original code (the connect property
        // name was misspelled before and therefore ignored).
        // NOTE(review): the JDK may read these only once when its HTTP client classes
        // initialise, so treat them as best-effort; the setters above are authoritative.
        System.setProperty("sun.net.client.defaultConnectTimeout", String.valueOf(HttpConnectContant.CONNECT_TIME_OUT_TIME));
        System.setProperty("sun.net.client.defaultReadTimeout", String.valueOf(HttpConnectContant.CONNECT_TIME_OUT_TIME));
    }

    /**
     * Opens (but does not connect) the connection, going through an HTTP proxy
     * when {@code ip} is not blank and {@code port} is greater than 0.
     */
    private static URLConnection getUrlConnection(URL requestUrl, String ip, Integer port) throws IOException {
        Proxy proxy = null;
        if (StringUtils.isNotBlank(ip) && port != null && port > 0) {
            proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
        }
        if (proxy == null) {
            return requestUrl.openConnection();
        }
        return requestUrl.openConnection(proxy);
    }

//endregion-------------------------------------------------------------------------------------------------------------

//region ------------------------------------- bypass https verification ------------------------------------------------

    /**
     * Trust manager that accepts every client and server certificate,
     * replacing Java's default certificate validation.
     */
    private static final TrustManager[] TRUST_ALL_CERTS = new TrustManager[]{
            new X509TrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] x509Certificates, String s) throws CertificateException {
                    // Intentionally empty: every client certificate is accepted.
                }

                @Override
                public void checkServerTrusted(X509Certificate[] x509Certificates, String s) throws CertificateException {
                    // Intentionally empty: every server certificate is accepted.
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[]{};
                }
            }
    };

    /**
     * Host-name verifier that accepts every host.
     */
    private static final HostnameVerifier DO_NOT_VERIFY = new HostnameVerifier() {
        @Override
        public boolean verify(String s, SSLSession sslSession) {
            return true;
        }
    };

    /**
     * Installs a socket factory on {@code connection} that trusts every
     * certificate. If the TLS context cannot be created the failure is logged
     * and the connection keeps its existing socket factory.
     */
    private static void trustAllHosts(HttpsURLConnection connection) {
        try {
            SSLContext tls = SSLContext.getInstance("TLS");
            tls.init(null, TRUST_ALL_CERTS, new SecureRandom());
            connection.setSSLSocketFactory(tls.getSocketFactory());
        } catch (NoSuchAlgorithmException | KeyManagementException e) {
            logger.error("Unable to install the trust-all SSL socket factory", e);
        }
    }
// endregion -----------------------------------------------------------------------------------------------------------


}

// posted @ 2018-12-21 18:16  LDJ34  阅读(241)  评论(0)  编辑  收藏  举报