java实现https爬取

来自:  https://blog.csdn.net/Sakuraaaaaaa/article/details/107280162

 

HttpsUtil 工具类

import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustStrategy;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.ssl.SSLContextBuilder;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;

public class HttpsUtils {
    private static final String HTTP = "http";
    private static final String HTTPS = "https";
    private static SSLConnectionSocketFactory sslsf = null;
    private static PoolingHttpClientConnectionManager cm = null;
    private static SSLContextBuilder builder = null;
    static {
        try {
            builder = new SSLContextBuilder();
            // 全部信任 不做身份鉴定
            builder.loadTrustMaterial(null, new TrustStrategy() {
                @Override
                public boolean isTrusted(X509Certificate[] x509Certificates, String s) throws CertificateException {
                    return true;
                }
            });
            sslsf = new SSLConnectionSocketFactory(builder.build(), new String[]{"SSLv2Hello", "SSLv3", "TLSv1", "TLSv1.2"}, null, NoopHostnameVerifier.INSTANCE);
            Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
                    .register(HTTP, new PlainConnectionSocketFactory())
                    .register(HTTPS, sslsf)
                    .build();
            cm = new PoolingHttpClientConnectionManager(registry);
            cm.setMaxTotal(200);//max connection
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static CloseableHttpClient getHttpClient() throws Exception {
        CloseableHttpClient httpClient = HttpClients.custom()
                .setSSLSocketFactory(sslsf)
                .setConnectionManager(cm)
                .setConnectionManagerShared(true)
                .build();
        return httpClient;
    }

}

 

 

测试  主要添加了 User-Agent 请求头


 

public void testHttps() throws Exception {
        //创建HttpClient对象
        CloseableHttpClient httpClient = HttpsUtils.getHttpClient();
        //创建get对象
        HttpGet httpGet = new HttpGet("https://search.jd.com/Search?keyword=%E7%94%B5%E8%84%91&enc=utf-8&pvid=b1deb5e2163141b8bebbb6c0505a4fca");
        httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36");
        //执行请求
        CloseableHttpResponse response = httpClient.execute(httpGet);
        //接收结果
        HttpEntity entity = response.getEntity();
        String html = EntityUtils.toString(entity,"utf-8");
        //打印结果
        System.out.println(html);
        //关闭连接
        response.close();
    }

 

posted @ 2021-04-21 09:56  我没有出家  阅读(535)  评论(0编辑  收藏  举报