Java 实现 HTTPS 爬取
来自: https://blog.csdn.net/Sakuraaaaaaa/article/details/107280162
HttpsUtils 工具类
import org.apache.http.config.Registry; import org.apache.http.config.RegistryBuilder; import org.apache.http.conn.socket.ConnectionSocketFactory; import org.apache.http.conn.socket.PlainConnectionSocketFactory; import org.apache.http.conn.ssl.NoopHostnameVerifier; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustStrategy; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.ssl.SSLContextBuilder; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; public class HttpsUtils { private static final String HTTP = "http"; private static final String HTTPS = "https"; private static SSLConnectionSocketFactory sslsf = null; private static PoolingHttpClientConnectionManager cm = null; private static SSLContextBuilder builder = null; static { try { builder = new SSLContextBuilder(); // 全部信任 不做身份鉴定 builder.loadTrustMaterial(null, new TrustStrategy() { @Override public boolean isTrusted(X509Certificate[] x509Certificates, String s) throws CertificateException { return true; } }); sslsf = new SSLConnectionSocketFactory(builder.build(), new String[]{"SSLv2Hello", "SSLv3", "TLSv1", "TLSv1.2"}, null, NoopHostnameVerifier.INSTANCE); Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create() .register(HTTP, new PlainConnectionSocketFactory()) .register(HTTPS, sslsf) .build(); cm = new PoolingHttpClientConnectionManager(registry); cm.setMaxTotal(200);//max connection } catch (Exception e) { e.printStackTrace(); } } public static CloseableHttpClient getHttpClient() throws Exception { CloseableHttpClient httpClient = HttpClients.custom() .setSSLSocketFactory(sslsf) .setConnectionManager(cm) .setConnectionManagerShared(true) .build(); return httpClient; } }
测试：主要添加了 User-Agent 请求头
public void testHttps() throws Exception { //创建HttpClient对象 CloseableHttpClient httpClient = HttpsUtils.getHttpClient(); //创建get对象 HttpGet httpGet = new HttpGet("https://search.jd.com/Search?keyword=%E7%94%B5%E8%84%91&enc=utf-8&pvid=b1deb5e2163141b8bebbb6c0505a4fca"); httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"); //执行请求 CloseableHttpResponse response = httpClient.execute(httpGet); //接收结果 HttpEntity entity = response.getEntity(); String html = EntityUtils.toString(entity,"utf-8"); //打印结果 System.out.println(html); //关闭连接 response.close(); }