HttpClient

视频

https://www.bilibili.com/video/BV1cE411u7RA?p=4

https://www.bilibili.com/video/BV1W54y1s7BZ?p=4

基础文本框架

https://www.ayulong.cn/blog/12

https支持

https://blog.csdn.net/Sakuraaaaaaa/article/details/107280162

1|0HttpClient

官网：http://hc.apache.org

1|1准备

1|0maven

 <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
        <!-- HttpClient -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.12</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
        <!--日志-->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
            <scope>test</scope>
        </dependency>

    </dependencies>

1|0创建 slf4j 日志配置文件

在 resources 目录下创建 log4j.properties 文件, 并添加以下配置

# A1 在控制台显示日志
log4j.rootLogger=DEBUG,A1
log4j.logger.cn.itcast = DEBUG

log4j.appender.A1=org.apache.log4j.ConsoleAppender
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-d{yyyy-MM-dd HH:mm:ss,SSS} [%t] [%c]-[%p] %m%n

1|0最简单的爬虫程序

在java的 cn.ayulong.crawler.test 中创建 CrawlerFirst类

package cn.ayulong.crawler.test;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * Minimal crawler: requests the Baidu home page and prints the returned HTML to stdout.
 */
public class CrawlerFirst {

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws IOException {
        // "Type the address": describe the GET request.
        HttpGet httpGet = new HttpGet("http://www.baidu.com/");

        // "Open the browser" and "press enter". try-with-resources closes the response
        // first and then the client, even when executing or reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // Only parse the body when the server answered 200.
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                String content = EntityUtils.toString(httpEntity, "utf-8");
                System.out.println(content);
            }
        }
    }
}
// 直接运行, 成功抓取到百度首页html并打印到控制台

注意: 这个示例抓取的是 http 协议的页面; 抓取 https 页面时如果遇到证书校验等问题, 可以参考文章开头“https支持”一节给出的链接

1|2GET 请求

1|0无参无头

/**
 * Sends a GET request without parameters or custom headers and prints the length of the body.
 */
public class HttpGetTest {

    /**
     * Runs the example. I/O failures are printed, not rethrown.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Request for the target URL.
        HttpGet httpGet = new HttpGet("http://www.baidu.com");

        // The closeable client plays the role of a browser window. try-with-resources
        // closes the response and then the client on every path.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity entity = response.getEntity();
                // EntityUtils is the helper class for working with an HttpEntity.
                String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                System.out.println(content.length());
                // Make sure the content stream is released.
                EntityUtils.consume(entity);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

1|0带请求头

public class HttpGetTest {

    public static void main(String[] args) {
        // 可关闭的httpclient客户端，相当于你打开的一个浏览器
        CloseableHttpClient httpClient = HttpClients.createDefault();

        // 创建HttpGet对象, 设置url访问地址
        HttpGet httpGet = new HttpGet("http://www.baidu.com");
        // 解决httpclient被认为不是真人行为
		httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36")
        // 防盗链，url发生防盗链的网站的url
        httpGet.addHeader("Referer","https://www.baidu.com/")
            
        CloseableHttpResponse response = null;
        try {
            // 使用 HttpClient 发起请求, 获取 response
            response = httpClient.execute(httpGet);
            // 解析响应
            if (response.getStatusLine().getStatusCode() == 200) {
                // 获取响应结果
                HttpEntity entity = response.getEntity();
                // 对 HttpEntity 操作的工具类
                //String content = EntityUtils.toString(response.getEntity(), "utf8");
                String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                System.out.println(content.length());
                //确保流关闭
                EntityUtils.consume(entity);
            }
        } catch(IOException e) {
            e.printStackTrace();
        } finally {
            // 关闭 response
            if (response != null) {
                try {
                	response.close();
                } catch (IOException e) {
                	e.printStackTrace();
            	}
            }
            if (httpClient != null) {
                try {
                	httpClient.close();
				} catch (IOException e) {
                	e.printStackTrace();
            	}   
            }
            
        }

    }
}

1|0请求带参数

/**
 * Sends a GET request whose query string is assembled with URIBuilder.
 */
public class HttpGetParamTest {

    /**
     * Runs the example. I/O failures while executing the request are printed, not rethrown.
     *
     * @param args unused
     * @throws Exception if the URI cannot be built
     */
    public static void main(String[] args) throws Exception {
        // Resulting address: http://yun.itheima.com/search?keys=Java
        URIBuilder uriBuilder = new URIBuilder("http://yun.itheima.com/search");
        uriBuilder.setParameter("keys", "Java");
        HttpGet httpGet = new HttpGet(uriBuilder.build());

        System.out.println("发起请求的信息: " + httpGet);

        // try-with-resources never touches a response that was not obtained (the old
        // finally block dereferenced a null response when execute() failed) and always
        // closes the client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                // Report the body size, as the other examples do.
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

或者手动转

public class HttpGetParamTest {

    public static void main(String[] args) throws Exception {
        // 创建HttpClient对象
        CloseableHttpClient httpClient = HttpClients.createDefault();
		String urlStr = "https://www.baidu.com";
        String passwordParam = "12+abc 456|789"//
        // 做urlencode：如果是浏览器的话，浏览器会帮我们自动给做了，123%2Babc+456%7c789
        passwordParam = URLEncoder.encode(passwordParam, StandardCharsets.UTF_8.name());
        urlStr = "http://demo/?" + passwordParam
        HttpGet httpGet = new HttpGet(urlStr);

        System.out.println("发起请求的信息: " + httpGet);

        CloseableHttpResponse response = null;
        try {
            // 使用 HttpClient 发起请求, 获取 response
            response = httpClient.execute(httpGet);
            // 解析响应
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
            }
        } catch(IOException e) {
            e.printStackTrace();
        } finally {
            // 关闭 response
            try {
                response.close();
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }
}

1|0获取响应头以及相应的Content-Type

/**
 * Prints every response header, the entity's Content-Type, and the length of the body.
 */
public class HttpGetParamTest {

    /**
     * Runs the example. I/O failures while executing the request are printed, not rethrown.
     *
     * @param args unused
     * @throws Exception declared for parity with the other examples
     */
    public static void main(String[] args) throws Exception {
        String urlStr = "https://www.baidu.com";
        HttpGet httpGet = new HttpGet(urlStr);
        System.out.println("发起请求的信息: " + httpGet);

        // try-with-resources never touches a response that was not obtained (the old
        // finally block dereferenced a null response when execute() failed) and always
        // closes the client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                System.out.println("响应成功");
                // All response headers, Content-Type included.
                Header[] allHeaders = response.getAllHeaders();
                for (Header header : allHeaders) {
                    System.out.println("响应头" + header.getName() + "的值" + header.getValue());
                }
                // The Content-Type can also be read from the entity.
                HttpEntity entity = response.getEntity();
                System.out.println("ContentType" + entity.getContentType());
                // EntityUtils is the helper class for working with an HttpEntity.
                String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

1|0保存网络图片到本地

public class HttpGetParamTest {

    public static void main(String[] args) throws Exception {
        // 创建HttpClient对象
        CloseableHttpClient httpClient = HttpClients.createDefault();
		String urlStr = "https://img2022.cnblogs.com/blog/1381066/202202/1381066-20220226153714038-1862164179.png";
        HttpGet httpGet = new HttpGet(urlStr);
		// 可关闭的响应
        CloseableHttpResponse response = null;
        try {
            // 使用 HttpClient 发起请求, 获取 response
            response = httpClient.execute(httpGet);
            HttpEntity entity = response.getEntity();
            // image/jpg  image/jpeg  image/png  image/图片的后缀
            String contentType = entity.getContentType.getValue();
            String suffix = '.jpg';
            if (contentType.contains("jpg")||contentType.contains("jpeg")) {
                suffix = '.jpg';
            }else if (contentType.contains("bmp")||contentType.contains("bitbmp")) {
                suffix = '.bmp';
            }else if (contentType.contains("png")) {
                suffix = '.png';
            }else if (contentType.contains("gif")) {
                suffix = '.gif';
            }
            // 获取文件的字节流
            byte[] bytes = EntityUtils.toByteArray(entity);
            String localAbsPath = "e:\\abbc" + suffix;
            FileOutputStream fos = new FileOutputStream(localAbsPath);
            fos.write(bytes);
            fos.close();
            EntityUtils.consume(entity);
            
        } catch(IOException e) {
            e.printStackTrace();
        } finally {
            // 关闭 response
            try {
                response.close();
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }
}

1|0设置访问代理

/**
 * Sends a GET request through an HTTP proxy and prints the length of the body.
 */
public class HttpGetTest {

    /**
     * Runs the example. I/O failures are printed, not rethrown.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HttpGet httpGet = new HttpGet("http://www.baidu.com");

        // Proxy endpoint to route the request through.
        String ip = "114.7.192.253";
        int port = 8080;
        HttpHost proxy = new HttpHost(ip, port);
        // Per-request configuration; it overrides the client's default request configuration.
        RequestConfig requestConfig = RequestConfig.custom().setProxy(proxy).build();
        httpGet.setConfig(requestConfig);

        // try-with-resources closes the response and then the client on every path.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity entity = response.getEntity();
                // EntityUtils is the helper class for working with an HttpEntity.
                String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                System.out.println(content.length());
                // Make sure the content stream is released.
                EntityUtils.consume(entity);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

1|3Post请求带参数

1|0application/x-www-form-urlencoded

这是默认的Content-type

使用 HttpClient 发送不带参数的 post 请求与发送不带参数的 get 请求类似, 只是 HttpGet 要改为 HttpPost, 所以只记录带参数的 post 请求方式

/**
 * Sends a POST request with an application/x-www-form-urlencoded body and prints the
 * length of the response body.
 */
public class HttpPostParamTest {

    /**
     * Runs the example. I/O failures while executing the request are printed, not rethrown.
     *
     * @param args unused
     * @throws Exception if the form entity cannot be created with the given charset
     */
    public static void main(String[] args) throws Exception {
        HttpPost httpPost = new HttpPost("http://yun.itheima.com/search");

        // Form fields to submit.
        ArrayList<NameValuePair> params = new ArrayList<>();
        params.add(new BasicNameValuePair("keys", "Java"));

        // Form entity: first argument is the field list, second is the charset.
        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params, "utf8");
        httpPost.setEntity(formEntity);

        // try-with-resources never touches a response that was not obtained (the old
        // finally block dereferenced a null response when execute() failed) and always
        // closes the client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

1|0application/json

/**
 * Sends a POST request with an application/json body and prints the length of the
 * response body.
 */
public class HttpPostParamTest {

    /**
     * Runs the example. I/O failures while executing the request are printed, not rethrown.
     *
     * @param args unused
     * @throws Exception declared for parity with the other examples
     */
    public static void main(String[] args) throws Exception {
        HttpPost httpPost = new HttpPost("http://yun.itheima.com/search");

        // Build the JSON payload.
        JSONObject jsonObj = new JSONObject();
        jsonObj.put("userName","mingzi");
        jsonObj.put("password","abc123");
        // The charset given here is the one used to encode the body bytes.
        StringEntity jsonEntity = new StringEntity(jsonObj.toString(),Consts.UTF_8);
        // Declare the body as JSON. The charset belongs in Content-Type; no Content-Encoding
        // header is set, because that header describes compression (e.g. gzip), not a charset.
        jsonEntity.setContentType(new BasicHeader("Content-Type","application/json; charset=utf-8"));
        httpPost.setEntity(jsonEntity);

        // try-with-resources never touches a response that was not obtained (the old
        // finally block dereferenced a null response when execute() failed) and always
        // closes the client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

1|0multipart/form-data类型上传文件

1|4连接池

/**
 * Issues two GET requests through clients that share one pooling connection manager.
 */
public class HttpClientPoolTest {

    /**
     * Configures the pool, runs two requests against it, then shuts the pool down.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();

        // Upper bound on connections across all routes.
        cm.setMaxTotal(100);

        // Upper bound on connections per host (route).
        cm.setDefaultMaxPerRoute(10);

        try {
            doGet(cm);
            doGet(cm);
        } finally {
            // Release the pooled connections once all requests are done.
            cm.shutdown();
        }
    }

    /**
     * Executes one GET request over the shared pool and prints the length of the body.
     * I/O failures are printed, not rethrown.
     *
     * @param cm connection manager that owns the pooled connections
     */
    private static void doGet(PoolingHttpClientConnectionManager cm) {
        // A client is built per call, but its connections come from the shared manager.
        // The client is deliberately not closed here: that would shut down the manager
        // and break the next request.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        HttpGet httpGet = new HttpGet("http://www.itcast.cn");

        // Closing the response hands the connection back to the manager.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            } else {
                // Drain the unread body so the connection can be reused instead of discarded.
                EntityUtils.consume(response.getEntity());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

1|5请求参数配置

setConnectTimeout:连接超时，ms，完成tcp3次握手的时间上限
setSocketTimeout:读取超时，ms，表示从请求的网址处获得响应数据的时间间隔
setConnectionRequestTimeout：指的从连接池里面获取connection 的超时时间

pubsetConnectionRequestTimeousetConnectionRequestTimeouttlic class HttpConfigTest {

    public static void main(String[] args) {
        // 创建HttpClient对象
        CloseableHttpClient httpClient = HttpClients.createDefault();

        // 创建HttpGet对象, 设置url访问地址
        HttpGet httpGet = new HttpGet("http://yun.itheima.com/search?keys=Java");

        // 配置请求信息
        RequestConfig config = RequestConfig.custom().setConnectTimeout(1000) // 创建连接的最长时间, 单位是毫秒
                .setConnectionRequestTimeout(500) // 设置获取连接的最长时间, 单位是毫秒
                .setSocketTimeout(10 * 1000) // 设置数据传输的最长时间, 单位是毫秒
                .build();

        // 给请求设置请求信息
        httpGet.setConfig(config);

        CloseableHttpResponse response = null;
        try {
            // 使用 HttpClient 发起请求, 获取 response
            response = httpClient.execute(httpGet);
            // 解析响应
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch(IOException e) {
            e.printStackTrace();
        } finally {
            // 关闭 response
            try {
                response.close();
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }
}

2|0Jsoup

jsoup 是一款Java 的HTML解析器，可直接解析某个URL地址、HTML文本内容。它提供了一套非常省力的API，可通过DOM，CSS以及类似于jQuery的操作方法来取出和操作数据。

jsoup的主要功能如下：

从一个URL，文件或字符串中解析HTML；
使用DOM或CSS选择器来查找、取出数据；
可操作HTML元素、属性、文本；( Jsoup一般用于解析爬到的数据并存储, 很少用到操作 )

2|1准备

先在pom中导入依赖, 搭建开发环境

<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <!--Jsoup-->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/junit/junit -->
    <!--测试-->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
    <!--操作文件-->
    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.4</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
    <!--操作字符串-->
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-lang3</artifactId>
        <version>3.4</version>
    </dependency>

2|2Jsoup解析URL

Jsoup可以直接输入url，它会发起请求并获取数据，封装为Document对象

/**
 * First jsoup example: let jsoup fetch a URL itself and parse the result.
 */
public class JsoupFirstTest {

    /**
     * Fetches the page, parses it into a Document and prints the text of its title tag.
     *
     * @throws Exception if the URL is malformed or the page cannot be fetched
     */
    @Test
    public void testUrl() throws Exception {
        // First argument: the URL to fetch; second argument: the request timeout in milliseconds.
        URL pageUrl = new URL("http://www.itcast.cn");
        Document page = Jsoup.parse(pageUrl, 1000);

        // Tag lookup: take the first <title> element and read its text.
        String titleText = page.getElementsByTag("title").first().text();

        System.out.println(titleText);
    }
}

PS：虽然使用Jsoup可以替代HttpClient直接发起请求解析数据，但是往往不会这样用，因为实际的开发过程中，需要使用到多线程，连接池，代理等等方式，而jsoup对这些的支持并不是很好，所以我们一般把jsoup仅仅作为Html解析工具使用

2|3Jsoup解析字符串

 	/**
     * Reads the sample HTML file into a string, parses that string and prints the title text.
     *
     * @throws Exception if the file cannot be read
     */
    @Test
    public void testString() throws Exception {
        // Load the whole file as text with the commons-io helper.
        File htmlFile = new File("D:\\360MoveData\\Users\\Administrator\\Desktop\\test.html");
        String html = FileUtils.readFileToString(htmlFile, "utf8");

        // Parse the in-memory HTML.
        Document page = Jsoup.parse(html);

        String titleText = page.getElementsByTag("title").first().text();
        System.out.println(titleText);
    }
}

2|4Jsoup解析文件

	/**
     * Parses the sample HTML file directly and prints the title text.
     *
     * @throws Exception if the file cannot be read
     */
    @Test
    public void testFile() throws Exception {
        // Hand the file and its charset to jsoup; no intermediate string is needed.
        File htmlFile = new File("D:\\360MoveData\\Users\\Administrator\\Desktop\\test.html");
        Document page = Jsoup.parse(htmlFile, "utf8");

        String titleText = page.getElementsByTag("title").first().text();
        System.out.println(titleText);
    }

所需 html文件

<html>
 <head> 
  <title>传智播客官网-一样的教育,不一样的品质</title> 
 </head> 
 <body>
	<div class="city">
		<h3 id="city_bj">北京中心</h3>
		<fb:img src="/2018czgw/images/slogan.jpg" class="slogan"/>
		<div class="city_in">
			<div class="city_con" style="display: none;">
				<ul>
					<li id="test" class="class_a class_b">
						<a href="http://www.itcast.cn" target="_blank">
							<span class="s_name">北京</span>
						</a>
					</li>
					<li>
						<a href="http://sh.itcast.cn" target="_blank">
							<span class="s_name">上海</span>
						</a>
					</li>
					<li>
						<a href="http://gz.itcast.cn" target="_blank">
							<span abc="123" class="s_name">广州</span>
						</a>
					</li>
					<ul>
						<li>天津</li>
					</ul>					
				</ul>
			</div>
		</div>
	</div>
 </body>
</html>

2|5使用Dom方式遍历文档

  /**
     * Walks the sample document with the DOM-style lookup methods and prints what each finds.
     *
     * @throws Exception if the file cannot be read
     */
    @Test
    public void testDom() throws Exception {
        // Parse the file into a Document.
        Document page = Jsoup.parse(new File("D:\\360MoveData\\Users\\Administrator\\Desktop\\test.html"), "utf8");

        // 1. Lookup by id: getElementById
        Element byId = page.getElementById("city_bj");
        System.out.println("获取到的元素内容是: " + byId.text());

        // 2. Lookup by tag name: getElementsByTag
        Elements allSpans = page.getElementsByTag("span");
        for (Element oneSpan : allSpans) {
            System.out.println(oneSpan.text());
        }

        // 3. Lookup by class: getElementsByClass
        // NOTE(review): the argument holds two class names; presumably this matches only
        // because it equals the element's whole class attribute - confirm against the jsoup docs.
        Element byClass = page.getElementsByClass("class_a class_b").first();
        System.out.println(byClass.text());

        // 4. Lookup by attribute name: getElementsByAttribute
        Element byAttribute = page.getElementsByAttribute("abc").first();
        System.out.println("abc.text() = " + byAttribute.text());

        // 5. Lookup by attribute name and value: getElementsByAttributeValue
        Element byAttributeValue = page.getElementsByAttributeValue("href", "http://sh.itcast.cn").first();
        System.out.println("href.text() = " + byAttributeValue.text());
    }

2|6Selector选择器组合使用

/**
     * Demonstrates combined CSS selectors against the sample document.
     *
     * @throws Exception if the file cannot be read
     */
    @Test
    public void testSelector2() throws Exception {
        // Parse the HTML file into a Document.
        File htmlFile = new File("D:\\360MoveData\\Users\\Administrator\\Desktop\\test.html");
        Document page = Jsoup.parse(htmlFile, "utf8");

        // Selector combinations; each can be tried with select(...):
        //   el#id             element + id              "h3#city_bj"
        //   el.class          element + class           "li.class_a"
        //   el[attr]          element + attribute name  "span[abc]"
        //   any combination                             "span[abc].s_name"
        //   ancestor child    all descendants           ".city_con li"
        //   parent > child    direct children only      ".city_con > ul > li"
        //   parent > *        every direct child        ".city_con > ul > *"

        // Combination in use: a span that has the abc attribute and the s_name class.
        Element firstMatch = page.select("span[abc].s_name").first();

        // Every direct child of the ul elements that sit directly under .city_con.
        Elements directChildren = page.select(".city_con > ul > *");

        // Print the single match, then each direct child.
        System.out.println("获取到的内容是: " + firstMatch.text());
        for (Element child : directChildren) {
            System.out.println("遍历的结果: " + child.text());
        }
    }

__EOF__

本文作者：😎
本文链接：https://www.cnblogs.com/dongye95/p/16122460.html
关于博主：评论和私信会在第一时间回复。或者直接私信我。
版权声明：本博客所有文章除特别声明外，均采用 BY-NC-SA 许可协议。转载请注明出处！
声援博主：如果您觉得文章对您有帮助，可以点击文章右下角【推荐】一下。您的鼓励是博主的最大动力！

posted @ 2022-04-09 16:27 dongye95 阅读(122) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· java操作sip

· selenium - java

· HttpClient

· httpClient

· HttpClient

阅读排行：
· 单线程的Redis速度为什么快？
· 展开说说关于C#中ORM框架的用法！
· Pantheons：用 TypeScript 打造主流大模型对话的一站式集成库
· SQL Server 2025 AI相关能力初探
· 为什么退出登录或修改密码无法使 token 失效

公告

HttpClient

发表于 2022-04-09 16:27阅读：122评论：0推荐：0

爬虫

关注

跳至底部

昵称： dongye95
园龄： 6年10个月
粉丝： 108
关注： 12

+加关注

dongye95

HttpClient

1|0HttpClient

1|1准备

1|0maven

1|0创建 slf4j 日志配置文件

1|0最简单的爬虫程序

1|2GET 请求

1|0无参无头

1|0带请求头

1|0请求带参数

1|0获取响应头以及相应的Content-Type

1|0保存网络图片到本地

1|0设置访问代理

1|3Post请求带参数

1|0application/x-www-form-urlencoded

1|0application/json

1|0multipart/form-data类型上传文件

1|4连接池

1|5请求参数配置

2|0Jsoup

2|1准备

2|2Jsoup解析URL

2|3Jsoup解析字符串

2|4Jsoup解析文件

2|5使用Dom方式遍历文档

2|6Selector选择器组合使用

公告

😎

HttpClient

搜索

常用链接

我的标签

随笔分类

随笔档案

阅读排行榜

评论排行榜

推荐排行榜

最新评论