ElasticSearch 8.x 实战

最终实现效果：

爬虫

爬取数据：请求目标页面并获取返回的 HTML，再从中筛选出我们需要的数据即可，这里直接使用 jsoup 库

pom.xml 导入依赖

<!--解析网页-->
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.16.1</version>
</dependency>

新建 pojo 文件夹，新建 Java 类 Content

/**
 * One product entry with its title, image location and price, all kept as text.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor and an all-arguments constructor; the latter takes its
 * parameters in the field declaration order used below.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Content {

    /** Product title. */
    private String title;

    /** Product image location. */
    private String img;

    /** Product price, stored as the displayed text. */
    private String price;
}

新建 utils 文件夹，新建 Java 类 HtmlParseUtil，注意需要先登录京东，获得 Cookie，填充在下列代码中

/**
 * Scrapes product listings from the JD search result page with Jsoup.
 */
@Component
public class HtmlParseUtil {

    /** Search endpoint; the URL-encoded keyword is appended to it. */
    private static final String SEARCH_URL = "https://search.jd.com/Search?keyword=";

    /** Manual smoke test: prints every product found for the keyword "java". */
    public static void main(String[] args) throws Exception {
        new HtmlParseUtil().parseJD("java").forEach(System.out::println);
    }

    /**
     * Fetches the search result page for {@code keyword} and extracts the listed products.
     *
     * <p>Only content present in the returned HTML is visible here; data the page loads
     * later through ajax cannot be retrieved this way.
     *
     * @param keyword search keyword, must not be {@code null}
     * @return the products found, or an empty list when the page has no goods list
     * @throws IllegalArgumentException if {@code keyword} is {@code null}
     * @throws Exception if the page cannot be fetched
     */
    public List<Content> parseJD(String keyword) throws Exception {
        if (keyword == null) {
            throw new IllegalArgumentException("keyword must not be null");
        }
        // Encode the keyword so spaces, '&', '#' or non-ASCII text cannot break the query string.
        String url = SEARCH_URL + java.net.URLEncoder.encode(keyword, "UTF-8");

        // An anonymous Jsoup.parse(new URL(url), 30000) no longer works because the page requires a login.
        // To obtain the cookie string: log in to JD, open the browser console and evaluate document.cookie.
        String cookies = "";
        // The value is a complete "a=1; b=2" cookie string, so it has to be sent as the raw
        // Cookie header. cookie(name, value) would create a single cookie literally named "Cookie".
        Document document = Jsoup.connect(url)
                .header("Cookie", cookies)
                .get();

        List<Content> goodsList = new ArrayList<>();
        Element goodsContainer = document.getElementById("J_goodsList");
        if (goodsContainer == null) {
            return goodsList;
        }

        for (Element item : goodsContainer.getElementsByTag("li")) {
            // hasClass also accepts items that carry additional classes next to "gl-item".
            if (!item.hasClass("gl-item")) {
                continue;
            }
            // Images are lazy-loaded, so the address is read from data-lazy-img.
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();
            goodsList.add(new Content(title, img, price));
        }
        return goodsList;
    }
}

新建 service 文件夹，新建 Java 类 ContentService

/**
 * Business logic: loads scraped products into Elasticsearch and searches them.
 */
@Service
public class ContentService {

    /** Index that stores the scraped product documents. */
    private static final String INDEX_NAME = "jd_goods";

    @Autowired
    private ElasticsearchClient elasticsearchClient;

    /**
     * Scrapes the products for {@code keywords} and bulk-indexes them.
     *
     * @param keywords keyword passed to the scraper
     * @return {@code true} when the bulk request reported no errors; {@code false} when
     *         it reported errors or when the scraper returned no products
     * @throws Exception if scraping or the bulk request fails
     */
    public Boolean parseContent(String keywords) throws Exception {
        List<Content> contents = new HtmlParseUtil().parseJD(keywords);
        if (contents.isEmpty()) {
            // Nothing to index; do not send a bulk request without operations.
            return false;
        }

        // No document id is set, so Elasticsearch assigns a generated id to each document.
        List<BulkOperation> bulkOperations = new ArrayList<>();
        for (Content content : contents) {
            bulkOperations.add(BulkOperation.of(o -> o.index(i -> i.document(content))));
        }

        BulkResponse bulkResponse = elasticsearchClient.bulk(b -> b
                .index(INDEX_NAME)
                .operations(bulkOperations));
        return !bulkResponse.errors();
    }

    /**
     * Searches the indexed products whose {@code title} contains the given term.
     *
     * @param keyword  term to look up; it is passed to a term query unchanged
     * @param pageNo   1-based page number; values {@code <= 0} are treated as 1
     * @param pageSize documents per page; values {@code <= 0} are treated as 10
     * @return one map per hit, keyed by the {@link Content} property names; empty when
     *         nothing matches or the request fails with an {@link IOException}
     */
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) {
        int page = pageNo <= 0 ? 1 : pageNo;
        int limit = pageSize <= 0 ? 10 : pageSize;
        // "from" is a document offset, not a page number.
        int offset = (page - 1) * limit;

        List<Map<String, Object>> list = new ArrayList<>();
        try {
            SearchResponse<Content> searchResponse = elasticsearchClient.search(s -> s
                    .index(INDEX_NAME)
                    // Term query on title: the keyword itself is not run through an analyzer.
                    .query(q -> q
                            .term(t -> t
                                    .field("title")
                                    .value(v -> v.stringValue(keyword))
                            ))
                    .from(offset)
                    .size(limit), Content.class
            );

            ObjectMapper objectMapper = new ObjectMapper();
            for (Hit<Content> hit : searchResponse.hits().hits()) {
                Content content = hit.source();
                if (content == null) {
                    // A hit without a source has nothing to convert.
                    continue;
                }
                Map<String, Object> map = objectMapper.convertValue(content,
                        new TypeReference<Map<String, Object>>() {
                        });
                list.add(map);
            }
        } catch (IOException e) {
            // Best effort: report the failure and return an empty result instead of null.
            e.printStackTrace();
            return new ArrayList<>();
        }
        return list;
    }

}

新建 controller 文件夹，新建 Java 类 ContentController

/**
 * REST endpoints that delegate to {@link ContentService}.
 */
@RestController
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Triggers scraping and indexing for the keyword taken from the path.
     *
     * @param keywords keyword forwarded to {@link ContentService#parseContent(String)}
     * @return the value returned by the service
     * @throws Exception if the service call fails
     */
    @GetMapping("/parse/{keywords}")
    public Boolean parse(@PathVariable("keywords") String keywords) throws Exception {
        Boolean outcome = contentService.parseContent(keywords);
        return outcome;
    }

    /**
     * Returns one page of search results for the keyword taken from the path.
     *
     * @param keyword  search keyword
     * @param pageNo   page number forwarded to the service
     * @param pageSize page size forwarded to the service
     * @return the list returned by {@link ContentService#searchPage(String, int, int)}
     * @throws Exception declared for callers; the service call itself is forwarded unchanged
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String, Object>> search(
            @PathVariable("keyword") String keyword,
            @PathVariable("pageNo") int pageNo,
            @PathVariable("pageSize") int pageSize) throws Exception {
        List<Map<String, Object>> page = contentService.searchPage(keyword, pageNo, pageSize);
        return page;
    }
}

测试：先在浏览器中访问 http://localhost:9090/parse/java ，将爬取到的数据写入 es 索引；再访问 http://localhost:9090/search/java/1/10 查看数据

前后端分离

使用 Vue 实现前后端分离，具体实现步骤如下：

新建文件夹，注意名字不要出现中文，进入 cmd 下载 vue 和 axios

npm install vue@2
npm install axios

将 node_modules/axios/dist 下的 axios.min.js 和 node_modules/vue/dist 下的 vue.min.js（该文件由 Vue 2 提供）复制到项目的 resources/static/js 中

在 index.html 使用 Vue

数据动态绑定：在 input 标签上使用 v-model 指令，将输入框的内容与 Vue 实例中的 keyword 双向绑定

<!-- Search form: the input is bound to `keyword` with v-model and the button calls searchKey -->
<form name="searchTop" class="mallSearch-form clearfix">
    <fieldset>
        <legend>天猫搜索</legend>
        <div class="mallSearch-input clearfix">
            <div class="s-combobox" id="s-combobox-685">
                <div class="s-combobox-input-wrap">
                    <!-- No inline value attribute: with v-model the initial value comes from the Vue data -->
                    <input type="text" v-model="keyword" autocomplete="off"
                           id="mq" class="s-combobox-input" aria-haspopup="true">
                </div>
            </div>
            <!-- .prevent stops the native form submission so the page is not reloaded -->
            <button @click.prevent="searchKey" type="submit" id="searchbtn">搜索
            </button>
        </div>
    </fieldset>
</form>

搜索按钮点击后会调用 searchKey 函数，Vue 与后端进行交互，返回并绑定数据

<!-- Front end: Vue calls the back-end search endpoint and binds the returned data -->
<script th:src="@{/js/axios.min.js}"></script>
<script th:src="@{/js/vue.min.js}"></script>
<script>
    new Vue({
        el: '#app',
        data: {
            keyword: '', // keyword typed into the search box
            results: []  // items returned by the back end
        },
        methods: {
            // Requests page 1 with page size 10 for the current keyword and binds the response.
            searchKey() {
                const keyword = this.keyword.trim();
                if (!keyword) {
                    // An empty keyword would build the path "search//1/10", which matches no route.
                    return;
                }
                // Encode the keyword so characters such as '/', '?', '#' or spaces stay inside one path segment.
                axios.get("search/" + encodeURIComponent(keyword) + "/1/10")
                    .then(response => {
                        // Bind only an array so the list rendering never receives an empty or malformed body.
                        this.results = Array.isArray(response.data) ? response.data : [];
                    })
                    .catch(error => {
                        console.error("search request failed", error);
                        this.results = [];
                    });
            }
        }
    })
</script>

注意：在使用 Vue 后，Controller 只需返回数据，不再负责页面的跳转；Vue 接收后端返回的数据，然后将数据渲染在前端页面中，如有需要，Vue 会通过路由实现页面的跳转。

搜索高亮

修改 service/ContentService

/**
 * Searches the indexed products whose {@code title} contains the given term and
 * replaces each returned title with its highlighted fragment when one is available.
 *
 * @param keyword  term to look up; it is passed to a term query unchanged
 * @param pageNo   1-based page number; values {@code <= 0} are treated as 1
 * @param pageSize documents per page; values {@code <= 0} are treated as 10
 * @return one map per hit, keyed by the {@code Content} property names; empty when
 *         nothing matches or the request fails with an {@link IOException}
 */
public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) {
    int page = pageNo <= 0 ? 1 : pageNo;
    int limit = pageSize <= 0 ? 10 : pageSize;
    // "from" is a document offset, not a page number.
    int offset = (page - 1) * limit;

    List<Map<String, Object>> list = new ArrayList<>();
    try {
        SearchResponse<Content> searchResponse = elasticsearchClient.search(s -> s
                .index("jd_goods")
                // Term query on title: the keyword itself is not run through an analyzer.
                .query(q -> q
                        .term(t -> t
                                .field("title")
                                .value(v -> v.stringValue(keyword))
                        ))
                // Wrap matches inside title in a red <font> tag.
                .highlight(h -> h
                        .fields("title", f -> f
                                .preTags("<font color='red'>")
                                .postTags("</font>")))
                .from(offset)
                .size(limit), Content.class
        );

        ObjectMapper objectMapper = new ObjectMapper();
        for (Hit<Content> hit : searchResponse.hits().hits()) {
            Content content = hit.source();
            if (content == null) {
                // A hit without a source has nothing to convert.
                continue;
            }
            Map<String, Object> map = objectMapper.convertValue(content,
                    new TypeReference<Map<String, Object>>() {
                    });
            // Replace the plain title with the first highlighted fragment, if any.
            List<String> title = hit.highlight().get("title");
            if (title != null && !title.isEmpty()) {
                map.put("title", title.get(0));
            }
            list.add(map);
        }
    } catch (IOException e) {
        // Best effort: report the failure and return an empty result instead of null.
        e.printStackTrace();
        return new ArrayList<>();
    }
    return list;
}

修改 index.html

<!-- Title: bound with v-html so that markup contained in result.title is rendered as HTML -->
<!-- NOTE(review): v-html inserts the string without escaping; confirm result.title cannot carry untrusted markup -->
<p class="productTitle">
    <a v-html="result.title"> </a>
</p>

参考资料

posted @ 2023-05-15 14:12 Lockegogo 阅读(332) 评论(0) 编辑收藏举报

刷新页面返回顶部

ElasticSearch 8.x 实战

ElasticSearch 8.x 实战

爬虫

前后端分离

搜索高亮

参考资料

公告