03实战测试

初始化项目

1、启动es服务和客户端

2、使用springboot快速构建服务

qV3gp9.png

3、修改版本依赖!

<properties>
    <java.version>1.8</java.version>
    <!-- 这里SpringBoot默认配置的版本不匹配,我们需要自己配置版本! -->
    <elasticsearch.version>7.6.1</elasticsearch.version>
</properties>

4、配置 application.properties 文件

server.port=9090
# 关闭 thymeleaf的缓存
spring.thymeleaf.cache=false

5、导入前端的素材!修改为Thymeleaf支持的格式!

<html xmlns:th="http://www.thymeleaf.org">

6、编写IndexController进行跳转测试!

/**
 * Page controller that forwards the site root and {@code /index} to the
 * {@code index} view.
 */
@Controller
public class IndexController {

    /** Logical view name handed back to the view resolver. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles GET requests for {@code /} and {@code /index}.
     *
     * @return the logical view name {@code index}
     */
    @GetMapping({"/", "/index"})
    public String index() {
        return INDEX_VIEW;
    }
}

qV3x78.png

jsoup讲解

1、导入jsoup的依赖

<!-- 解析网页jsoup-->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.13.1</version>
</dependency>

2、编写一个工具类 HtmlParseUtil

package com.edgar.utils;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.net.URL;
import java.net.URLEncoder;

/**
 * Small jsoup demo that fetches a JD.com search result page and prints the image URL,
 * price and title of every listed product.
 */
public class HtmlParseUtil {

    /**
     * Fetches the search page for the keyword {@code java} and prints each product found.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        // jsoup only sees the initial HTML response; content loaded later through AJAX is not
        // available unless the request is made to look like a real browser.

        // 1. Build the request URL.
        String url = "https://search.jd.com/Search?keyword=java&enc=utf-8";
        // 2. Download and parse the page (requires network access, 30 s timeout).
        Document document = Jsoup.parse(new URL(url), 30000);

        // 3. Locate the container that holds the search results.
        //    Document mirrors the browser DOM API, so the lookups read like JavaScript.
        Element element = document.getElementById("J_goodsList");
        if (element == null) {
            // getElementById returns null when the id is absent (for example when the site
            // answers with a different page); bail out instead of failing with an NPE below.
            System.err.println("No element with id J_goodsList found at " + url);
            return;
        }

        // 4. Every <li> inside the container is treated as one product.
        Elements elements = element.getElementsByTag("li");

        // Print the product information.
        for (Element el : elements) {
            // The image URL is read from the "data-lazy-img" attribute rather than "src"
            // because the page lazy-loads its images.
            String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = el.getElementsByClass("p-price").eq(0).text();
            String title = el.getElementsByClass("p-name").eq(0).text();

            System.out.println(img);
            System.out.println(price);
            System.out.println(title);
            System.out.println("=========================================");
        }
    }
}

3、封装一个实体类保存爬取下来的数据

package com.edgar.pojo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Plain data holder for one scraped product.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString}, a
 * no-args constructor and an all-args constructor whose parameter order follows the field
 * declaration order below.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content {
    // Product title text.
    private String title;
    // Product image URL.
    private String img;
    // Product price text.
    // NOTE(review): "prise" looks like a misspelling of "price"; it is left unchanged because
    // the generated getPrise/setPrise accessors are part of this class's interface.
    private String prise;
    // Add further attributes here as needed.
}

4、封装为工具使用!

/**
 * Searches JD.com for the given keywords and scrapes the products listed on the result page.
 *
 * <p>The keywords are URL-encoded as UTF-8 before being placed in the query string, so
 * non-ASCII input (such as Chinese) and reserved characters like spaces or {@code &} yield a
 * valid request URL instead of a malformed or truncated query.
 *
 * @param keywords the search keywords; must not be {@code null}
 * @return the scraped products; empty when the page contains no {@code J_goodsList} element
 * @throws Exception if the page cannot be fetched or parsed
 */
public List<Content> parseJD(String keywords) throws Exception {
    String url = "https://search.jd.com/Search?keyword="
            + URLEncoder.encode(keywords, "UTF-8") + "&enc=utf-8";
    // Download and parse the page (requires network access, 30 s timeout).
    Document document = Jsoup.parse(new URL(url), 30000);

    List<Content> goodsList = new ArrayList<>();

    Element element = document.getElementById("J_goodsList");
    if (element == null) {
        // getElementById returns null when the id is absent; report "no products" rather
        // than failing with a NullPointerException.
        return goodsList;
    }

    // Every <li> inside the result container is treated as one product.
    for (Element el : element.getElementsByTag("li")) {
        // The image URL is read from the "data-lazy-img" attribute rather than "src"
        // because the page lazy-loads its images.
        String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
        String price = el.getElementsByClass("p-price").eq(0).text();
        String title = el.getElementsByClass("p-name").eq(0).text();

        Content content = new Content();
        content.setImg(img);
        content.setPrise(price);
        content.setTitle(title);
        goodsList.add(content);
    }
    return goodsList;
}

5、测试工具类的使用!

/**
 * Manual smoke test: scrapes the results for a sample keyword and prints every product.
 *
 * @param args unused
 * @throws Exception if the scrape fails
 */
public static void main(String[] args) throws Exception {
    HtmlParseUtil parser = new HtmlParseUtil();
    parser.parseJD("码出高效").forEach(item -> System.out.println(item));
}

搞定收工!简单爬虫编写完毕!我们这里的数据就使用爬取的即可,平时开发es的数据可能来自多个地方,你们可以从数据库查询获取也是一样的,后面我们来测试下效果!

qwE0Wd.png

业务编写

1、导入ElasticsearchClientConfig 配置类

package com.edgar.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 *
 * <p>General recipe followed here: (1) find the object you need, (2) register it with Spring
 * so it can be injected, (3) with Spring Boot, first check the matching
 * {@code xxxAutoConfiguration} / {@code xxxProperties} sources.
 */
@Configuration
public class ElasticsearchClientConfig {

    /**
     * Builds a client that talks to a single node at {@code http://localhost:9200}.
     *
     * @return the client bean
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}

2、编写业务!

package com.edgar.service;

import com.alibaba.fastjson.JSON;
import com.edgar.pojo.Content;
import com.edgar.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Business logic of the product search demo: scrapes products into the {@code jd_goods}
 * Elasticsearch index and runs paged searches against it.
 */
@Service
public class ContentService {

    /** Name of the index that stores the scraped products. */
    private static final String INDEX_NAME = "jd_goods";

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products for the given keywords and bulk-indexes them.
     *
     * @param keywords the search keywords passed to the scraper
     * @return {@code true} if products were found and all of them were indexed without
     *         failures; {@code false} if nothing was scraped or any bulk item failed
     * @throws Exception if scraping or the bulk request fails
     */
    public boolean parseContent(String keywords) throws Exception {
        List<Content> contents = new HtmlParseUtil().parseJD(keywords);
        if (contents.isEmpty()) {
            // A bulk request without any action is rejected by request validation,
            // so there is nothing useful to send.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        for (Content content : contents) {
            bulkRequest.add(
                    new IndexRequest(INDEX_NAME)
                    .source(JSON.toJSONString(content), XContentType.JSON));
        }

        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Runs a paged term query on the {@code title} field.
     *
     * @param keyword  the term to look for in {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source documents of the hits on the requested page
     * @throws IOException if the search request fails
     */
    public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
        if (pageNo < 1) {
            pageNo = 1;
        }

        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a zero-based document offset, not a page number, so page 1
        // must start at offset 0 and each further page advances by pageSize.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Exact term match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        // 60 seconds; the previous 60 ms budget was too short for a normal search.
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Execute the search.
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Collect the source document of every hit.
        List<Map<String,Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }
}

3、controller

package com.edgar.controller;

import com.edgar.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * REST endpoints of the demo: one triggers scraping and indexing, the other runs a paged
 * search.
 */
@RestController
public class ContentController {

    @Autowired
    ContentService contentService;

    /**
     * Scrapes the products for {@code keyword} and indexes them.
     *
     * @param keyword the search keyword taken from the path
     * @return the result reported by {@code ContentService.parseContent}
     * @throws Exception if scraping or indexing fails
     */
    @GetMapping("/parse/{keyword}")
    public boolean parse(@PathVariable("keyword") String keyword) throws Exception {
        boolean indexed = contentService.parseContent(keyword);
        return indexed;
    }

    /**
     * Searches the indexed products.
     *
     * @param keyword  the term to search for
     * @param pageNo   the requested page number
     * @param pageSize the number of hits per page
     * @return the matching documents as returned by {@code ContentService.searchPage}
     * @throws IOException if the search request fails
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,
                                           @PathVariable("pageNo") int pageNo,
                                           @PathVariable("pageSize") int pageSize) throws IOException {
        List<Map<String,Object>> hits = contentService.searchPage(keyword, pageNo, pageSize);
        return hits;
    }

}

前端逻辑

1、定义导入vue和axios的依赖!

<script th:src="@{/js/axios.min.js}"></script>
<script th:src="@{/js/vue.min.js}"></script>

2、初始化Vue对象,给外层div绑定app对象!

<script>
	// Root Vue instance mounted on the outer #app container.
	new Vue({
		data: {
			results: [], // hits returned by the search
			keyword: '' // text typed into the search box
		},
		el: '#app'
	})
</script>

3、绑定搜索框及相关事件!

qsmQJ0.png

4、编写方法,获取后端传递的数据!

<script>
    // Root Vue instance mounted on the outer #app container.
    new Vue({
        el: '#app',
        data: {
            keyword: '', // text typed into the search box
            results: [] // hits returned by the search
        },
        methods: {
            // Queries the backend for the first page (10 hits) of the current keyword
            // and binds the response body to `results`.
            searchKey() {
                var keyword = this.keyword.trim();
                console.log(keyword);
                if (!keyword) {
                    // An empty keyword would produce the path "/search//1/10",
                    // which does not match the backend route.
                    return;
                }
                // The keyword is a path segment, so it must be encoded: spaces,
                // "?", "#" or non-ASCII text would otherwise corrupt the URL.
                axios.get('/search/' + encodeURIComponent(keyword) + "/1/10").then(response => {
                    console.log(response);
                    this.results = response.data; // bind the data
                }).catch(error => {
                    // Surface request failures instead of leaving an unhandled rejection.
                    console.error(error);
                })
            }

        }
    })
</script>

5、渲染解析回来的数据!

qsnCm4.png

效果图:

qsnZp6.png

搜索高亮

1、编写业务类,处理高亮字段

/**
 * Runs a paged term query on the {@code title} field and replaces each hit's {@code title}
 * with its highlighted version, in which matches are wrapped in a red {@code <span>}.
 *
 * <p>NOTE(review): the returned titles contain HTML markup intended to be rendered as HTML
 * by the caller; the stored title text itself is not escaped here.
 *
 * @param keyword  the term to look for in {@code title}
 * @param pageNo   1-based page number; values below 1 are treated as 1
 * @param pageSize number of hits per page
 * @return the source documents of the hits on the requested page, with {@code title}
 *         replaced by the highlighted text where a highlight is available
 * @throws IOException if the search request fails
 */
public List<Map<String,Object>> searchContentHighlighter(String keyword,int pageNo,int pageSize) throws IOException {
    if (pageNo < 1) {
        pageNo = 1;
    }

    SearchRequest searchRequest = new SearchRequest("jd_goods");
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

    // Paging: "from" is a zero-based document offset, not a page number, so page 1
    // must start at offset 0 and each further page advances by pageSize.
    sourceBuilder.from((pageNo - 1) * pageSize);
    sourceBuilder.size(pageSize);

    // Exact term match on the title field.
    TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
    sourceBuilder.query(termQueryBuilder);
    // 60 seconds; the previous 60 ms budget was too short for a normal search.
    sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

    // Highlighting: wrap matches in the title field in a red <span>.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.field("title");
    // Must be false when several fields are to be highlighted.
    highlightBuilder.requireFieldMatch(false);
    highlightBuilder.preTags("<span style='color:red'>");
    highlightBuilder.postTags("</span>");
    sourceBuilder.highlighter(highlightBuilder);

    // Execute the search.
    searchRequest.source(sourceBuilder);
    SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    // Collect the hits, swapping in the highlighted title where one exists.
    List<Map<String,Object>> list = new ArrayList<>();
    for (SearchHit hit : searchResponse.getHits().getHits()) {
        Map<String, Object> sourceAsMap = hit.getSourceAsMap(); // original document
        HighlightField title = hit.getHighlightFields().get("title");
        if (title != null) {
            // Join all highlight fragments into one string.
            StringBuilder highlightedTitle = new StringBuilder();
            for (Text fragment : title.fragments()) {
                highlightedTitle.append(fragment);
            }
            sourceAsMap.put("title", highlightedTitle.toString());
        }
        list.add(sourceAsMap);
    }
    return list;
}

2、controller层调用新的高亮业务!

/**
 * Searches the indexed products and returns the hits with highlighted titles.
 *
 * @param keyword  the term to search for
 * @param pageNo   the requested page number
 * @param pageSize the number of hits per page
 * @return the documents returned by {@code ContentService.searchContentHighlighter}
 * @throws IOException if the search request fails
 */
@GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,
                                       @PathVariable("pageNo") int pageNo,
                                       @PathVariable("pageSize") int pageSize) throws IOException {
    List<Map<String,Object>> highlightedHits =
            contentService.searchContentHighlighter(keyword, pageNo, pageSize);
    return highlightedHits;
}

3、前端vue指令解析html!

<!--标题-->
<p class="productTitle">
	<a v-html="result.title"> </a>
</p>

4、最终效果!

qcV0v8.png

posted @ 2022-03-29 22:00  EdgarStudy  阅读(45)  评论(0)  编辑  收藏  举报