爬取京东商品集成ES搜索
效果展示如下:
爬取京东数据
截至本文编写时,以下代码可以正确解析京东搜索页;后续京东修改页面结构后,可能需要对代码做相应调整
/**
 * Fetches the JD search result page for a keyword and extracts the listed products.
 *
 * <p>Parsing depends on the page layout: a {@code J_goodsList} container whose
 * {@code li} items carry {@code p-name} and {@code p-price} nodes. If the layout
 * changes, the selectors below have to be adjusted.
 *
 * @param keyword search keyword; must not be {@code null}. It is URL-encoded before
 *                being appended to the query string.
 * @return the products found on the page; empty when the result container is missing
 * @throws IOException if the page cannot be fetched or parsed
 * @throws IllegalArgumentException if {@code keyword} is {@code null}
 */
public static List<JdProduct> fetchJD(String keyword) throws IOException {
    if (keyword == null) {
        throw new IllegalArgumentException("keyword must not be null");
    }
    // Encode the keyword so spaces, '&', '#' and non-ASCII text cannot corrupt the query string.
    String url = "https://search.jd.com/Search?keyword="
            + java.net.URLEncoder.encode(keyword, "UTF-8");
    // Jsoup fetches the URL (30 s timeout) and returns the parsed Document.
    Document page = Jsoup.parse(new URL(url), 30000);

    List<JdProduct> list = Lists.newArrayList();
    Element goodsList = page.getElementById("J_goodsList");
    if (goodsList == null) {
        // Container not present in the response: nothing to extract.
        return list;
    }

    // getElementsByTag returns every descendant <li>, so items without the
    // expected name/price nodes are skipped instead of failing on get(0).
    for (Element item : goodsList.getElementsByTag("li")) {
        Elements nameNodes = item.getElementsByClass("p-name");
        Elements priceNodes = item.getElementsByClass("p-price");
        if (nameNodes.isEmpty() || priceNodes.isEmpty()) {
            continue;
        }
        // Images are lazy-loaded: the address is read from "data-lazy-img" rather than "src".
        String img = "https:" + item.getElementsByTag("img").eq(0).attr("data-lazy-img");
        String name = nameNodes.get(0).getElementsByTag("em").text();
        String price = priceNodes.get(0).getElementsByTag("i").text();
        list.add(new JdProduct(img, price, name));
    }
    return list;
}
数据导入ES
/**
 * Crawls JD products for the given keyword and indexes them into the
 * {@code ESConstants.JD_GOODS} index with a single bulk request.
 *
 * @param keyword search keyword passed to {@code HtmlParseUtil.fetchJD}
 * @return {@code true} when the bulk request completed without failures;
 *         {@code false} when nothing was crawled, when the bulk response reports
 *         failures, or when an {@link IOException} occurred
 */
public boolean fetchJd(String keyword) {
    BulkRequest bulkRequest = new BulkRequest(ESConstants.JD_GOODS);
    try {
        bulkRequest.timeout("2m");
        // One index action per crawled product, serialized as a JSON document.
        HtmlParseUtil.fetchJD(keyword).forEach(product ->
                bulkRequest.add(
                        new IndexRequest().source(JSON.toJSONString(product), XContentType.JSON)
                )
        );
        if (bulkRequest.numberOfActions() == 0) {
            // A bulk request without any action is rejected by client-side validation,
            // so report "nothing imported" instead of sending it.
            return false;
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    } catch (IOException e) {
        // Crawl or indexing I/O failure is reported to the caller as "not imported".
        e.printStackTrace();
        return false;
    }
}
实现搜索并高亮
/**
 * Runs a paged term query on the {@code name} field of the
 * {@code ESConstants.JD_GOODS} index, optionally highlighting the matches.
 *
 * @param keyword   term to look up in the {@code name} field
 * @param pageNo    1-based page number; values below 1 are treated as 1
 * @param pageSize  number of hits per page; values below 1 are treated as 1
 * @param highlight when {@code true}, the {@code name} value of each returned map is
 *                  replaced by its highlighted fragments (if the hit has any)
 * @return one source map per hit on the requested page
 * @throws IOException if the search request fails
 */
public List<Map<String, Object>> searchPage(
        String keyword,
        int pageNo,
        int pageSize,
        boolean highlight
) throws IOException {
    int page = Math.max(pageNo, 1);
    int size = Math.max(pageSize, 1);

    SearchRequest searchRequest = new SearchRequest(ESConstants.JD_GOODS);
    SearchSourceBuilder builder = new SearchSourceBuilder();
    // "from" is the offset of the first hit, not a page number.
    builder.from((page - 1) * size);
    builder.size(size);

    // Exact (term-level) match on the name field.
    TermQueryBuilder title = QueryBuilders.termQuery("name", keyword);
    builder.query(title);
    builder.timeout(new TimeValue(60, TimeUnit.SECONDS));

    if (highlight) {
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("name");
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        // Do not require the query to target the highlighted field.
        highlightBuilder.requireFieldMatch(false);
        builder.highlighter(highlightBuilder);
    }
    searchRequest.source(builder);

    SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    List<Map<String, Object>> list = Lists.newArrayList();
    Arrays.stream(search.getHits().getHits()).forEach(hit -> {
        Map<String, Object> source = hit.getSourceAsMap();
        if (highlight) {
            HighlightField nameField = hit.getHighlightFields().get("name");
            if (nameField != null) {
                // Join all fragments and use them in place of the plain name.
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : nameField.getFragments()) {
                    highlighted.append(fragment);
                }
                source.put("name", highlighted.toString());
            }
        }
        list.add(source);
    });
    return list;
}
使用Vue实现前后端分离
- 新建文件夹 安装Vue、axios
npm init
npm install vue
npm install axios
- 进入下载下来的源码的 dist 目录
- 将源码导入到项目中
- 前端页面编写 Vue 代码
<script th:src="@{/js/axios.min.js}"></script>
<script th:src="@{/js/vue.min.js}"></script>
<script>
// Vue instance mounted on #app: holds the search keyword and the result list.
new Vue({
    el: "#app",
    data: {
        keyword: "", // search keyword
        results: []  // search results
    },
    methods: {
        // Queries the backend search endpoint (page 1, 10 items) and stores the response data.
        searchKey() {
            const keyword = this.keyword.trim();
            if (!keyword) {
                // A blank keyword would produce the malformed path "jd/search//1/10".
                return;
            }
            // Encode the keyword so '/', '?', '#', '%' and non-ASCII text stay inside one path segment.
            axios.get('jd/search/' + encodeURIComponent(keyword) + "/1/10")
                .then(e => {
                    this.results = e.data;
                })
                .catch(err => {
                    // Surface request failures instead of leaving an unhandled rejection.
                    console.error(err);
                });
        }
    }
})
</script>