java实现获取百度/微博/头条/知乎热榜数据
使用 Java 定时获取百度、微博、头条、知乎的热榜数据,并汇总成一个热搜榜单。
目录
一、效果展示
二、热搜榜单
百度热搜榜单:https://top.baidu.com/board?tab=realtime
微博热搜榜单:https://weibo.com/ajax/side/hotSearch
头条热搜榜单:https://www.toutiao.com/hot-event/hot-board/?origin=toutiao_pc
知乎热搜榜单:https://www.zhihu.com/api/v3/feed/topstory/hot-lists/total?limit=50&desktop=true
三、程序代码
package com.ruoyi.cms.task;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.ruoyi.cms.constant.RedisKeys;
import com.ruoyi.cms.domain.News;
import com.ruoyi.common.core.redis.RedisCache;
import com.xkcoding.http.HttpUtil;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Scheduled task that fetches the hot-search boards of Baidu, Weibo, Toutiao
 * and Zhihu and stores the top entries of each board in Redis under
 * {@code RedisKeys.hot_search + <source name>}.
 */
@Component("news")
public class NewsTask {

    @Autowired
    private RedisCache redisCache;

    /**
     * Maximum number of entries stored per board.
     */
    private static final int MAX_LIMIT_COUNT = 15;

    /**
     * Refreshes every board.
     *
     * <p>Each source is attempted even if an earlier one fails, so one broken
     * upstream does not stop the others from refreshing. If any source threw,
     * the first exception is rethrown once all sources have been attempted,
     * with later failures attached as suppressed exceptions.
     *
     * @throws RuntimeException the first failure raised by any source
     */
    public void task() {
        Runnable[] sources = {this::baidu, this::weibo, this::toutiao, this::zhihu};
        RuntimeException failure = null;
        for (Runnable source : sources) {
            try {
                source.run();
            } catch (RuntimeException e) {
                if (failure == null) {
                    failure = e;
                } else if (e != failure) {
                    // Throwable.addSuppressed rejects self-suppression.
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Baidu hot search: scrapes the real-time board HTML page.
     */
    public void baidu() {
        String baiduUrl = "https://top.baidu.com/board?tab=realtime";
        String baiduHtml = HttpUtil.get(baiduUrl);
        List<News> items = new ArrayList<>(MAX_LIMIT_COUNT);
        if (baiduHtml != null) {
            Document doc = Jsoup.parse(baiduHtml);
            Elements entries = doc.select(".content_1YWBm");
            for (Element entry : entries) {
                if (items.size() >= MAX_LIMIT_COUNT) {
                    break;
                }
                String title = entry.select(".c-single-text-ellipsis").text();
                if (isBlank(title)) {
                    continue;
                }
                items.add(toNews(title, entry.select("a").attr("href")));
            }
        }
        save("baidu", items);
    }

    /**
     * Weibo hot search: reads {@code data.realtime} from the JSON endpoint and
     * builds a search link of the form {@code ?q=#<word>#} for each entry.
     */
    public void weibo() {
        String weiboUrl = "https://weibo.com/ajax/side/hotSearch";
        String weiboJson = HttpUtil.get(weiboUrl);
        List<News> items = new ArrayList<>(MAX_LIMIT_COUNT);
        for (Object entry : readList(weiboJson, "data.realtime")) {
            if (items.size() >= MAX_LIMIT_COUNT) {
                break;
            }
            String title = text(entry, "word");
            if (isBlank(title)) {
                continue;
            }
            // The word goes into a query string, so it must be percent-encoded;
            // otherwise characters such as '&', '#', '+' or spaces corrupt the link.
            String url = "https://s.weibo.com/weibo?q=%23" + encodeQueryValue(title) + "%23";
            items.add(toNews(title, url));
        }
        save("weibo", items);
    }

    /**
     * Toutiao hot search: the first element of {@code fixed_top_data} (when
     * present) is placed ahead of the regular {@code data} entries.
     */
    public void toutiao() {
        String toutiaoUrl = "https://www.toutiao.com/hot-event/hot-board/?origin=toutiao_pc";
        String toutiaoJson = HttpUtil.get(toutiaoUrl);

        // Build a separate list instead of inserting into the parsed JSON array.
        List<Object> entries = new ArrayList<>();
        List<?> pinned = readList(toutiaoJson, "fixed_top_data");
        if (!pinned.isEmpty()) {
            entries.add(pinned.get(0));
        }
        entries.addAll(readList(toutiaoJson, "data"));

        List<News> items = new ArrayList<>(MAX_LIMIT_COUNT);
        for (Object entry : entries) {
            if (items.size() >= MAX_LIMIT_COUNT) {
                break;
            }
            String title = text(entry, "Title");
            if (isBlank(title)) {
                continue;
            }
            items.add(toNews(title, text(entry, "Url")));
        }
        save("toutiao", items);
    }

    /**
     * Zhihu hot search: the title comes from {@code target.title}; the link is
     * built from the part of {@code card_id} after the first underscore.
     */
    public void zhihu() {
        String zhihuUrl = "https://www.zhihu.com/api/v3/feed/topstory/hot-lists/total?limit=50&desktop=true";
        String zhihuJson = HttpUtil.get(zhihuUrl);
        List<News> items = new ArrayList<>(MAX_LIMIT_COUNT);
        for (Object entry : readList(zhihuJson, "data")) {
            if (items.size() >= MAX_LIMIT_COUNT) {
                break;
            }
            if (!(entry instanceof Map)) {
                continue;
            }
            String title = text(((Map<?, ?>) entry).get("target"), "title");
            String cardId = text(entry, "card_id");
            if (isBlank(title) || cardId == null) {
                continue;
            }
            String[] parts = cardId.split("_");
            if (parts.length < 2) {
                // No "<prefix>_<id>" shape, so no question id can be extracted.
                continue;
            }
            items.add(toNews(title, "https://www.zhihu.com/question/" + parts[1]));
        }
        save("zhihu", items);
    }

    /**
     * Replaces the cached list of one source.
     *
     * <p>Nothing is written when {@code items} is empty, so a fetch that
     * yielded no entries does not wipe the previously cached board.
     *
     * @param source suffix appended to {@code RedisKeys.hot_search}
     * @param items  entries to store, already limited to {@link #MAX_LIMIT_COUNT}
     */
    private void save(String source, List<News> items) {
        if (items.isEmpty()) {
            return;
        }
        String key = RedisKeys.hot_search + source;
        redisCache.deleteObject(key);
        redisCache.setCacheList(key, items);
    }

    /**
     * Parses {@code json} and returns the list found at {@code path}.
     *
     * @return the list at {@code path}, or an empty list when the body is
     *         null/empty or the path does not resolve to a list
     */
    private static List<?> readList(String json, String path) {
        if (json == null || json.isEmpty()) {
            return Collections.emptyList();
        }
        Object node = JSONUtil.getByPath(JSONUtil.parse(json), path);
        if (node instanceof List) {
            return (List<?>) node;
        }
        return Collections.emptyList();
    }

    /**
     * Reads a textual field from a JSON object node.
     *
     * @return the field as a string when {@code node} is a map and the value is
     *         a string or a number; otherwise {@code null}
     */
    private static String text(Object node, String key) {
        if (!(node instanceof Map)) {
            return null;
        }
        Object value = ((Map<?, ?>) node).get(key);
        if (value instanceof CharSequence || value instanceof Number) {
            return value.toString();
        }
        return null;
    }

    /** Returns true when {@code value} is null or contains only whitespace. */
    private static boolean isBlank(String value) {
        return value == null || value.trim().isEmpty();
    }

    /** Percent-encodes {@code value} as UTF-8 for use inside a URL query string. */
    private static String encodeQueryValue(String value) {
        try {
            return URLEncoder.encode(value, StandardCharsets.UTF_8.name());
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /** Creates a {@link News} carrying the given title and link. */
    private static News toNews(String title, String url) {
        News news = new News();
        news.setTitle(title);
        news.setUrl(url);
        return news;
    }
}