微博爬数据
void getUser(){ Map<String, Integer> map = new HashMap<>(); HashMap<String, String> headers = new HashMap<>(); headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0"); headers.put("Cookie","SUB=_2A25OBB9gDeRhGeNG4lIZ8ybPyziIHXVtBqEorDV8PUJbkNAKLU7jkW1NSxZ3rk-lltjdVTJWAuJrtG1N-6THcXTY; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF0Mxd1XQQmx6HGO.e6BGsD5NHD95Qf1h.71heRe05XWs4Dqcj.i--fi-z7iKysi--RiKyWi-zpi--ci-2XiK.Ei--fiK.Ei-24; XSRF-TOKEN=ZxMS7NEE0TvXaeGc55l4CcWl; _s_tentry=weibo.com; Apache=2654538524812.041.1660972627006; SINAGLOBAL=2654538524812.041.1660972627006; ULV=1660972627078:1:1:1:2654538524812.041.1660972627006:; SSOLoginState=1660972849; WBPSESS=7pJQxz1_dPdMSL7AXnCXDf0T9olQ0YfW5LtecSt6SMnNs6oSz17JJhurTo7Zik1em1LCWoqCVL9m0scGsEictmrfwhqolExW-PYkh6TLS9C7vnatXY6ZBWEQsnj0vwcgKwilI1AKAgAxRGvUsmNr0w=="); String s = HttpClientUtil.doGet("https://weibo.com/ajax/feed/unreadfriendstimeline?list_id=100015890838304&refresh=4&max_id=1661129223609925&count=15", null, headers); com.alibaba.fastjson.JSONObject jsonObject = com.alibaba.fastjson.JSONObject.parseObject(s); JSONArray statuses = jsonObject.getJSONArray("statuses"); Object o = statuses.get(0); String s1 = com.alibaba.fastjson.JSONObject.toJSONString(o); com.alibaba.fastjson.JSONObject jsonObject1 = com.alibaba.fastjson.JSONObject.parseObject(s1); com.alibaba.fastjson.JSONObject user = jsonObject1.getJSONObject("user"); Object id = user.get("id"); HashMap<String, String> headers1 = new HashMap<>(); headers1.put("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0"); headers1.put("Cookie","SUB=_2A25OBB9gDeRhGeNG4lIZ8ybPyziIHXVtBqEorDV8PUJbkNAKLU7jkW1NSxZ3rk-lltjdVTJWAuJrtG1N-6THcXTY; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF0Mxd1XQQmx6HGO.e6BGsD5NHD95Qf1h.71heRe05XWs4Dqcj.i--fi-z7iKysi--RiKyWi-zpi--ci-2XiK.Ei--fiK.Ei-24; XSRF-TOKEN=ZxMS7NEE0TvXaeGc55l4CcWl; _s_tentry=weibo.com; Apache=2654538524812.041.1660972627006; 
SINAGLOBAL=2654538524812.041.1660972627006; ULV=1660972627078:1:1:1:2654538524812.041.1660972627006:; SSOLoginState=1660972849; WBPSESS=7pJQxz1_dPdMSL7AXnCXDf0T9olQ0YfW5LtecSt6SMnNs6oSz17JJhurTo7Zik1eGqyoXUKpM2W41KIDCDkOtBuCGPNyE1P0pDIpMdWKlnnTcm5T7J1kiGnlPV4m-csfCRzAfpmH0TG_aa3UuId66w=="); String format = String.format("https://weibo.com/ajax/profile/info?uid=%s",id); String s2 = HttpClientUtil.doGet(format, null, headers1); com.alibaba.fastjson.JSONObject jsonObject2 = com.alibaba.fastjson.JSONObject.parseObject(s2); com.alibaba.fastjson.JSONObject data = jsonObject2.getJSONObject("data"); com.alibaba.fastjson.JSONObject user2 = data.getJSONObject("user"); Object followers_count = user2.get("followers_count"); String screen_name = user2.getString("screen_name"); String statuses_count = user2.getString("statuses_count"); System.out.println("博主名:"+screen_name); System.out.println("博主id:"+id); System.out.println("粉丝数:"+followers_count); System.out.println("帖子数:"+statuses_count); HashMap<String, String> headers2 = new HashMap<>(); headers2.put("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0"); headers2.put("Cookie","SUB=_2A25OBB9gDeRhGeNG4lIZ8ybPyziIHXVtBqEorDV8PUJbkNAKLU7jkW1NSxZ3rk-lltjdVTJWAuJrtG1N-6THcXTY; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF0Mxd1XQQmx6HGO.e6BGsD5NHD95Qf1h.71heRe05XWs4Dqcj.i--fi-z7iKysi--RiKyWi-zpi--ci-2XiK.Ei--fiK.Ei-24; XSRF-TOKEN=ZxMS7NEE0TvXaeGc55l4CcWl; _s_tentry=weibo.com; Apache=2654538524812.041.1660972627006; SINAGLOBAL=2654538524812.041.1660972627006; ULV=1660972627078:1:1:1:2654538524812.041.1660972627006:; SSOLoginState=1660972849; WBPSESS=7pJQxz1_dPdMSL7AXnCXDf0T9olQ0YfW5LtecSt6SMnNs6oSz17JJhurTo7Zik1em1LCWoqCVL9m0scGsEictocaVoXXzdb7nwtc_2ZHJITDXVSMz_k5pmHQVvumdbsK9jmQ66-yF2eZaVgy5IUw8w=="); Integer integer = Integer.valueOf(statuses_count); double page = integer/20; Double ceil1 = Math.ceil(page); int ceil = ceil1.intValue(); // int ceil = 10; for (int i = 1; i <= ceil; i++) { 
String format1 = String.format("https://weibo.com/ajax/statuses/mymblog?uid=%s&page=%s",id,i); String s3 = HttpClientUtil.doGet(format1, null, headers2); com.alibaba.fastjson.JSONObject jsonObject3 = com.alibaba.fastjson.JSONObject.parseObject(s3); com.alibaba.fastjson.JSONObject data1 = jsonObject3.getJSONObject("data"); JSONArray list = data1.getJSONArray("list"); for (Object o1 : list) { String s4 = com.alibaba.fastjson.JSONObject.toJSONString(o1); com.alibaba.fastjson.JSONObject jsonObject4 = com.alibaba.fastjson.JSONObject.parseObject(s4); String text_raw = jsonObject4.getString("text_raw"); boolean contains = text_raw.contains("@"); if(contains){ //("(?<=@).*?(?=' ')"); String regex= "(?<=@)(.*?)(?= )"; Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(text_raw); while (matcher.find()){ // System.out.println(matcher.group(1)); String group = matcher.group(); if (map.containsKey(group)){ Integer value = map.get(group); // value++; int l = value + 1; map.put(group,l); }else { map.put(group,1); } //System.out.println(map); } } } // try { // Thread.sleep(200); // } catch (InterruptedException e) { // e.printStackTrace(); // } } List<Map<String, Integer>> mapArrayList = new ArrayList<>(); mapArrayList.add(map); // 定义一个新的工作簿 XSSFWorkbook wb = new XSSFWorkbook(); // 创建一个Sheet页 XSSFSheet sheet = wb.createSheet("First sheet"); //设置行高 sheet.setDefaultRowHeight((short) (2 * 256)); //设置列宽 sheet.setColumnWidth(0, 4000); sheet.setColumnWidth(1, 4000); XSSFFont font = wb.createFont(); font.setFontName("宋体"); font.setFontHeightInPoints((short) 16); //获得表格第一行 XSSFRow row = sheet.createRow(0); //根据需要给第一行每一列设置标题 XSSFCell cell = row.createCell(0); cell.setCellValue("姓名"); cell = row.createCell(1); cell.setCellValue("个数"); XSSFRow rows; XSSFCell cells; System.out.println(mapArrayList); //循环拿到的数据给所有行每一列设置对应的值 for (int i = 0; i < mapArrayList.size(); i++) { // 在这个sheet页里创建一行 // rows = sheet.createRow(i + 1); // 该行创建一个单元格,在该单元格里设置值 List <String> name = new 
ArrayList<>(mapArrayList.get(i).keySet()); // int size = mapArrayList.get(i).size(); int j = 0; for(String obj : name){ rows = sheet.createRow(j + 1); cells = rows.createCell(i); cells.setCellValue(obj); cells = rows.createCell(1); cells.setCellValue(mapArrayList.get(i).get(obj)); j++; } // Integer age = mapArrayList.get(i).get("个数"); } try { //D:\Ban String src = "D:/Ban/a.xls"; File file = new File(src); FileOutputStream fileOutputStream = new FileOutputStream(file); wb.write(fileOutputStream); wb.close(); fileOutputStream.close(); } catch (IOException e) { e.printStackTrace(); } }
希望在我的学习之路上留下足迹,时时刻刻提醒自己不忘初心!有时候你踮起脚不是为了更好地看到世界,而是为了让世界看到你!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 零经验选手,Compose 一天开发一款小游戏!
· 一起来玩mcp_server_sqlite,让AI帮你做增删改查!!