微博爬数据

复制代码
   /**
    * Crawls one Weibo blogger's posts and exports how often each "@mention" appears.
    *
    * <p>Steps, in order:
    * <ol>
    *   <li>Fetch the friends timeline and take the author of its first status.</li>
    *   <li>Fetch that author's profile and print screen name, id, follower count and post count.</li>
    *   <li>Page through the author's posts (20 posts per page is assumed) and count every
    *       string captured by the mention regex in each post's {@code text_raw}.</li>
    *   <li>Write the counts to a two-column sheet ("姓名" / "个数") at {@code D:/Ban/a.xls}.</li>
    * </ol>
    *
    * <p>I/O failures while writing the workbook are printed and otherwise ignored.
    *
    * @throws IllegalStateException if the timeline or profile response lacks the expected fields
    */
   void getUser() {
       // NOTE(review): the session cookies below are hard-coded credentials. They expire and should
       // not live in source control; consider loading them from configuration instead.
       String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0";

       // ---- Step 1: pick the author of the first status in the friends timeline ----
       HashMap<String, String> headers = new HashMap<>();
       headers.put("User-Agent", userAgent);
       headers.put("Cookie","SUB=_2A25OBB9gDeRhGeNG4lIZ8ybPyziIHXVtBqEorDV8PUJbkNAKLU7jkW1NSxZ3rk-lltjdVTJWAuJrtG1N-6THcXTY; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF0Mxd1XQQmx6HGO.e6BGsD5NHD95Qf1h.71heRe05XWs4Dqcj.i--fi-z7iKysi--RiKyWi-zpi--ci-2XiK.Ei--fiK.Ei-24; XSRF-TOKEN=ZxMS7NEE0TvXaeGc55l4CcWl; _s_tentry=weibo.com; Apache=2654538524812.041.1660972627006; SINAGLOBAL=2654538524812.041.1660972627006; ULV=1660972627078:1:1:1:2654538524812.041.1660972627006:; SSOLoginState=1660972849; WBPSESS=7pJQxz1_dPdMSL7AXnCXDf0T9olQ0YfW5LtecSt6SMnNs6oSz17JJhurTo7Zik1em1LCWoqCVL9m0scGsEictmrfwhqolExW-PYkh6TLS9C7vnatXY6ZBWEQsnj0vwcgKwilI1AKAgAxRGvUsmNr0w==");
       String timelineBody = HttpClientUtil.doGet("https://weibo.com/ajax/feed/unreadfriendstimeline?list_id=100015890838304&refresh=4&max_id=1661129223609925&count=15", null, headers);
       com.alibaba.fastjson.JSONObject timeline = com.alibaba.fastjson.JSONObject.parseObject(timelineBody);
       JSONArray statuses = timeline == null ? null : timeline.getJSONArray("statuses");
       if (statuses == null || statuses.isEmpty()) {
           throw new IllegalStateException("Timeline response contains no statuses");
       }
       // Read the element directly instead of serialising it and parsing it again.
       com.alibaba.fastjson.JSONObject firstStatus = statuses.getJSONObject(0);
       com.alibaba.fastjson.JSONObject user = firstStatus == null ? null : firstStatus.getJSONObject("user");
       if (user == null) {
           throw new IllegalStateException("First timeline status has no user object");
       }
       Object id = user.get("id");

       // ---- Step 2: load and print the profile of that user ----
       HashMap<String, String> headers1 = new HashMap<>();
       headers1.put("User-Agent", userAgent);
       headers1.put("Cookie","SUB=_2A25OBB9gDeRhGeNG4lIZ8ybPyziIHXVtBqEorDV8PUJbkNAKLU7jkW1NSxZ3rk-lltjdVTJWAuJrtG1N-6THcXTY; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF0Mxd1XQQmx6HGO.e6BGsD5NHD95Qf1h.71heRe05XWs4Dqcj.i--fi-z7iKysi--RiKyWi-zpi--ci-2XiK.Ei--fiK.Ei-24; XSRF-TOKEN=ZxMS7NEE0TvXaeGc55l4CcWl; _s_tentry=weibo.com; Apache=2654538524812.041.1660972627006; SINAGLOBAL=2654538524812.041.1660972627006; ULV=1660972627078:1:1:1:2654538524812.041.1660972627006:; SSOLoginState=1660972849; WBPSESS=7pJQxz1_dPdMSL7AXnCXDf0T9olQ0YfW5LtecSt6SMnNs6oSz17JJhurTo7Zik1eGqyoXUKpM2W41KIDCDkOtBuCGPNyE1P0pDIpMdWKlnnTcm5T7J1kiGnlPV4m-csfCRzAfpmH0TG_aa3UuId66w==");
       String profileUrl = String.format("https://weibo.com/ajax/profile/info?uid=%s",id);
       String profileBody = HttpClientUtil.doGet(profileUrl, null, headers1);
       com.alibaba.fastjson.JSONObject profile = com.alibaba.fastjson.JSONObject.parseObject(profileBody);
       com.alibaba.fastjson.JSONObject data = profile == null ? null : profile.getJSONObject("data");
       com.alibaba.fastjson.JSONObject user2 = data == null ? null : data.getJSONObject("user");
       if (user2 == null) {
           throw new IllegalStateException("Profile response for uid " + id + " has no data.user object");
       }
       Object followers_count = user2.get("followers_count");
       String screen_name = user2.getString("screen_name");
       String statuses_count = user2.getString("statuses_count");
       System.out.println("博主名:"+screen_name);
       System.out.println("博主id:"+id);
       System.out.println("粉丝数:"+followers_count);
       System.out.println("帖子数:"+statuses_count);

       // ---- Step 3: page through the user's posts and count mentions ----
       HashMap<String, String> headers2 = new HashMap<>();
       headers2.put("User-Agent", userAgent);
       headers2.put("Cookie","SUB=_2A25OBB9gDeRhGeNG4lIZ8ybPyziIHXVtBqEorDV8PUJbkNAKLU7jkW1NSxZ3rk-lltjdVTJWAuJrtG1N-6THcXTY; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF0Mxd1XQQmx6HGO.e6BGsD5NHD95Qf1h.71heRe05XWs4Dqcj.i--fi-z7iKysi--RiKyWi-zpi--ci-2XiK.Ei--fiK.Ei-24; XSRF-TOKEN=ZxMS7NEE0TvXaeGc55l4CcWl; _s_tentry=weibo.com; Apache=2654538524812.041.1660972627006; SINAGLOBAL=2654538524812.041.1660972627006; ULV=1660972627078:1:1:1:2654538524812.041.1660972627006:; SSOLoginState=1660972849; WBPSESS=7pJQxz1_dPdMSL7AXnCXDf0T9olQ0YfW5LtecSt6SMnNs6oSz17JJhurTo7Zik1em1LCWoqCVL9m0scGsEictocaVoXXzdb7nwtc_2ZHJITDXVSMz_k5pmHQVvumdbsK9jmQ66-yF2eZaVgy5IUw8w==");

       // A missing post count is treated as "no posts" rather than failing the whole run.
       int postCount = statuses_count == null ? 0 : Integer.parseInt(statuses_count);
       // Round UP in integer arithmetic. The previous "postCount / 20" truncated before the
       // ceil() call, so the final partial page (or the only page, for < 20 posts) was skipped.
       int pageCount = (postCount + 19) / 20;

       Map<String, Integer> map = new HashMap<>();
       // Captures the text between '@' and the next space; compiled once for all posts.
       // NOTE(review): a mention that is not followed by a space (e.g. at the very end of the
       // text) is not matched by this pattern.
       Pattern mentionPattern = Pattern.compile("(?<=@)(.*?)(?= )");
       for (int page = 1; page <= pageCount; page++) {
           String pageUrl = String.format("https://weibo.com/ajax/statuses/mymblog?uid=%s&page=%s",id,page);
           String pageBody = HttpClientUtil.doGet(pageUrl, null, headers2);
           com.alibaba.fastjson.JSONObject pageJson = com.alibaba.fastjson.JSONObject.parseObject(pageBody);
           com.alibaba.fastjson.JSONObject pageData = pageJson == null ? null : pageJson.getJSONObject("data");
           JSONArray posts = pageData == null ? null : pageData.getJSONArray("list");
           if (posts == null) {
               // The response carries no post list; stop paging but keep what was collected so far.
               System.out.println("No post list returned for page " + page + ", stopping");
               break;
           }
           for (int k = 0; k < posts.size(); k++) {
               com.alibaba.fastjson.JSONObject post = posts.getJSONObject(k);
               String text_raw = post == null ? null : post.getString("text_raw");
               if (text_raw == null) {
                   continue;
               }
               Matcher matcher = mentionPattern.matcher(text_raw);
               while (matcher.find()) {
                   String mention = matcher.group();
                   Integer seen = map.get(mention);
                   map.put(mention, seen == null ? 1 : seen + 1);
               }
           }
       }

       // ---- Step 4: export the counts to an Excel sheet ----
       List<Map<String, Integer>> mapArrayList = new ArrayList<>();
       mapArrayList.add(map);
       // Create a new workbook with a single sheet.
       XSSFWorkbook wb = new XSSFWorkbook();
       XSSFSheet sheet = wb.createSheet("First sheet");
       // Default row height and the width of the two data columns.
       sheet.setDefaultRowHeight((short) (2 * 256));
       sheet.setColumnWidth(0, 4000);
       sheet.setColumnWidth(1, 4000);
       // This font is registered in the workbook but never attached to a cell style here.
       XSSFFont font = wb.createFont();
       font.setFontName("宋体");
       font.setFontHeightInPoints((short) 16);
       // Header row: name / count.
       XSSFRow row = sheet.createRow(0);
       row.createCell(0).setCellValue("姓名");
       row.createCell(1).setCellValue("个数");
       System.out.println(mapArrayList);
       // One data row per distinct mention. The row index keeps increasing across maps and the
       // name always goes to column 0, so rows never overwrite each other.
       int rowIndex = 1;
       for (Map<String, Integer> counts : mapArrayList) {
           for (Map.Entry<String, Integer> entry : counts.entrySet()) {
               XSSFRow dataRow = sheet.createRow(rowIndex++);
               dataRow.createCell(0).setCellValue(entry.getKey());
               dataRow.createCell(1).setCellValue(entry.getValue());
           }
       }

       // NOTE(review): XSSFWorkbook produces the OOXML (.xlsx) format while the target file is
       // named ".xls"; the name is kept as-is here, confirm whether it should be "a.xlsx".
       String src = "D:/Ban/a.xls";
       File file = new File(src);
       // try-with-resources closes the stream even when write() fails.
       try (FileOutputStream fileOutputStream = new FileOutputStream(file)) {
           wb.write(fileOutputStream);
       } catch (IOException e) {
           e.printStackTrace();
       } finally {
           try {
               wb.close();
           } catch (IOException e) {
               e.printStackTrace();
           }
       }
   }
复制代码

 

posted @   SunSpring  阅读(60)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 零经验选手,Compose 一天开发一款小游戏!
· 一起来玩mcp_server_sqlite,让AI帮你做增删改查!!
点击右上角即可分享
微信分享提示