最近在找房子,就写了个程序,爬取一下58房源

由于58的APP筛房子没有各种排序,不好找房子,没有我爱我家的APP写得好,但是我爱我家又要中介费

就写了个程序爬取导出58所有房源,然后一览众山小

步骤如下 

下载使用Charles

使用方法 

https://blog.csdn.net/forebe/article/details/98945139

 

手机连接上Charles,并确认能抓到数据之后

打开58APP 设置你搜的租房条件 

然后在手机上一直往下拉,拉到底

然后过滤请求

 

https://apphouse.58.com/api/list/chuzu/?focusActiveDict

 

 

接下来全选 右键保存到一个文件夹

 

 

 

 

 

 

 

 

然后随便创建一个项目 把代码贴进去 运行  

 

运行代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
package com.songaw.pachong;
 
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
 
/**
 * @author songanwei
 * @description todo
 * @date 2022/8/31
 */
public class WubaPachong {
    public static void main(String[] args) {
        File dir=new File("/Users/mac/Desktop/58");
        List<JSONObject> jsonList=new ArrayList<>();
        if(dir.isDirectory()){
            File[] jsonFiles=dir.listFiles();
            if(jsonFiles!=null&&jsonFiles.length>0){
 
                for(File jsonFile:jsonFiles){
                    String jsonStr=readFile(jsonFile);
                    jsonStr=jsonStr.replaceAll("\n","").replaceAll("\t","");
                    if(jsonStr==null){
                        continue;
                    }
                    try{
                        JSONObject jsonObject=JSON.parseObject(jsonStr);
                        jsonList.add(jsonObject);
                    }catch (Exception e){
                        e.printStackTrace();
                    }
                }
            }
        }
        if(jsonList.size()>0){
            exportPachong("58租房",jsonList);
        }
    }
    private static String readFile(File file){
        String line=null;
        FileInputStream fis=null;
        try {
             fis = new FileInputStream(file);
            byte[] data = new byte[(int) file.length()];
            fis.read(data);
             line = new String(data, StandardCharsets.UTF_8);
        }catch (Exception e){
            return null;
        }finally {
            if(fis!=null) {
                try {
                    fis.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return line;
    }
    public static void exportPachong(String title, List<JSONObject> excelMapList){
 
//        序号    抓取时间    发布平台    板块  子版块 榜单名字    榜单地址    第1个产品名字 第2个产品名字 第3个产品名字 第4个产品名字 第5个产品名字 第6个产品名字 第7个产品名字 第8个产品名字 第9个产品名字 第10个产品名字
        Date now = new Date();
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
        String dateStr=sdf.format(now);
 
 
 
 
 
 
        List<Object[]> dataList = new ArrayList<Object[]>();
        int count=0;
        for (int i = 0; i < excelMapList.size(); i++) {
            JSONObject jsonObject = excelMapList.get(i).getJSONObject("result");
            JSONObject  getListInfo=jsonObject.getJSONObject("getListInfo");
            if(getListInfo==null){
                continue;
            }
            JSONArray  infolist=getListInfo.getJSONArray("infolist");
            if(infolist==null){
                continue;
            }
            for(int j=0;j<infolist.size();j++){
                count++;
                JSONObject info=infolist.getJSONObject(j);
 
                List<Object> elementList=new ArrayList<>();
                elementList.add(""+count);
                elementList.add(dateStr);
                elementList.add(info.get("title") == null ? "" : info.get("title").toString());
                elementList.add(info.get("huxing") == null ? "" : info.get("huxing").toString());
                elementList.add(info.get("area") == null ? "" : info.get("area").toString().replace("㎡",""));
                elementList.add(info.get("price") == null ? "" : info.get("price").toString().replace("元/月",""));
 
                elementList.add(info.get("date") == null ? "" : info.get("date").toString());
                String tuijian="";
                try {
                     tuijian = info.getJSONObject("recommendReason").getString("text");
 
                }catch (Exception e){
                    e.printStackTrace();
                }
                elementList.add(tuijian);
 
                elementList.add(info.get("lastLocal") == null ? "" : info.get("lastLocal").toString());
                String local_address="";
                try {
 
                     local_address = info.getJSONObject("distanceDict").getString("local_address");
 
                }catch (Exception e){
                    e.printStackTrace();
                }
                elementList.add(local_address);
                String louceng="";
                String chaoxiang="";
                String jingwei="";
                try {
 
                    louceng = info.getJSONObject("detailaction").getJSONObject("content").getJSONObject("preLoadInfo")
                            .getJSONObject("result").getJSONArray("info").getJSONObject(5).getJSONObject("zf_titleinfo_area")
                            .getJSONArray("base_info").getJSONObject(2).getString("title");
                    chaoxiang = info.getJSONObject("detailaction").getJSONObject("content").getJSONObject("preLoadInfo")
                            .getJSONObject("result").getJSONArray("info").getJSONObject(5).getJSONObject("zf_titleinfo_area")
                            .getJSONArray("base_info").getJSONObject(3).getString("title");
 
 
                }catch (Exception e){
                    e.printStackTrace();
                }
                elementList.add(louceng);
                elementList.add(chaoxiang);
                try {
                    String xq_lat=info.getJSONObject("detailaction").getJSONObject("content").getJSONObject("preLoadInfo")
                            .getJSONObject("result").getJSONArray("info").getJSONObject(13).getJSONObject("zf_simplemap_trip").getString("xq_lat");
                    String xq_lon=info.getJSONObject("detailaction").getJSONObject("content").getJSONObject("preLoadInfo")
                            .getJSONObject("result").getJSONArray("info").getJSONObject(13).getJSONObject("zf_simplemap_trip").getString("xq_lon");
                    jingwei = xq_lat+","+xq_lon;
 
                }catch (Exception e){
                    e.printStackTrace();
                }
                elementList.add(info.get("usedTages") == null ? "" : info.get("usedTages").toString());
                elementList.add(jingwei);
 
 
                Object[] element =new String[elementList.size()];
                for(int k=0;k<elementList.size();k++) {
                    element[k]=elementList.get(k);
                }
                dataList.add(element);
            }
        }
        String[] rowName = {"序号",
                "抓取时间",
                "标题",
                "户型",
                "面积",
                "价格",
                "发布时间",
                "推荐内容",
                "地址1",
                "地址2",
                "楼层",
                "朝向",
                "Tag",
                "经纬度"
        };
 
        try {
            String fileUrl=title+new SimpleDateFormat("yyyy年MM月dd日_HH").format(new Date())+".xls";
            File file = new File("/tmp/"+fileUrl);
            HSSFWorkbook workbook;
            if(file.exists()) {
                workbook=new HSSFWorkbook(new FileInputStream(file));
 
                workbook = PoiUtils.export2(workbook,title, rowName, dataList);
            }else{
                workbook = PoiUtils.export(title, rowName, dataList);
            }
            // Excel的名字
 
            FileOutputStream outputStream = new FileOutputStream(file);
            workbook.write(outputStream);
            System.out.println("抓取成功");
            outputStream.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

  工具类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
package com.songaw.pachong;
 
import com.alibaba.fastjson.JSONObject;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.hssf.util.CellRangeAddress;
import org.apache.poi.hssf.util.HSSFColor;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
 
/**
 * @Description:excel导出工具类
 * @Author: dk
 * @Date: 2020/1/14 9:49
 */
public class PoiUtils {
 
    public static void exportPachong(String title, List<JSONObject> excelMapList,Call call){
 
        // 序号   发布时间    抓取时间    发布平台    板块  子版块 品牌  内容地址    内容形式(图文/直播/视频)  标题  阅读数/曝光量/播放量 互动量(助力) 点赞数 评论数 收藏数 转发数 发布者昵称   发布者认证信息 发布者粉丝数  发布者个人主页点赞数  发布者关注数
 
        String[] rowName = {"序号",
                "发布时间",
                "抓取时间",
                "发布平台",
                "板块",
                "子版块",
                "品牌",
                "内容地址",
                "内容形式(图文/直播/视频)",
                "标题",
                "阅读数/曝光量/播放量",
                "互动量(助力)",
                "点赞数",
                "评论数",
                "收藏数",
                "转发数",
                "发布者昵称",
                "发布者认证信息",
                "发布者粉丝数",
                "发布者个人主页点赞数",
                "发布者关注数"};
        List<Object[]> dataList = new ArrayList<Object[]>();
        for (int i = 0; i < excelMapList.size(); i++) {
            JSONObject jsonObject = excelMapList.get(i);
            Object[] element =call.call(i,jsonObject);
            dataList.add(element);
        }
        try {
            String fileUrl=title+new SimpleDateFormat("yyyy年MM月dd日_HH").format(new Date())+".xls";
            File file = new File("/tmp/"+fileUrl);
            HSSFWorkbook workbook;
            if(file.exists()) {
                workbook=new HSSFWorkbook(new FileInputStream(file));
                workbook = PoiUtils.export2(workbook,title, rowName, dataList);
            }else{
                workbook = PoiUtils.export(title, rowName, dataList);
            }
            // Excel的名字
 
            FileOutputStream outputStream = new FileOutputStream(file);
            workbook.write(outputStream);
            outputStream.close();
            System.out.println("爬取完成");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    abstract static class Call{
        abstract Object[] call(int i,JSONObject jsonObject);
 
    }
    /**
     * excel导出数据
     *
     * @param title    显示的导出表的标题
     * @param rowName  导出表的列名
     * @param dataList 表的内容
     * @return
     */
    public static HSSFWorkbook export(String title, String[] rowName, List<Object[]> dataList) throws Exception {
        HSSFWorkbook workbook = new HSSFWorkbook(); // 创建工作簿对象
        HSSFSheet sheet = workbook.createSheet(title); // 创建工作表
        // 产生表格标题行
        HSSFRow rowm = sheet.createRow(0);
        HSSFCell cellTiltle = rowm.createCell(0);
        // sheet样式定义【getColumnTopStyle()/getStyle()均为自定义方法 - 在下面 - 可扩展】
        HSSFCellStyle columnTopStyle = getColumnTopStyle(workbook);// 获取列头样式对象
        HSSFCellStyle style = getStyle(workbook); // 单元格样式对象
        sheet.addMergedRegion(new CellRangeAddress(0, 1, 0, (rowName.length - 1)));
        cellTiltle.setCellStyle(columnTopStyle);
        cellTiltle.setCellValue(title);
        // 定义所需列数
        int columnNum = rowName.length;
        HSSFRow rowRowName = sheet.createRow(2); // 在索引2的位置创建行(最顶端的行开始的第二行)
        // 将列头设置到sheet的单元格中
        for (int n = 0; n < columnNum; n++) {
            HSSFCell cellRowName = rowRowName.createCell(n); // 创建列头对应个数的单元格
            cellRowName.setCellType(HSSFCell.CELL_TYPE_STRING); // 设置列头单元格的数据类型
            HSSFRichTextString text = new HSSFRichTextString(rowName[n]);
            cellRowName.setCellValue(text); // 设置列头单元格的值
            cellRowName.setCellStyle(columnTopStyle); // 设置列头单元格样式
 
        }
        // 将查询出的数据设置到sheet对应的单元格中
        for (int i = 0; i < dataList.size(); i++) {
            Object[] obj = dataList.get(i);// 遍历每个对象
            HSSFRow row = sheet.createRow(i + 3);// 创建所需的行数
            for (int j = 0; j < obj.length; j++) {
                HSSFCell cell = null; // 设置单元格的数据类型
 
                cell = row.createCell(j, HSSFCell.CELL_TYPE_STRING);
                if (!"".equals(obj[j]) && obj[j] != null) {
                    cell.setCellValue(obj[j].toString()); // 设置单元格的值
                }else{
                    cell.setCellValue("");
                }
 
                cell.setCellStyle(style); // 设置单元格样式
            }
        }
        // 让列宽随着导出的列长自动适应
        for (int colNum = 0; colNum < columnNum; colNum++) {
            int columnWidth = sheet.getColumnWidth(colNum) / 256;
            for (int rowNum = 0; rowNum < sheet.getLastRowNum(); rowNum++) {
                HSSFRow currentRow;
                // 当前行未被使用过
                if (sheet.getRow(rowNum) == null) {
                    currentRow = sheet.createRow(rowNum);
                } else {
                    currentRow = sheet.getRow(rowNum);
                }
                if (currentRow.getCell(colNum) != null) {
                    HSSFCell currentCell = currentRow.getCell(colNum);
                    if (currentCell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
                        int length=0;
                        try {
                            if (currentCell != null && currentCell.getStringCellValue() != null) {
                                length = currentCell.getStringCellValue().getBytes().length;
                            }
                        }catch (Exception e){
 
                        }
                        if (columnWidth < length) {
                            columnWidth = length;
                        }
                    }
                }
            }
            try {
                if (colNum == 0) {
                    sheet.setColumnWidth(colNum, (columnWidth - 2) * 256);
                } else {
                    sheet.setColumnWidth(colNum, (columnWidth + 4) * 256);
                }
            }catch (Exception e){
 
            }
        }
        return workbook;
    }
 
 
    /**
     * excel导出数据
     *
     * @param title    显示的导出表的标题
     * @param rowName  导出表的列名
     * @param dataList 表的内容
     * @return
     */
    public static HSSFWorkbook export2(HSSFWorkbook workbook,String title, String[] rowName, List<Object[]> dataList) throws Exception {
 
        HSSFSheet sheet = workbook.getSheet(title); // 创建工作表
        // 产生表格标题行
 
 
        // sheet样式定义【getColumnTopStyle()/getStyle()均为自定义方法 - 在下面 - 可扩展】
        HSSFCellStyle columnTopStyle = getColumnTopStyle(workbook);// 获取列头样式对象
        HSSFCellStyle style = getStyle(workbook); // 单元格样式对象
        sheet.addMergedRegion(new CellRangeAddress(0, 1, 0, (rowName.length - 1)));
 
        // 定义所需列数
        int columnNum = rowName.length;
        int lastRowNum= sheet.getLastRowNum();
        // 将查询出的数据设置到sheet对应的单元格中
        for (int i = 0; i < dataList.size(); i++) {
            Object[] obj = dataList.get(i);// 遍历每个对象
            HSSFRow row = sheet.createRow(i +lastRowNum+1);// 创建所需的行数
            for (int j = 0; j < obj.length; j++) {
                HSSFCell cell = null; // 设置单元格的数据类型
 
                cell = row.createCell(j, HSSFCell.CELL_TYPE_STRING);
                if (!"".equals(obj[j]) && obj[j] != null) {
                    cell.setCellValue(obj[j].toString()); // 设置单元格的值
                }
 
                cell.setCellStyle(style); // 设置单元格样式
            }
        }
        // 让列宽随着导出的列长自动适应
        for (int colNum = 0; colNum < columnNum; colNum++) {
            int columnWidth = sheet.getColumnWidth(colNum) / 256;
            for (int rowNum = 0; rowNum < sheet.getLastRowNum(); rowNum++) {
                HSSFRow currentRow;
                // 当前行未被使用过
                if (sheet.getRow(rowNum) == null) {
                    currentRow = sheet.createRow(rowNum);
                } else {
                    currentRow = sheet.getRow(rowNum);
                }
                if (currentRow.getCell(colNum) != null) {
                    HSSFCell currentCell = currentRow.getCell(colNum);
                    if (currentCell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
                        int length=0;
                        try {
                            if (currentCell != null && currentCell.getStringCellValue() != null) {
                                length = currentCell.getStringCellValue().getBytes().length;
                            }
                        }catch (Exception e){
 
                        }
                        if (columnWidth < length) {
                            columnWidth = length;
                        }
                    }
                }
            }
            try {
                if (colNum == 0) {
                    sheet.setColumnWidth(colNum, (columnWidth - 2) * 256);
                } else {
                    sheet.setColumnWidth(colNum, (columnWidth + 4) * 256);
                }
            }catch (Exception e){
 
            }
        }
        return workbook;
    }
 
    /**
     * 列头单元格样式
     *
     * @param workbook
     * @return
     */
    public static HSSFCellStyle getColumnTopStyle(HSSFWorkbook workbook) {
        // 设置字体
        HSSFFont font = workbook.createFont();
        // 设置字体大小
        font.setFontHeightInPoints((short) 11);
        // 字体加粗
        font.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD);
        // 设置字体名字
        font.setFontName("Courier New");
        // 设置样式;
        HSSFCellStyle style = workbook.createCellStyle();
        // 设置底边框
        style.setBorderBottom(HSSFCellStyle.BORDER_THIN);
        // 设置底边框颜色
        style.setBottomBorderColor(HSSFColor.BLACK.index);
        // 设置左边框
        style.setBorderLeft(HSSFCellStyle.BORDER_THIN);
        // 设置左边框颜色
        style.setLeftBorderColor(HSSFColor.BLACK.index);
        // 设置右边框
        style.setBorderRight(HSSFCellStyle.BORDER_THIN);
        // 设置右边框颜色
        style.setRightBorderColor(HSSFColor.BLACK.index);
        // 设置顶边框
        style.setBorderTop(HSSFCellStyle.BORDER_THIN);
        // 设置顶边框颜色
        style.setTopBorderColor(HSSFColor.BLACK.index);
        // 在样式用应用设置的字体
        style.setFont(font);
        // 设置自动换行
        style.setWrapText(false);
        // 设置水平对齐的样式为居中对齐
        style.setAlignment(HSSFCellStyle.ALIGN_CENTER);
        // 设置垂直对齐的样式为居中对齐
        style.setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER);
        return style;
    }
 
    /**
     * 列数据信息单元格样式
     *
     * @param workbook
     * @return
     */
    public static HSSFCellStyle getStyle(HSSFWorkbook workbook) {
        // 设置字体
        HSSFFont font = workbook.createFont();
        // 设置字体大小
        // font.setFontHeightInPoints((short)10);
        // 字体加粗
        // font.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD);
        // 设置字体名字
        font.setFontName("Courier New");
        // 设置样式;
        HSSFCellStyle style = workbook.createCellStyle();
        // 设置底边框
        style.setBorderBottom(HSSFCellStyle.BORDER_THIN);
        // 设置底边框颜色
        style.setBottomBorderColor(HSSFColor.BLACK.index);
        // 设置左边框
        style.setBorderLeft(HSSFCellStyle.BORDER_THIN);
        // 设置左边框颜色
        style.setLeftBorderColor(HSSFColor.BLACK.index);
        // 设置右边框
        style.setBorderRight(HSSFCellStyle.BORDER_THIN);
        // 设置右边框颜色
        style.setRightBorderColor(HSSFColor.BLACK.index);
        // 设置顶边框
        style.setBorderTop(HSSFCellStyle.BORDER_THIN);
        // 设置顶边框颜色
        style.setTopBorderColor(HSSFColor.BLACK.index);
        // 在样式用应用设置的字体
        style.setFont(font);
        // 设置自动换行
        style.setWrapText(false);
        // 设置水平对齐的样式为居中对齐
        style.setAlignment(HSSFCellStyle.ALIGN_CENTER);
        // 设置垂直对齐的样式为居中对齐
        style.setVerticalAlignment(HSSFCellStyle.VERTICAL_CENTER);
        return style;
    }
}

  效果:

 

posted @   不断努力的青春  阅读(383)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· Open-Sora 2.0 重磅开源!
· 周边上新:园子的第一款马克杯温暖上架
点击右上角即可分享
微信分享提示