为了能到远方,脚下的每一步都不能少.|

🐳.城南

园龄:7年5个月粉丝:11关注:1

抓取赶集app数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python
# -*- coding:utf-8 -*-
 
 
import json
import time

import requests
 
url = "https://app.ganji.com/datashare/"
 
headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "userid": "C1ED10776D9B6108D8FEFEE4EA53058A",
            "model":"Generic/iphone",
            "customerid":"705",
            "clientagent":"iPhone 6S Plus#414*736#11.0.3",
            "versionid":"8.3.0",
            "os":"ios",
            "net":"wifi",
            "dv":"iPhone 6S Plus",
            "interface":"SearchPostsByJson3",
            "accept-language":"zh-cn",
        }
 
def req(url, headers, data):
    content = None
    try:
        r = requests.post(url, headers=headers, data=data, timeout=5)
        content = r.json()
    except Exception as e:
        print("requests error: ", e, "requests url: ", url)
    return content
 
def get_ganji_list_data():
    # 获取列表数据
    data = 't=-576747455&&showType=0&showtype=0&jsonArgs={"pageSize":20,"cityScriptIndex":2300,"majorCategoryScriptIndex":7,"queryFilters":[],"categoryId":7,"andKeywords":[{"name":"title","value":"%E5%95%86%E9%93%BA%E5%87%BA%E5%94%AE"}],"customerId":"705","sortKeywords":[{"field":"post_at","sort":"desc"}],"pageIndex":1}'
    ganji_data = req(url, headers, data)
    if ganji_data is not None:
        return ganji_data
    return None
 
def get_article_data():
    ganji_data = get_ganji_list_data()
    if ganji_data is not None:
        data_list = ganji_data["posts"]
        print("count: ", ganji_data["total"])
 
        for data_ in data_list:
            title, d_sign, puid = data_["title"], data_["d_sign"], data_["puid"]
            print(title, d_sign)
            data_article = "d_sign={0}&cityId=176&post_type_for_maidian=5&categoryId=7&spfy=0".format(d_sign)
            # 根据 puid 获取详细信息. puid  需放在headers中
            headers["interface"] = "GetPostByPuid"
            headers["puid"] = puid
            content_data = req(url, headers, data_article)
            if content_data["status"] == 0:
                data = content_data["data"]
                end_data = {}
                end_data["price"] = data["price"]["v"]
                end_data["price_unit"] = data["price"]["u"]
                end_data["title"] = data["title"]
                end_data["city"] = data["city"]
                end_data["description"] = data["description"]
                end_data["district_name"] = data["district_name"]
                end_data["street_name"] = data["street_name"]
                end_data["latlng"] = data["latlng"]
                end_data["id"] = data["id"]
 
            time.sleep(2)

header 里的字段真多,经过最终测试,只需要上面这几种即可,累死宝宝了。

 教程仅供技术研究学习使用,若有侵权,请联系本人删除。

本文作者:🐳.城南

本文链接:https://www.cnblogs.com/dockers/p/7811514.html

版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。

posted @   🐳.城南  阅读(406)  评论(2)  编辑  收藏  举报
点击右上角即可分享
微信分享提示
评论
收藏
关注
推荐
深色
回顶
收起