抓取赶集app数据
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Scrape listing and detail data from the Ganji app's datashare endpoint.

The script first requests one page of search results, then requests the
detail record of every post on that page and collects a small set of
fields from each detail record.
"""
import json
import time

import requests

url = "https://app.ganji.com/datashare/"

# Headers sent with every request. The "interface" header selects which
# API the endpoint serves; the value here is the one used for list searches.
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "userid": "C1ED10776D9B6108D8FEFEE4EA53058A",
    "model": "Generic/iphone",
    "customerid": "705",
    "clientagent": "iPhone 6S Plus#414*736#11.0.3",
    "versionid": "8.3.0",
    "os": "ios",
    "net": "wifi",
    "dv": "iPhone 6S Plus",
    "interface": "SearchPostsByJson3",
    "accept-language": "zh-cn",
}


def req(url, headers, data):
    """POST ``data`` to ``url`` and return the decoded JSON response.

    Args:
        url: Endpoint to post to.
        headers: HTTP headers to send with the request.
        data: Form-encoded request body.

    Returns:
        The parsed JSON body, or ``None`` if the request failed or the
        response body was not valid JSON (the error is printed).
    """
    try:
        r = requests.post(url, headers=headers, data=data, timeout=5)
        return r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print("requests error: ", e, "requests url: ", url)
        return None


def get_ganji_list_data():
    """Fetch one page of search results.

    Returns:
        The parsed JSON response, or ``None`` if the request failed.
    """
    # The "value" of andKeywords is the URL-encoded search keyword.
    data = 't=-576747455&&showType=0&showtype=0&jsonArgs={"pageSize":20,"cityScriptIndex":2300,"majorCategoryScriptIndex":7,"queryFilters":[],"categoryId":7,"andKeywords":[{"name":"title","value":"%E5%95%86%E9%93%BA%E5%87%BA%E5%94%AE"}],"customerId":"705","sortKeywords":[{"field":"post_at","sort":"desc"}],"pageIndex":1}'
    return req(url, headers, data)


def _extract_article_fields(data):
    """Pick the fields of interest out of one detail record."""
    price = data["price"]
    return {
        "price": price["v"],
        "price_unit": price["u"],
        "title": data["title"],
        "city": data["city"],
        "description": data["description"],
        "district_name": data["district_name"],
        "street_name": data["street_name"],
        "latlng": data["latlng"],
        "id": data["id"],
    }


def get_article_data():
    """Fetch the detail record of every post in the search results.

    Returns:
        A list with one dict of extracted fields per post whose detail
        request succeeded with ``status == 0``. The list is empty when the
        list request itself failed.
    """
    ganji_data = get_ganji_list_data()
    if ganji_data is None:
        return []

    print("count: ", ganji_data.get("total"))

    articles = []
    for data_ in ganji_data.get("posts") or []:
        title, d_sign, puid = data_["title"], data_["d_sign"], data_["puid"]
        print(title, d_sign)
        data_article = "d_sign={0}&cityId=176&post_type_for_maidian=5&categoryId=7&spfy=0".format(d_sign)

        # Details are looked up by puid, which must be sent as a header.
        # Build a per-request copy so the shared module-level headers keep
        # the list-search interface for any later list request.
        article_headers = dict(headers, interface="GetPostByPuid", puid=puid)
        content_data = req(url, article_headers, data_article)

        # req() returns None on failure; skip instead of crashing.
        if content_data is not None and content_data.get("status") == 0:
            articles.append(_extract_article_fields(content_data["data"]))

        # Pause between detail requests.
        time.sleep(2)

    return articles


if __name__ == "__main__":
    for article in get_article_data():
        print(json.dumps(article, ensure_ascii=False))
header 里的字段真多，经过最终测试，只需要保留上面这几种即可，累死宝宝了。
教程仅供技术研究学习使用,若有侵权,联系本人删除
本文作者:🐳.城南
本文链接:https://www.cnblogs.com/dockers/p/7811514.html
版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步