Python 基础 6：解析之 JsonPath

pip安装
pip install jsonpath
jsonpath的使用

# Parse the JSON file into Python objects first: jsonpath queries parsed data,
# not raw text. The encoding name must use an ASCII hyphen ('utf-8'); a
# typographic hyphen makes open() raise LookupError (unknown encoding).
with open('json文件', 'r', encoding='utf-8') as fp:
    obj = json.load(fp)
# jsonpath.jsonpath returns a list of matches, or False when nothing matches.
ret = jsonpath.jsonpath(obj, 'jsonpath语法')

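jsonpath与lxml（XPath）语法对比

| XPath | JSONPath | 说明 |
| ----- | -------- | ---- |
| `/`   | `$`      | 根节点 |
| `.`   | `@`      | 当前节点 |
| `/`   | `.` 或 `[]` | 取子节点 |
| `..`  | 无       | 取父节点（JSONPath 不支持） |
| `//`  | `..`     | 递归查找，不管位置 |
| `*`   | `*`      | 通配符，匹配所有元素 |
| `[]`  | `?()`    | 过滤表达式 |
| `\|`  | `[,]`    | 并集 / 多选 |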

数据源

{ "store": {
    "book": [ 
      { "category": "reference",
        "author": "Nigel Rees",
        "title": "Sayings of the Century",
        "price": 8.95
      },
      { "category": "fiction",
        "author": "Evelyn Waugh",
        "title": "Sword of Honour",
        "price": 12.99
      },
      { "category": "fiction",
        "author": "Herman Melville",
        "title": "Moby Dick",
        "isbn": "0-553-21311-3",
        "price": 8.99
      },
      { "category": "fiction",
        "author": "J. R. R. Tolkien",
        "title": "The Lord of the Rings",
        "isbn": "0-395-19395-8",
        "price": 22.99
      }
    ],
    "bicycle": {
      "color": "red",
      "price": 19.95
    }
  }
}

通过jsonpath读取淘票票城市接口案例

import json
import jsonpath
import urllib.request

# Request headers replayed from a browser session against dianying.taobao.com.
# NOTE(review): the cookie is a captured session value hard-coded in source;
# presumably it expires -- refresh it if the endpoint starts rejecting requests.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/97.0.4692.71 Safari/537.36'
    ),
    # One cookie pair per literal; adjacent literals are joined at compile time.
    'cookie': (
        'miid=536765677517889060; '
        't=78466542de5dbe84715c098fa2366f87; '
        'cookie2=11c90be2b7bda713126ed897ab23e35d; '
        'v=0; '
        '_tb_token_=ee5863e335344; '
        'cna=jYeFGkfrFXoCAXPrFThalDwd; '
        'xlly_s=1; '
        'tfstk=cdlVBIX7qIdVC-V6pSNwCDgVlVEAa8mxXMa3nx9gjUzPOZeuYsAcXzbAiJwAzG2c.; '
        'l=eBxbMUncLj6r4x9hBO5aourza77T6BAb4sPzaNbMiInca6BOT3r6QNCnaDoy7dtjgtCxretPp0kihRLHR3xg5c0c07kqm0JExxvO.; '
        'isg=BHBwrClf5nUOJrpxMvRIOGsqQT7CuVQDlydQ-WrHREsaJRDPEsmVk5EbfS1FtQzb'
    ),
    'referer': 'https://dianying.taobao.com/',
    'content-type': 'text/html;charset=UTF-8',
}


def create_request():
    """Build the GET request for the Taobao movie city-list JSONP endpoint.

    Returns:
        A ``urllib.request.Request`` for the fixed ``cityAction.json`` URL,
        carrying the module-level ``headers``.
    """
    # The query string asks the server to wrap the JSON in ``jsonp174(...)``.
    endpoint = (
        "https://dianying.taobao.com/cityAction.json"
        "?activityId&_ksTS=1644570795658_173&jsoncallback=jsonp174"
        "&action=cityAction&n_s=new&event_submit_doGetAllRegion=true"
    )
    return urllib.request.Request(url=endpoint, headers=headers)


def get_context(req_obj):
    """Fetch the JSONP response and return the JSON text inside the wrapper.

    Args:
        req_obj: Anything ``urllib.request.urlopen`` accepts (a ``Request``
            object or a URL string).

    Returns:
        The text between ``jsonp174(`` and the final ``)`` of the response.

    Raises:
        ValueError: If the response does not contain the ``jsonp174(`` wrapper.
    """
    # Close the HTTP response deterministically once the body has been read.
    with urllib.request.urlopen(req_obj) as resp:
        origin_context = resp.read().decode('utf-8')

    _, opener, tail = origin_context.partition('jsonp174(')
    if not opener:
        raise ValueError("response is not wrapped in 'jsonp174(...)'")

    # Cut at the LAST ')' rather than the first one, so a ')' inside a JSON
    # string value does not truncate the payload.
    body, closer, _ = tail.rpartition(')')
    return body if closer else tail


def download_and_parse(context):
    """Save *context* as UTF-8 text to ``jsonpath_淘票票案例.json``.

    Despite the name, nothing is parsed here: the text is only written to
    disk, replacing any existing file of that name.

    Args:
        context: The text to store.
    """
    target = 'jsonpath_淘票票案例.json'
    with open(target, mode='w', encoding='utf-8') as out_file:
        out_file.write(context)


def parse_json():
    """Load the saved city JSON and print every ``regionName`` and the count.

    Reads ``jsonpath_淘票票案例.json`` from the current directory, collects all
    ``regionName`` values at any depth, then prints the list followed by its
    length.
    """
    # Use a context manager so the file handle is closed after parsing.
    with open('jsonpath_淘票票案例.json', mode='r', encoding='utf-8') as fp:
        obj = json.load(fp)
    # jsonpath.jsonpath returns False (not an empty list) when nothing matches;
    # normalise to [] so len() below cannot raise TypeError.
    region_name_list = jsonpath.jsonpath(obj, '$..regionName') or []
    print(region_name_list)
    print(len(region_name_list))


if __name__ == '__main__':
    # Pipeline: build the request -> fetch and unwrap the JSONP body ->
    # save it to disk -> read it back and print the region names.
    download_and_parse(get_context(create_request()))
    parse_json()

爬取Boss直聘城市信息

import urllib.request
import jsonpath
import json

# Fetch the Boss Zhipin city-sites JSON and print every "name" value in it.
url = 'https://www.zhipin.com/wapi/zpgeek/common/data/citysites.json'
# Close the HTTP response deterministically once the body has been read.
with urllib.request.urlopen(url) as resp:
    context = resp.read().decode('utf-8')
# '$..name' matches "name" keys at any depth; the result is a list of values,
# or False when the document contains no such key.
result = jsonpath.jsonpath(json.loads(context), '$..name')
print(result)
import  urllib.request
import jsonpath
import json
# City-list endpoint; ``jsoncallback=jsonp109`` makes the server wrap the JSON
# in a ``jsonp109(...)`` call.
url = (
    'https://dianying.taobao.com/cityAction.json'
    '?activityId&_ksTS=1720768406447_108&jsoncallback=jsonp109'
    '&action=cityAction&n_s=new&event_submit_doGetAllRegion=true'
)

# Headers copied from a browser request. The HTTP/2 pseudo-headers
# (:authority, :method, :path, :scheme) and accept-encoding, accept-language,
# bx-v and cache-control are deliberately not sent.
# NOTE(review): the cookie is a captured session value hard-coded in source;
# presumably it expires -- refresh it if the endpoint starts rejecting requests.
headers = {
    'accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
    'cookie': "thw=xx; wk_cookie2=18990f9006718836afb3ee05aa926c21; wk_unb=WvKWX7%2FDoAlk; useNativeIM=false; t=932765d58a6a18c1ef0e0f1cf6df50e9; cookie2=1090904fb91f3b931a5d9a0509fc24fa; tfstk=f2AmiNXLKKWfYU4tmQfjCFFmmNMRlo11xhFOX1IZUgS7H1HXWNvkRhjx5x1vEGxyqiPxlEKl7hY6Hq6tcP4XZHVTMmMX75fO_DnK9XLfk11ZvM5LrMIf5USaJy5rFE11_0p2CQ7XlHc_-hHG_4fPSN_V_GPVU4bhS1PVQSyz4g_N_G-N0TPPRNV4uS7aUjbg_MAwNQoDcwtBlw_2ZZXreESzZNdloOjeuIVa_J7cnM8VqD3Cs36y5OA_55XMupt18nrqvtTwE3bcsbeVTUvwxaOq9obWHU-14e4gsE5cS95NEPPwrsJAUIfETubWUEpwGn4g_ZOv9OjCEVP1B_RdLp-052TPgN-1dCnQGi8wRB9dtbeVTUvwxdjyC8yeYkP1zVdzCR6VPajIMQ1IVCRFO53orJMf3a_xv40uCR6VPajKr42Qct75kDC..; _tb_token_=b43450e79765; xlly_s=1; _samesite_flag_=true; sgcookie=E100uwsc6x%2BXl7WRAZMnJAeV2aVes89fsAam4okfNBIdwkzhPFHkL%2BIlzEfRAXtAdEnP%2BJpkqiUdUQIqbll5HFWYqgKy5LyxkCsD%2BB2xivLYo7nu617qZVQjm%2BmP3yPy4TMp; havana_lgc_exp=1751870815675; sdkSilent=1720853215678; mtop_partitioned_detect=1; _m_h5_tk=0bc0edba406ed3205d61e775fc7d0ede_1720777370406; _m_h5_tk_enc=bb671fd7f0f60dd471d7714fada4d0a1; mt=ci=0_0; tracknick=; v=0; cna=G7kGHtASjUkBASQJikXBz2EI; tb_city=110100; tb_cityName=\"sbG+qQ==\"; isg=BFJSDa9aYH1Z459uhwDNpeCooxg0Y1b9v6L-bRypEIXwL_ApBPXGDzNBn4sTX86V",
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://dianying.taobao.com/index.htm?spm=a1z21.3046609.city.1.3e7b112aQiZaM7&n_s=new&city=110100',
    'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
    'x-requested-with': 'XMLHttpRequest',
}

# Fetch the JSONP city list, unwrap it, save it to disk and print the names.
request = urllib.request.Request(url, headers=headers)

# Close the HTTP response deterministically once the body has been read.
with urllib.request.urlopen(request) as response:
    context = response.read().decode('utf-8')

# Strip the JSONP wrapper: keep the text between the first '(' and the LAST
# ')' so that parentheses inside JSON string values do not truncate the data.
# index()/rindex() raise ValueError if the wrapper is missing.
context = context[context.index('(') + 1:context.rindex(')')]

with open('context.json', 'w', encoding='utf-8') as pf:
    pf.write(context)

# Read the saved file back with a context manager so the handle is closed.
with open('context.json', mode='r', encoding='utf-8') as pf:
    obj = json.load(pf)
# '$..regionName' matches "regionName" keys at any depth; the result is a list
# of values, or False when there is no match.
data_city = jsonpath.jsonpath(obj, '$..regionName')
print(data_city)
posted @ 2024-07-12 17:30  donghongchao  阅读(17)  评论(0编辑  收藏  举报