python 基础 6 解析之JsonPath
pip安装
pip install jsonpath
jsonpath的使用
# Basic usage: load a JSON document from disk, then query it with a
# JsonPath expression. The file is opened in a `with` block so the handle
# is closed as soon as parsing finishes, and the encoding is spelled with
# the canonical ASCII name 'utf-8'.
with open('json文件', 'r', encoding='utf-8') as fp:
    obj = json.load(fp)
ret = jsonpath.jsonpath(obj, 'jsonpath语法')
jsonpath与lxml(XPath)语法对比
数据源
{ "store": {
"book": [
{ "category": "reference",
"author": "Nigel Rees",
"title": "Sayings of the Century",
"price": 8.95
},
{ "category": "fiction",
"author": "Evelyn Waugh",
"title": "Sword of Honour",
"price": 12.99
},
{ "category": "fiction",
"author": "Herman Melville",
"title": "Moby Dick",
"isbn": "0-553-21311-3",
"price": 8.99
},
{ "category": "fiction",
"author": "J. R. R. Tolkien",
"title": "The Lord of the Rings",
"isbn": "0-395-19395-8",
"price": 22.99
}
],
"bicycle": {
"color": "red",
"price": 19.95
}
}
}
通过jsonpath读取淘票票城市接口案例
import json
import jsonpath
import urllib.request
# HTTP request headers attached to the request built in create_request().
# NOTE(review): the cookie looks like a value captured from a browser
# session; it presumably expires and must be refreshed - confirm before use.
headers = {
    # Browser-like User-Agent string.
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
    'cookie': 'miid=536765677517889060; t=78466542de5dbe84715c098fa2366f87; cookie2=11c90be2b7bda713126ed897ab23e35d; v=0; _tb_token_=ee5863e335344; cna=jYeFGkfrFXoCAXPrFThalDwd; xlly_s=1; tfstk=cdlVBIX7qIdVC-V6pSNwCDgVlVEAa8mxXMa3nx9gjUzPOZeuYsAcXzbAiJwAzG2c.; l=eBxbMUncLj6r4x9hBO5aourza77T6BAb4sPzaNbMiInca6BOT3r6QNCnaDoy7dtjgtCxretPp0kihRLHR3xg5c0c07kqm0JExxvO.; isg=BHBwrClf5nUOJrpxMvRIOGsqQT7CuVQDlydQ-WrHREsaJRDPEsmVk5EbfS1FtQzb',
    'referer': 'https://dianying.taobao.com/',
    'content-type': 'text/html;charset=UTF-8'
}
def create_request():
    """Build the request object for the Taopiaopiao city-list endpoint.

    Returns:
        A ``urllib.request.Request`` targeting the city-action URL and
        carrying the module-level ``headers``.
    """
    city_api_url = "https://dianying.taobao.com/cityAction.json?activityId&_ksTS=1644570795658_173&jsoncallback=jsonp174&action=cityAction&n_s=new&event_submit_doGetAllRegion=true"
    return urllib.request.Request(url=city_api_url, headers=headers)
def get_context(req_obj):
    """Send the request and return the JSON text inside the JSONP wrapper.

    The endpoint answers with ``jsonp174({...});``. This function strips
    the ``jsonp174(`` prefix and the closing ``)`` and returns what lies
    between them.

    Args:
        req_obj: A ``urllib.request.Request`` (or URL string) to open.

    Returns:
        The JSON payload as a ``str``.

    Raises:
        ValueError: If the response is not wrapped in ``jsonp174( ... )``.
    """
    # Context manager guarantees the response is closed even if reading or
    # decoding fails.
    with urllib.request.urlopen(req_obj) as resp:
        origin_context = resp.read().decode('utf-8')

    prefix = 'jsonp174('
    start = origin_context.find(prefix)
    # Use the LAST ')' so parentheses inside JSON string values do not
    # truncate the payload.
    end = origin_context.rfind(')')
    if start == -1 or end < start + len(prefix):
        raise ValueError('response is not wrapped in jsonp174( ... )')
    return origin_context[start + len(prefix):end]
def download_and_parse(context):
    """Write the given text to ``jsonpath_淘票票案例.json`` as UTF-8.

    Args:
        context: The text to store; any existing file content is replaced.
    """
    target_path = 'jsonpath_淘票票案例.json'
    with open(target_path, mode='w', encoding='utf-8') as out_file:
        out_file.write(context)
def parse_json():
    """Load the saved city JSON and print every ``regionName`` value.

    Reads ``jsonpath_淘票票案例.json`` from the current directory, collects
    all ``regionName`` values with the JsonPath expression
    ``$..regionName``, then prints the list followed by its length.
    """
    # `with` closes the file handle once the document has been parsed.
    with open('jsonpath_淘票票案例.json', mode='r', encoding='utf-8') as fp:
        obj = json.load(fp)
    # jsonpath.jsonpath() returns False (not an empty list) when nothing
    # matches; normalise to a list so len() below cannot raise TypeError.
    region_name_list = jsonpath.jsonpath(obj, '$..regionName') or []
    print(region_name_list)
    print(len(region_name_list))
if __name__ == '__main__':
    # Pipeline: build the request, fetch and unwrap the response, save it
    # to disk, then parse the saved file and print the region names.
    download_and_parse(get_context(create_request()))
    parse_json()
爬取Boss直聘城市信息
import urllib.request
import jsonpath
import json
# Fetch the Boss Zhipin city-sites JSON and print every "name" value in it.
url = 'https://www.zhipin.com/wapi/zpgeek/common/data/citysites.json'
# Open the response as a context manager so it is always closed.
with urllib.request.urlopen(url) as resp:
    context = resp.read().decode('utf-8')
# '$..name' selects every "name" key at any depth of the document.
result = jsonpath.jsonpath(json.loads(context), '$..name')
print(result)
import urllib.request
import jsonpath
import json
# Taopiaopiao city-list endpoint; the `jsoncallback=jsonp109` query
# parameter names the JSONP callback used in the request.
url ='https://dianying.taobao.com/cityAction.json?activityId&_ksTS=1720768406447_108&jsoncallback=jsonp109&action=cityAction&n_s=new&event_submit_doGetAllRegion=true'
# Request headers for the call above.
# NOTE(review): these look like values copied from a browser's network
# panel; the cookie presumably expires and must be refreshed - confirm.
headers = {
    # Entries starting with ':' are left disabled.
    # NOTE(review): presumably HTTP/2 pseudo-headers that cannot be sent as
    # ordinary headers - confirm.
    # ':authority':'dianying.taobao.com'
    # ':method':'GET'
    # ':path':'/cityAction.json?activityId&_ksTS=1720768406447_108&jsoncallback=jsonp109&action=cityAction&n_s=new&event_submit_doGetAllRegion=true'
    # ':scheme':'https'
    'accept':'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
    # NOTE(review): accept-encoding is disabled, presumably so the body
    # arrives uncompressed and can be decoded as UTF-8 directly - confirm.
    # 'accept-encoding':'gzip, deflate, br, zstd',
    # 'accept-language':'zh-CN,zh;q=0.9'
    # 'bx-v':'2.5.13'
    # 'cache-control':'no-cache'
    # Active cookie value (older commented-out copies of this header omitted).
    'cookie':"thw=xx; wk_cookie2=18990f9006718836afb3ee05aa926c21; wk_unb=WvKWX7%2FDoAlk; useNativeIM=false; t=932765d58a6a18c1ef0e0f1cf6df50e9; cookie2=1090904fb91f3b931a5d9a0509fc24fa; tfstk=f2AmiNXLKKWfYU4tmQfjCFFmmNMRlo11xhFOX1IZUgS7H1HXWNvkRhjx5x1vEGxyqiPxlEKl7hY6Hq6tcP4XZHVTMmMX75fO_DnK9XLfk11ZvM5LrMIf5USaJy5rFE11_0p2CQ7XlHc_-hHG_4fPSN_V_GPVU4bhS1PVQSyz4g_N_G-N0TPPRNV4uS7aUjbg_MAwNQoDcwtBlw_2ZZXreESzZNdloOjeuIVa_J7cnM8VqD3Cs36y5OA_55XMupt18nrqvtTwE3bcsbeVTUvwxaOq9obWHU-14e4gsE5cS95NEPPwrsJAUIfETubWUEpwGn4g_ZOv9OjCEVP1B_RdLp-052TPgN-1dCnQGi8wRB9dtbeVTUvwxdjyC8yeYkP1zVdzCR6VPajIMQ1IVCRFO53orJMf3a_xv40uCR6VPajKr42Qct75kDC..; _tb_token_=b43450e79765; xlly_s=1; _samesite_flag_=true; sgcookie=E100uwsc6x%2BXl7WRAZMnJAeV2aVes89fsAam4okfNBIdwkzhPFHkL%2BIlzEfRAXtAdEnP%2BJpkqiUdUQIqbll5HFWYqgKy5LyxkCsD%2BB2xivLYo7nu617qZVQjm%2BmP3yPy4TMp; havana_lgc_exp=1751870815675; sdkSilent=1720853215678; mtop_partitioned_detect=1; _m_h5_tk=0bc0edba406ed3205d61e775fc7d0ede_1720777370406; _m_h5_tk_enc=bb671fd7f0f60dd471d7714fada4d0a1; mt=ci=0_0; tracknick=; v=0; cna=G7kGHtASjUkBASQJikXBz2EI; tb_city=110100; tb_cityName=\"sbG+qQ==\"; isg=BFJSDa9aYH1Z459uhwDNpeCooxg0Y1b9v6L-bRypEIXwL_ApBPXGDzNBn4sTX86V",
    'pragma':'no-cache',
    'priority':'u=1, i',
    'referer':'https://dianying.taobao.com/index.htm?spm=a1z21.3046609.city.1.3e7b112aQiZaM7&n_s=new&city=110100',
    'sec-ch-ua':'"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
    'sec-ch-ua-mobile':'?0',
    'sec-ch-ua-platform':'"Windows"',
    'sec-fetch-dest':'empty',
    'sec-fetch-mode':'cors',
    'sec-fetch-site':'same-origin',
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
    'x-requested-with':'XMLHttpRequest'
}
# Request the city list, unwrap the JSONP payload, save it to disk, then
# print every regionName found in the saved document.
request = urllib.request.Request(url, headers=headers)
# Context manager guarantees the HTTP response is closed after reading.
with urllib.request.urlopen(request) as response:
    context = response.read().decode('utf-8')
# Strip the JSONP wrapper "callback( ... )": keep everything between the
# first '(' and the LAST ')' so parentheses inside JSON string values do
# not truncate the payload. Raises ValueError if the wrapper is missing.
context = context[context.index('(') + 1:context.rindex(')')]
with open('context.json', 'w', encoding='utf-8') as pf:
    pf.write(context)
# Re-read the saved file inside `with` so the read handle is closed too.
with open('context.json', mode='r', encoding='utf-8') as pf:
    obj = json.load(pf)
# '$..regionName' selects every "regionName" key at any depth.
data_city = jsonpath.jsonpath(obj, '$..regionName')
print(data_city)