爬取失信人员(只需输入姓名就能获取)
代码:
import json

import pandas as pd

try:
    import requests
except ImportError:
    # ``requests`` is only needed for the default transport; keep the module
    # importable without it so a caller-supplied ``session`` can still be used.
    requests = None

_API_URL = 'https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php'
_PAGE_SIZE = 10
_TIMEOUT_SECONDS = 10
_COLUMNS = ['Name', 'IDcard', 'Area']
_HEADERS = {
    "Host": "sp0.baidu.com",
    "Accept": "*/*",
    "Connection": "keep-alive",
    "Accept-Encoding": "gzip, deflate, br",
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Referer': 'https://www.baidu.com/s?wd=%E5%A4%B1%E4%BF%A1%E4%BA%BA',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36',
}


def _query_params(iname, page):
    """Build the query string parameters for one result page (0-based)."""
    return {
        'pn': str(page * _PAGE_SIZE),
        'resource_id': '6899',
        'query': '失信被执行人名单',
        'iname': iname,
        'rn': str(_PAGE_SIZE),
        'ie': 'utf-8',
        'oe': 'utf-8',
        'format': 'json',
        # Kept exactly as in the original request. The parser below does not
        # depend on the callback name the server echoes back.
        'cb': 'cb=jQuery110204669623007201096_1546820149157',
    }


def _extract_records(body):
    """Return ``(iname, cardNum, areaName)`` tuples found in one response body.

    The body is a JSON object wrapped in a JSONP callback.  Instead of slicing
    off a fixed number of characters, the outermost ``{...}`` is located so the
    length of the callback prefix/suffix does not matter.
    """
    start = body.find('{')
    end = body.rfind('}')
    if start == -1 or end < start:
        raise ValueError(
            'response does not contain a JSON object: {!r}'.format(body[:80])
        )
    payload = json.loads(body[start:end + 1])

    records = []
    for group in payload.get('data') or []:
        for entry in group.get('result') or []:
            records.append(
                (entry.get('iname'), entry.get('cardNum'), entry.get('areaName'))
            )
    return records


def listname(iname, max_pages=100, session=None):
    """Query the Baidu '失信被执行人名单' API for a name and tabulate the hits.

    Pages of ``_PAGE_SIZE`` results are requested one after another until a
    page comes back empty or ``max_pages`` pages have been fetched.

    Args:
        iname: Name to search for (sent as the ``iname`` query parameter).
        max_pages: Upper bound on the number of pages requested.
        session: Optional object with a ``requests``-compatible
            ``get(url, headers=..., params=..., timeout=...)`` method, e.g. a
            ``requests.Session``.  Defaults to the ``requests`` module itself.

    Returns:
        A ``pandas.DataFrame`` with columns ``Name``, ``IDcard`` and ``Area``
        holding the ``iname``, ``cardNum`` and ``areaName`` fields of every
        result, with duplicate rows removed and a fresh 0-based index.  Fields
        missing from a result become ``None``.

    Raises:
        ImportError: ``requests`` is not installed and no ``session`` is given.
        ValueError: A response body contains no parsable JSON object.
        Exception: Whatever ``raise_for_status()`` of the transport raises on
            an HTTP error status.
    """
    if session is None:
        if requests is None:
            raise ImportError(
                "the 'requests' package is required unless a session is supplied"
            )
        session = requests

    records = []
    for page in range(max_pages):
        response = session.get(
            _API_URL,
            headers=_HEADERS,
            params=_query_params(iname, page),
            timeout=_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        page_records = _extract_records(response.content.decode('utf-8'))
        if not page_records:
            # An empty page means the result set is exhausted; requesting the
            # remaining pages would only add useless round-trips.
            break
        records.extend(page_records)

    # The Name column holds the names returned by the API, not the search term.
    frame = pd.DataFrame(records, columns=_COLUMNS)
    return frame.drop_duplicates().reset_index(drop=True)


if __name__ == '__main__':
    print(listname('XXX'))