python3 清洗json数据

-----------------
getCommunityData.py
-----------------

import json
import os
import os.path
import csv
import parameterConfig
from getFocusedPerson import get_focused_person


# Walk a directory tree (rootdir) and run dirFunc on every .txt file found.
def waklThroughDir(rootdir, outputfile, jsonfields, dirFunc):
    """Apply ``dirFunc`` to every ``.txt`` file found under ``rootdir``.

    Args:
        rootdir: Directory traversed recursively with ``os.walk``.
        outputfile: Name of the output file; passed through to ``dirFunc``.
        jsonfields: Description of the fields to parse; passed through to
            ``dirFunc``.
        dirFunc: Callable invoked as ``dirFunc(path, jsonfields, outputfile)``
            for each matching file, where ``path`` is the directory joined
            with the file name.
    """
    # parent is the directory currently being visited, filenames the files
    # directly inside it; the sub-directory list is not needed here.
    for parent, _dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            # Use endswith rather than splitting on '.' so that a file whose
            # entire name is "txt" (no suffix at all) is not picked up.
            if filename.endswith('.txt'):
                dirFunc(os.path.join(parent, filename), jsonfields, outputfile)

# Open a CSV file for appending (writing the header row if it is new).
def createCsv(file, first_line):
    """Return a ``csv.writer`` that appends to ``file``.

    Args:
        file: Path of the CSV file to append to. It is created when missing.
        first_line: Header row; written only when ``file`` did not exist
            before this call.

    Returns:
        A ``csv.writer`` bound to the file opened in ``'a+'`` mode.

    Note:
        The file object is not closed here and is not returned, so it stays
        open for as long as the returned writer keeps it alive.
    """
    is_new_file = not os.path.exists(file)
    # newline='' is what the csv module asks for: without it the writer's
    # '\r\n' terminator is translated again on platforms that use '\r\n'.
    csvfile = open(file, 'a+', newline='')
    writer = csv.writer(csvfile)
    if is_new_file:
        writer.writerow(first_line)
    return writer

def jsonArrayToCsv(json_file_name, json_fields, csv_file_name):
    """Convert every entry of a JSON file's ``rows`` array into a CSV row.

    Args:
        json_file_name: Path of the JSON text file to read.
        json_fields: Field descriptions forwarded to ``jsonToCsv``.
        csv_file_name: Path of the CSV file that ``jsonToCsv`` appends to.

    Raises:
        KeyError: If the decoded JSON object has no ``rows`` key.
        ValueError: If the file content is not valid JSON.
    """
    # Read the whole file and close it straight away.
    with open(json_file_name) as json_file:
        text = json_file.read()
    # Drop the byte-order mark that files saved on Windows may start with;
    # json.loads would otherwise reject the text.
    if text.startswith('\ufeff'):
        text = text[1:]
    json_data = json.loads(text)
    for row in json_data['rows']:
        jsonToCsv(csv_file_name, row, json_fields)

def _lookup_field(data, field):
    """Return the value stored at ``field`` in ``data``, or "" when absent.

    ``field`` is either a top-level key or ``"parent/child"`` for a value
    nested one level down.
    """
    parts = field.split('/')
    if parts[0] not in data:
        return ""
    value = data[parts[0]]
    if len(parts) != 2:
        return value
    # The parent may be null or lack the child key; treat that like a missing
    # field instead of raising.
    if not isinstance(value, dict) or parts[1] not in value:
        return ""
    return value[parts[1]]


def jsonToCsv(csv_file_name, data, json_fields):
    """Append one JSON record to a CSV file.

    Args:
        csv_file_name: Path of the CSV file; opened through ``createCsv``,
            which also receives the header row built here.
        data: One decoded JSON object (a mapping of field name to value).
        json_fields: Sequence of ``(title, field, proc)`` entries. ``title``
            is the CSV column title, ``field`` the key to read (optionally
            ``"parent/child"``), and ``proc`` the name of a function in
            ``parameterConfig`` used to convert the value, or "" for none.
    """
    # Column titles for the header row.
    first_line = []
    # Values of this record, in the same order as the titles.
    raw_data = []
    for fields in json_fields:
        name = fields[0]
        field = fields[1]
        proc = fields[2]
        first_line.append(name)
        field_data = _lookup_field(data, field)
        # Only convert values that are present and have a converter configured.
        if field_data != "" and proc != "":
            field_data = getattr(parameterConfig, proc)(field_data)
        raw_data.append(field_data)
    writer = createCsv(csv_file_name, first_line)
    writer.writerow(raw_data)

def main_proc(proc_name, rootdir, outputfile, jsonfields):
    """Print ``proc_name``, then convert the ``.txt`` files under ``rootdir``.

    Each file found by ``waklThroughDir`` is handed to ``jsonArrayToCsv``
    together with ``jsonfields`` and ``outputfile``.
    """
    print(proc_name)
    waklThroughDir(
        rootdir=rootdir,
        outputfile=outputfile,
        jsonfields=jsonfields,
        dirFunc=jsonArrayToCsv,
    )


# Script entry point.
if __name__ == '__main__':
    # One entry per data set: (label to print, input directory,
    # output file, field descriptions).
    conversions = (
        ("******", parameterConfig.PERSON_ROOTDIR, parameterConfig.PERSON_OUTPUTFILE, parameterConfig.PERSON_JSONFIELDS),
        ("******", parameterConfig.SYFW_ROOTDIR, parameterConfig.SYFW_OUTPUTFILE, parameterConfig.SYFW_JSONFIELDS),
        ("*******", parameterConfig.CZF_ROOTDIR, parameterConfig.CZF_OUTPUTFILE, parameterConfig.CZF_JSONFIELDS),
        ("*******", parameterConfig.XFAJ_ROOTDIR, parameterConfig.XFAJ_OUTPUTFILE, parameterConfig.XFAJ_JSONFIELDS),
        ("********", parameterConfig.LDRK_ROOTDIR, parameterConfig.LDRK_OUTPUTFILE, parameterConfig.LDRK_JSONFIELDS),
    )
    for label, source_dir, target_file, field_spec in conversions:
        main_proc(label, source_dir, target_file, field_spec)
    get_focused_person("********", parameterConfig.ZDRY_ROOTDIR)

----------------------
parameterConfig.py

----------------------

# Rental housing data set: input directory, output file and field list.
# Each field entry is (CSV column title, JSON key or "parent/child" key,
# name of a converter function in this module or "" for none).
CZF_ROOTDIR = "出租房"
CZF_OUTPUTFILE = "出租房322.txt"
CZF_JSONFIELDS = [
    ("地址", "address", ""),                    # address
    ("建造时间", "builtYear", ""),              # time built
    ("创建时间", "createDate", ""),             # creation time
    ("危险系数编号", "hiddenDangerLevel/id", ""),  # hazard level id
    ("房屋面积", "houseArea", ""),              # house area
    ("房屋结构编号", "houseStructure/id", ""),  # house structure id
    ("房屋类型", "houseType", ""),              # house type
    ("居住人数", "memberNum", ""),              # number of residents
    ("所在网格", "organization/orgName", ""),   # grid the house belongs to
    ("更新时间", "updateDate", ""),             # update time
    ("出租人姓名", "rentalPerson", ""),         # landlord name
    ("出租人联系方式", "rentalMobileNumber", ""),  # landlord contact
    ("出租方式编号", "rentalType/id", ""),      # rental type id
]


# Fire-safety supervision data set: input directory, output file and field list.
# Each field entry is (CSV column title, JSON key or "parent/child" key,
# name of a converter function in this module or "" for none).
XFAJ_ROOTDIR = "消防安监"
XFAJ_OUTPUTFILE = "消防安监821.txt"
XFAJ_JSONFIELDS = [
    ("地址", "address", ""),                      # address
    ("名称", "companyName", ""),                  # company name
    # NOTE(review): "manger" may be a misspelling of "manager" (compare
    # "managerTelephone" below) - confirm against the source JSON.
    ("负责人姓名", "manger", ""),                 # name of person in charge
    ("负责人联系方式", "managerTelephone", ""),   # contact of person in charge
    ("所在网格", "orgPathName", ""),              # grid the company belongs to
]


# Actual-housing data set: input directory, output file and field list.
# Each field entry is (CSV column title, JSON key or "parent/child" key,
# name of a converter function in this module or "" for none).
SYFW_ROOTDIR = "实有房屋"
SYFW_OUTPUTFILE = "实有房屋3969.txt"
SYFW_JSONFIELDS = [
    ("地址", "address", ""),                                # address
    ("是否是出租房", "isRentalHouse", "getRentalHouse"),    # is it a rental house
    ("是否是空置房", "memberNum", "getEmptyHouse"),         # is it vacant
    ("居住人数", "memberNum", ""),                          # number of residents
    ("数据录入时间", "createDate", ""),                     # data entry time
    ("数据更新时间", "updateDate", ""),                     # data update time
    ("所在网格", "organization/id", ""),                    # grid id
    ("房屋编号", "id", ""),                                 # house id
    # NOTE(review): the longitude and latitude columns read "updateDate" and
    # "rentalPerson"; these look like placeholder keys - confirm the real
    # coordinate fields.
    ("经度", "updateDate", ""),                             # longitude
    ("纬度", "rentalPerson", ""),                           # latitude
]
def getRentalHouse(rentalhouse):
    """Turn a rental-house flag into a Chinese yes/no label.

    Args:
        rentalhouse: The raw flag, either text such as ``'false'`` or a bool.

    Returns:
        ``'否'`` (no) when the flag is ``'false'`` or ``False``; ``'是'``
        (yes) for every other value.
    """
    # A JSON `false` literal is decoded to the bool False, not the string
    # 'false', so both spellings have to be recognised.
    if rentalhouse is False or rentalhouse == 'false':
        return '否'
    return '是'

def getEmptyHouse(memberNum):
    """Turn a resident count into a Chinese "is vacant" yes/no label.

    Args:
        memberNum: The raw resident count, as text (``'0'``) or as a number.

    Returns:
        ``'是'`` (yes) when the count is zero; ``'否'`` (no) otherwise.
    """
    # A JSON number is decoded to int, not str; compare on the text form so
    # that both 0 and '0' count as vacant.
    if str(memberNum) == '0':
        return '是'
    return '否'


# Registered-population data set: input directory, output file and field list.
# Each field entry is (CSV column title, JSON key or "parent/child" key,
# name of a converter function in this module or "" for none).
PERSON_ROOTDIR = "户籍人口12415"
PERSON_OUTPUTFILE = "户籍人口.txt"
PERSON_JSONFIELDS = [
    ("姓名", "name", ""),                        # name
    ("性别", "gender/id", "getGender"),          # gender
    ("身份证号", "idCardNo", ""),                # identity card number
    ("住所地址", "currentAddress", ""),          # current address
    ("户籍地址", "nativePlaceAddress", ""),      # registered address
    ("手机号", "mobileNumber", ""),              # mobile number
    ("座机号", "telephone", ""),                 # landline number
    ("户籍派出所", "nativePoliceStation", ""),   # registering police station
    ("是否死亡", "death", "isDeath"),            # deceased or not
    ("所在网格", "organization/orgName", ""),    # grid the person belongs to
    ("创建时间", "createDate", ""),              # creation time
    ("更新时间", "updateDate", ""),              # update time
    ("生日", "birthday", ""),                    # birthday
    ("省", "province", ""),                      # province
    ("市", "city", ""),                          # city
    ("区", "district", ""),                      # district
    ("个人编号", "id", ""),                      # person id
    ("房屋编号", "houseId", ""),                 # house id
]
def getGender(id):
    """Turn a gender id into a Chinese gender label.

    Args:
        id: The raw gender id, as text (``'1'``) or as a number.

    Returns:
        ``'男'`` (male) when the id is 1; ``'女'`` (female) for any other
        value.
    """
    # A JSON number is decoded to int, not str; compare on the text form so
    # that both 1 and '1' are recognised.
    if str(id) == '1':
        return '男'
    return '女'

def isDeath(death):
    """Turn a death flag into a Chinese yes/no label.

    Args:
        death: The raw flag, either text such as ``'false'`` or a bool.

    Returns:
        ``'否'`` (no) when the flag is ``'false'`` or ``False``; ``'是'``
        (yes) for every other value.
    """
    # A JSON `false` literal is decoded to the bool False, not the string
    # 'false', so both spellings have to be recognised.
    if death is False or death == 'false':
        return '否'
    return '是'


# Floating-population data set: input directory, output file and field list.
# Each field entry is (CSV column title, JSON key or "parent/child" key,
# name of a converter function in this module or "" for none).
LDRK_ROOTDIR = "流动人口"
LDRK_OUTPUTFILE = "流动人口830.txt"
LDRK_JSONFIELDS = [
    ("姓名", "name", ""),                        # name
    ("性别", "gender/id", "getGender"),          # gender
    ("身份证号", "idCardNo", ""),                # identity card number
    ("住所地址", "currentAddress", ""),          # current address
    ("户籍地址", "nativePlaceAddress", ""),      # registered address
    ("手机号", "mobileNumber", ""),              # mobile number
    ("座机号", "telephone", ""),                 # landline number
    ("是否死亡", "death", "isDeath"),            # deceased or not
    ("所在网格", "organization/orgName", ""),    # grid the person belongs to
    ("创建时间", "createDate", ""),              # creation time
    ("更新时间", "updateDate", ""),              # update time
    ("生日", "birthday", ""),                    # birthday
    ("省", "province", ""),                      # province
    ("市", "city", ""),                          # city
    ("区", "district", ""),                      # district
    ("个人编号", "id", ""),                      # person id
]


# Input directory of the key-personnel data set.
ZDRY_ROOTDIR = "重点人员"



posted on 2016-09-14 09:27  Kernel_wu  阅读(1784)  评论(0)  编辑  收藏  举报

导航