Python-爬虫-(Json和Csv)文件存储
用解析器解析出数据后,可以将其以TXT、JSON、CSV等文件形式进行存储;
1、TXT形式此处略;
2、JSON文件存储
JSON即JavaScript Object Notation(JavaScript对象标记),是一种轻量级的数据交换格式;
json格式:
json对象:{"username":"ADMIN","pwd":"xxxx","address":"北京"}
json数组:[{"name":"smith","age":"23"},{"name":"jones","age":"23"},{"name":"zye","age":"23"}]
例如:
import json

# JSON text holding an array of three objects. It is named json_text rather
# than `str` so that the built-in str type is not shadowed.
json_text = '''
[{"name":"小明","age":"23"},{"name":"jones","age":"23"},{"name":"zye","age":"23"}]
'''
print(type(json_text))  # <class 'str'>
obj = json.loads(json_text)  # parse JSON text into Python objects (here: a list of dicts)
print(type(obj))  # <class 'list'>

# Reading values
print(obj[0]['name'])
print(obj[1]['age'])
print(obj[1].get('name'))
# dict.get returns its second argument only when the key is missing; a key
# that is present with the value None still yields None.
print(obj[1].get('address', '北京'))

# Load a JSON file. The encoding is given explicitly so non-ASCII content is
# decoded the same way regardless of the platform's default encoding.
with open("read.json", 'r', encoding='utf-8') as file:
    j = json.load(file)
print(j[0]['name'])

# Write JSON (default ensure_ascii=True, so the output is pure ASCII)
jsonArray = [{'name': 'sd'}, {'name': 'ss'}]
with open('test.json', 'w', encoding='utf-8') as file:
    file.write(json.dumps(jsonArray))

# Or: round-trip the parsed text
jsonObj = json.loads(json_text)
with open('test.json', 'w', encoding='utf-8') as file:
    file.write(json.dumps(jsonObj))

# Or: keep Chinese characters readable and pretty-print the output.
# ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX
# escapes; indent=2 formats the output with two-space indentation.
jsonObj = json.loads(json_text)
with open('test.json', 'w', encoding='utf-8') as file:
    file.write(json.dumps(jsonObj, indent=2, ensure_ascii=False))
写入test.json后格式:
[
  {
    "name": "小明",
    "age": "23"
  },
  {
    "name": "jones",
    "age": "23"
  },
  {
    "name": "zye",
    "age": "23"
  }
]
3、CSV文件操作
CSV(Comma-Separated Values)以纯文本形式存储表格数据,字段之间通过分隔符进行分隔;该文件是一个字符序列,可以由任意数目的记录组成,记录之间通过换行符分隔;
将数据写入csv文件
import csv

# newline='' stops the text layer from translating the writer's own "\r\n"
# row terminator; without it an extra "\r" is written on platforms that use
# "\r\n" line endings, which shows up as blank rows.
with open('data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile, delimiter=':')  # separate fields with ':'
    writer.writerow(['id', 'name', 'age'])
    writer.writerow(['10001', 'Mike', 20])
    writer.writerow(['10002', 'Bob', 22])
    # writerows writes several rows in one call
    writer.writerows([['1003', 'JEL', 23], ['1004', 'JESR', 34], ['1005', 'ESR', 34]])
# No explicit close(): the with statement closes the file on exit.

# Writing rows from dictionaries.
# Opening with mode 'a' instead of 'w' would append to the existing file.
with open('abc.csv', 'w', newline='', encoding='utf-8') as myfile:
    fieldnames = ['id', 'name', 'addr']
    writer = csv.DictWriter(myfile, delimiter='!', fieldnames=fieldnames)
    writer.writeheader()  # header row taken from fieldnames
    writer.writerow({'id': '1001', 'name': 'JONES', 'addr': '北京'})
    writer.writerow({'id': '1002', 'name': 'smith', 'addr': '北京'})
    writer.writerow({'id': '1003', 'name': 'jenes', 'addr': '北京'})
读取csv文件:
import csv

# Read the data back.
# abc.csv is written with '!' as the field delimiter, so the reader must be
# given the same delimiter; with the default ',' every row would come back
# as a single unsplit string. newline='' lets the csv module handle line
# endings (including newlines embedded in quoted fields) itself.
with open('abc.csv', 'r', newline='', encoding='utf-8') as myfile:
    csv_reader = csv.reader(myfile, delimiter='!')
    for row in csv_reader:
        print(row)  # each row is a list of strings