Python之CSV模块的使用
csv的作用:
可以用csv模块处理从电子表格和数据库导出的数据,并写入使用字段和记录的格式的文本文件,这种格式通常被称为逗号分隔的格式(因为常用逗号来分隔记录中的字段)。
1、读取csv的文件
数据源
title1,title2,title3,title4 1,a,2020/4/22,! 2,b,2020/4/23,@ 3,c,2020/4/24,# 4,d,2020/4/25,$
import csv with open('test.csv','rt') as rf: reader = csv.reader(rf) for item in reader: print(item)
运行效果
['title1', 'title2', 'title3', 'title4'] ['1', 'a', '2020/4/22', '!'] ['2', 'b', '2020/4/23', '@'] ['3', 'c', '2020/4/24', '#'] ['4', 'd', '2020/4/25', '$']
2、写入csv的文件
import csv unicode_chars = 'å∫ç' with open('test_write.csv', 'wt', encoding='utf-8', newline='') as wf: writer = csv.writer(wf) writer.writerow(('title1', 'title2', 'title3')) for i in range(3): row = ( i + 1, chr(ord('a') + 1), unicode_chars[i], ) writer.writerow(row)
运行效果
title1,title2,title3 1,b,å 2,b,∫ 3,b,ç
3、设置写入的内容是否加引号规则
import csv unicode_chars = 'å∫ç' with open('test_write.csv', 'wt', encoding='utf-8', newline='') as wf: writer = csv.writer(wf, quoting=csv.QUOTE_NONNUMERIC) writer.writerow(('title1', 'title2', 'title3')) for i in range(3): row = ( i + 1, chr(ord('a') + 1), unicode_chars[i], ) writer.writerow(row)
csv.QUOTE_ALL : 设置所有内容加引号
csv.QUOTE_NONNUMERIC : 除了数字类型,其它的设置内容加引号
csv.QUOTE_MINIMAL : 设置包含特殊字符的字段加引号
csv.QUOTE_NONE : 设置所有的内容不加引号
运行效果
"title1","title2","title3" 1,"b","å" 2,"b","∫" 3,"b","ç"
4、csv能够解析的种类
import csv print(csv.list_dialects())
运行效果
['excel', 'excel-tab', 'unix']
5、自定义创建一个解析器
"Title 1"|"Title 2"|"Title 3" 1|"first line second line"|08/18/07
import csv # 注册一个解析器,分隔符为| csv.register_dialect('pipes', delimiter='|') with open('testdata.pipes', 'r') as f: reader = csv.reader(f, dialect='pipes') for row in reader: print(row)
运行效果
['Title 1', 'Title 2', 'Title 3'] ['1', 'first line\nsecond line', '08/18/07']
Attribute | Default | Meaning |
---|---|---|
delimiter | , |
Field separator (one character) |
doublequote | True | Flag controlling whether quotechar instances are doubled |
escapechar | None | Character used to indicate an escape sequence |
lineterminator | \r\n |
String used by writer to terminate a line |
quotechar | " |
String to surround fields containing special values (one character) |
quoting | QUOTE_MINIMAL |
Controls quoting behavior described earlier |
skipinitialspace | False | Ignore whitespace after the field delimiter |
6、查看不同的解析器,分隔符等参数的差异
import csv import sys # 注册一个解析器,符号为 \\ csv.register_dialect('escaped', escapechar='\\', doublequote=False, quoting=csv.QUOTE_NONE, ) # 注册一个解析器,符号为 ' csv.register_dialect('singlequote', quotechar="'", quoting=csv.QUOTE_ALL, ) # 获取内容是否加引号的对象 quoting_modes = { getattr(csv, n): n for n in dir(csv) if n.startswith('QUOTE_') } # 格式化的模板 TEMPLATE = ''' Dialect: "{name}" delimiter = {dl!r:<6} skipinitialspace = {si!r} doublequote = {dq!r:<6} quoting = {qu} quotechar = {qc!r:<6} lineterminator = {lt!r} escapechar = {ec!r:<6} ''' for name in sorted(csv.list_dialects()): dialect = csv.get_dialect(name) print(TEMPLATE.format( name=name, dl=dialect.delimiter, si=dialect.skipinitialspace, dq=dialect.doublequote, qu=quoting_modes[dialect.quoting], qc=dialect.quotechar, lt=dialect.lineterminator, ec=dialect.escapechar, )) writer = csv.writer(sys.stdout, dialect=dialect) writer.writerow( ('col1', 1, '10/01/2010', 'Special chars: " \' {} to parse'.format( dialect.delimiter)) )
运行效果
Dialect: "escaped" delimiter = ',' skipinitialspace = 0 doublequote = 0 quoting = QUOTE_NONE quotechar = '"' lineterminator = '\r\n' escapechar = '\\' col1,1,10/01/2010,Special chars: \" ' \, to parse Dialect: "excel" delimiter = ',' skipinitialspace = 0 doublequote = 1 quoting = QUOTE_MINIMAL quotechar = '"' lineterminator = '\r\n' escapechar = None col1,1,10/01/2010,"Special chars: "" ' , to parse" Dialect: "excel-tab" delimiter = '\t' skipinitialspace = 0 doublequote = 1 quoting = QUOTE_MINIMAL quotechar = '"' lineterminator = '\r\n' escapechar = None col1 1 10/01/2010 "Special chars: "" ' to parse" Dialect: "singlequote" delimiter = ',' skipinitialspace = 0 doublequote = 1 quoting = QUOTE_ALL quotechar = "'" lineterminator = '\r\n' escapechar = None 'col1','1','10/01/2010','Special chars: " '' , to parse' Dialect: "unix" delimiter = ',' skipinitialspace = 0 doublequote = 1 quoting = QUOTE_ALL quotechar = '"' lineterminator = '\n' escapechar = None "col1","1","10/01/2010","Special chars: "" ' , to parse"
7、为给定的样本猜测csv解析器,然后打印出结果
import csv from io import StringIO # 注册一个解析器,符号为 \\ csv.register_dialect('escaped', escapechar='\\', doublequote=False, quoting=csv.QUOTE_NONE, ) # 注册一个解析器,符号为 ' csv.register_dialect('singlequote', quotechar="'", quoting=csv.QUOTE_ALL, ) samples = [] # 获取python自带的解析器,写入到缓存中,用于下面解析. for name in sorted(csv.list_dialects()): buffer = StringIO() dialect = csv.get_dialect(name) writer = csv.writer(buffer, dialect=dialect) writer.writerow( ('col1', 1, '10/01/2010', 'Special chars " \' {} to parse'.format( dialect.delimiter)) ) samples.append((name, dialect, buffer.getvalue())) # 为给定的样本猜测解析器,然后使用结果 sniffer = csv.Sniffer() for name, expected, sample in samples: print('Dialect: "{}"'.format(name)) print('In: {}'.format(sample.rstrip())) dialect = sniffer.sniff(sample, delimiters=',\t') reader = csv.reader(StringIO(sample), dialect=dialect) print('Parsed:\n {}\n'.format('\n '.join(repr(r) for r in next(reader))))
运行效果
Dialect: "escaped" In: col1,1,10/01/2010,Special chars \" ' \, to parse Parsed: 'col1' '1' '10/01/2010' 'Special chars \\" \' \\' ' to parse' Dialect: "excel" In: col1,1,10/01/2010,"Special chars "" ' , to parse" Parsed: 'col1' '1' '10/01/2010' 'Special chars " \' , to parse' Dialect: "excel-tab" In: col1 1 10/01/2010 "Special chars "" ' to parse" Parsed: 'col1' '1' '10/01/2010' 'Special chars " \' \t to parse' Dialect: "singlequote" In: 'col1','1','10/01/2010','Special chars " '' , to parse' Parsed: 'col1' '1' '10/01/2010' 'Special chars " \' , to parse' Dialect: "unix" In: "col1","1","10/01/2010","Special chars "" ' , to parse" Parsed: 'col1' '1' '10/01/2010' 'Special chars " \' , to parse' Process finished with exit code 0
8、读取csv文件以字典的类型输出
import csv with open('test.csv', 'rt') as rf: reader = csv.DictReader(rf) for row in reader: print(row)
运行效果
OrderedDict([('title1', '1'), ('title2', 'a'), ('title3', '2020/4/22'), ('title4', '!')]) OrderedDict([('title1', '2'), ('title2', 'b'), ('title3', '2020/4/23'), ('title4', '@')]) OrderedDict([('title1', '3'), ('title2', 'c'), ('title3', '2020/4/24'), ('title4', '#')]) OrderedDict([('title1', '4'), ('title2', 'd'), ('title3', '2020/4/25'), ('title4', '$')])
9、写入csv文件以字典的类型写入
import csv fieldnames = ('Title 1', 'Title 2', 'Title 3', 'Title 4') headers = { n: n for n in fieldnames } unicode_chars = 'å∫ç' with open('test_write.csv', 'w', encoding='utf-8', newline='') as wf: # 设置写入的列标题 writer = csv.DictWriter(wf, fieldnames=fieldnames) writer.writeheader() for i in range(3): writer.writerow({ 'Title 1': i + 1, 'Title 2': chr(ord('a') + 1), 'Title 3': '08/{:02d}/07'.format(i + 1), 'Title 4': unicode_chars[i] })
运行效果
test_write.csv Title 1,Title 2,Title 3,Title 4 1,b,08/01/07,å 2,b,08/02/07,∫ 3,b,08/03/07,ç