数据加载、存储与文件格式:读写文本格式的数据
# Examples of reading delimited text files with pandas.
# Every path is relative to the working directory and uses forward slashes so
# the script behaves the same on Windows, Linux and macOS.
import pandas as pd

df1 = pd.read_csv("examples/ex1.csv")
print(df1)
# Expected output:
#    a   b   c   d message
# 0  1   2   3   4   hello
# 1  5   6   7   8   world
# 2  9  10  11  12     foo

# read_table defaults to "\t" as the separator, so each comma-separated line
# of this file ends up in a single column.
df2 = pd.read_table("examples/ex1.csv")
print(df2)
# Expected output:
#   a,b,c,d,message
# 0   1,2,3,4,hello
# 1   5,6,7,8,world
# 2  9,10,11,12,foo

# Specify the separator explicitly.
df3 = pd.read_table("examples/ex1.csv", sep=",")
print(df3)
# Expected output:
#    a   b   c   d message
# 0  1   2   3   4   hello
# 1  5   6   7   8   world
# 2  9  10  11  12     foo

# When the file has no header row, pandas assigns default column names.
df4 = pd.read_csv("examples/ex2.csv", header=None)
print(df4)
# Expected output:
#    0   1   2   3      4
# 0  1   2   3   4  hello
# 1  5   6   7   8  world
# 2  9  10  11  12    foo

# Supply custom column names.
df5 = pd.read_csv("examples/ex2.csv", names=["a", "b", "c", "d", "message"])
print(df5)
# Expected output:
#    a   b   c   d message
# 0  1   2   3   4   hello
# 1  5   6   7   8   world
# 2  9  10  11  12     foo

# Use one of the columns as the index via the index_col parameter.
names = ["a", "b", "c", "d", "message"]
df6 = pd.read_csv("examples/ex2.csv", names=names, index_col="message")
print(df6)
# Expected output:
#          a   b   c   d
# message
# hello    1   2   3   4
# world    5   6   7   8
# foo      9  10  11  12

# Hierarchical index: pass a list of column numbers or column names.
parsed = pd.read_csv("examples/csv_mindex.csv", index_col=["key1", "key2"])
print(parsed)
# Expected output:
#            value1  value2
# key1 key2
# one  a          1       2
#      b          3       4
#      c          5       6
#      d          7       8
# two  a          9      10
#      b         11      12
#      c         13      14
#      d         15      16

# Fields separated by a variable amount of whitespace can be split with the
# regular expression \s+.
# The context manager closes the file handle once the lines have been read.
with open("examples/ex3.txt") as ex3_file:
    print(list(ex3_file))
# Expected output (printed on one line; wrapped here for readability):
# ['            A         B         C\n',
#  'aaa -0.264438 -1.026059 -0.619500\n',
#  'bbb  0.927272  0.302904 -0.032399\n',
#  'ccc -0.264273 -0.386314 -0.217601\n',
#  'ddd -0.871858 -0.348382  1.100491\n']

# Raw string: "\s" is not a valid escape sequence in a normal string literal.
result = pd.read_table("examples/ex3.txt", sep=r"\s+")
print(result)
# Expected output:
#             A         B         C
# aaa -0.264438 -1.026059 -0.619500
# bbb  0.927272  0.302904 -0.032399
# ccc -0.264273 -0.386314 -0.217601
# ddd -0.871858 -0.348382  1.100491

# skiprows skips the listed rows of the file.
df7 = pd.read_csv("examples/ex4.csv", skiprows=[0, 2, 3])
print(df7)
# Expected output:
#    a   b   c   d message
# 0  1   2   3   4   hello
# 1  5   6   7   8   world
# 2  9  10  11  12     foo

# Handling missing values.
result = pd.read_csv("examples/ex5.csv")
print(result)
# Expected output:
#   something  a   b     c   d message
# 0       one  1   2   3.0   4     NaN
# 1       two  5   6   NaN   8   world
# 2     three  9  10  11.0  12     foo

print(pd.isnull(result))
# Expected output:
#    something      a      b      c      d  message
# 0      False  False  False  False  False     True
# 1      False  False  False   True  False    False
# 2      False  False  False  False  False    False

# na_values accepts a list of strings that should be treated as missing.
result = pd.read_csv("examples/ex5.csv", na_values=["NULL"])
print(result)
# Expected output:
#   something  a   b     c   d message
# 0       one  1   2   3.0   4     NaN
# 1       two  5   6   NaN   8   world
# 2     three  9  10  11.0  12     foo

# A dict can give each column its own set of NA sentinel values.
sentinels = {"message": ["foo", "NA"], "something": ["two", "three"]}
result = pd.read_csv("examples/ex5.csv", na_values=sentinels)
print(result)
# Expected output:
#   something  a   b     c   d message
# 0       one  1   2   3.0   4     NaN
# 1       NaN  5   6   NaN   8   world
# 2       NaN  9  10  11.0  12     NaN
- sep或delimiter:用于对各行字段拆分的字符串或正则表达式
- header:用作列名的行号,默认为0(第一行);如果文件没有header行,应设置为None
- index_col:用作行索引的列编号或者列名
- names:用于结果的列名列表,结合header=None使用
- skiprows:需要跳过的行
- na_values:一组需要替换为NA(即视为缺失值)的值
- comment:用于将注释信息从行尾拆分出去的字符(一个或多个)
- nrows:需要读取的行数,从文件开始算起
- skipfooter:需要忽略的行数,从文件末尾算起(旧版pandas中写作skip_footer,该写法已被移除)
本文来自博客园,作者:OTAKU_nicole,转载请注明原文链接:https://www.cnblogs.com/nicole-zhang/p/14417269.html