DataFrame 学习笔记
由等长列表(或 NumPy 数组)组成的字典构造 DataFrame(注:下方输出来自旧版 pandas,列按字母顺序排列;pandas 0.23 及以后在 Python 3.6+ 下会保持字典键的插入顺序)
from pandas import Series,DataFrame
import numpy as np
import pandas as pd
data={'state':['ohio','ohio','ohio','Nevada','Nevada'],
'year':[2000,2001,2002,2001,2002],
'pop':[1.5,1.7,3.6,2.4,2.9]}
frame=DataFrame(data)
Out[103]:
pop state year
0 1.5 ohio 2000
1 1.7 ohio 2001
2 3.6 ohio 2002
3 2.4 Nevada 2001
4 2.9 Nevada 2002
通过 columns 参数指定列的顺序
DataFrame(data,columns=['year','state','pop'])
Out[104]:
year state pop
0 2000 ohio 1.5
1 2001 ohio 1.7
2 2002 ohio 3.6
3 2001 Nevada 2.4
4 2002 Nevada 2.9
通过 index 参数指定行标签;如果 columns 中指定的列在 data 中不存在,则该列的值全部为 NaN
frame2=DataFrame(data,columns=['year','state','pop','debt'], index=['one','two','three','four','five'])
frame2
Out[108]:
year state pop debt
one 2000 ohio 1.5 NaN
two 2001 ohio 1.7 NaN
three 2002 ohio 3.6 NaN
four 2001 Nevada 2.4 NaN
five 2002 Nevada 2.9 NaN
frame2.columns
Out[111]: Index(['year', 'state', 'pop', 'debt'], dtype='object')
frame2.state 与 frame2['state'] 用法相同(两种方式都返回同一列,结果是一个 Series)
frame2.state
Out[112]:
one ohio
two ohio
three ohio
four Nevada
five Nevada
Name: state, dtype: object
frame2['state']
Out[113]:
one ohio
two ohio
three ohio
four Nevada
five Nevada
Name: state, dtype: object
给 debt 列赋值(赋标量时,该值会填充到整列)
frame2['debt']=16.5
frame2
Out[115]:
year state pop debt
one 2000 ohio 1.5 16.5
two 2001 ohio 1.7 16.5
three 2002 ohio 3.6 16.5
four 2001 Nevada 2.4 16.5
five 2002 Nevada 2.9 16.5
frame2['debt']=np.arange(5)
frame2.debt=np.arange(5)
frame2
Out[120]:
year state pop debt
one 2000 ohio 1.5 0
two 2001 ohio 1.7 1
three 2002 ohio 3.6 2
four 2001 Nevada 2.4 3
five 2002 Nevada 2.9 4
val=Series([-1.2,-1.5,-1.7],index=['two','four','five'])
frame2['debt']=val
frame2
Out[127]:
year state pop debt
one 2000 ohio 1.5 NaN
two 2001 ohio 1.7 -1.2
three 2002 ohio 3.6 NaN
four 2001 Nevada 2.4 -1.5
five 2002 Nevada 2.9 -1.7
frame2['eastern']=frame2.state=='ohio'
frame2
Out[129]:
year state pop debt eastern
one 2000 ohio 1.5 NaN True
two 2001 ohio 1.7 -1.2 True
three 2002 ohio 3.6 NaN True
four 2001 Nevada 2.4 -1.5 False
five 2002 Nevada 2.9 -1.7 False
删除列
del frame2['eastern']
frame2
Out[131]:
year state pop debt
one 2000 ohio 1.5 NaN
two 2001 ohio 1.7 -1.2
three 2002 ohio 3.6 NaN
four 2001 Nevada 2.4 -1.5
five 2002 Nevada 2.9 -1.7
frame2.columns
Out[132]: Index(['year', 'state', 'pop', 'debt'], dtype='object')