Python pandas
pandas 有自己的格式,总结起来
有以下共通点
1.函数:构造函数(如 Series、DataFrame)以及 numpy 的数值函数(如 exp、arange),调用时后面跟的一定是 ()
2.数据,series,frame,dict,指定行列,赋值调取数据,后面跟的是 []
3.series 格式比较固定,一部分是index,另一部分是value.obj2= Series([4,7,-5,3],index=['d','b','a','c'])
4.frame 同样,分三部分, data,columns,index,
frame2= DataFrame(data,columns=['year','state','pop','debt'],index=['one','two','three','four','five'])
没有数据的位置都用 NaN 补充
5.一般的基本的数据层级为pop={'Nevada':{2001:2.4,2002:2.9},'Ohio':{2000:1.5,2001:1.7,2002:3.6}} {E:{A:B,C:D},F:{A:B,C:D}}
6.转置 frame3.T
7.获取data.
'''get value by columns
frame2['state']
'''get value by index
frame2.loc['three']  # .ix 已在 pandas 1.0 中移除,按标签取行请用 .loc
from pandas import Series,DataFrame
import pandas as pd
# --- Series: values and index -------------------------------------------
# A Series pairs an array of values with an index of labels.
# Bare expressions below are REPL-style: they only display a result when
# run interactively and have no effect when run as a script.
obj2 = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
obj2.values
obj2.index
# Look up single values by index label.
obj2['a']
obj2['c']
# Assign through a label, then select several labels with a list.
obj2['d'] = 6
obj2[['c', 'd', 'a']]
obj2
# Boolean filtering and arithmetic keep the label-to-value link.
obj2[obj2 > 0]
obj2 * 2
import numpy as np
np.exp(obj2)
# --- Series used like a dict ---------------------------------------------
# `in` tests membership against the index labels, not the values.
'b' in obj2
'e' in obj2
data = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = Series(data)
obj3
# --- Passing a dict together with an explicit index ----------------------
# Only labels listed in `states` are kept; 'California' has no entry in
# `data`, so its value is NaN, and 'Utah' is dropped.
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(data, index=states)
# --- Detecting missing data ----------------------------------------------
pd.isnull(obj4)
pd.notnull(obj4)
# --- Automatic index alignment in arithmetic -----------------------------
# Labels present in only one operand produce NaN in the sum.
obj3
obj4
obj3 + obj4
# --- Naming the Series and its index; replacing the index ----------------
obj4.name = 'population'
obj4.index.name = 'state'
obj4
# Assigning a list to .index relabels the existing values in place.
obj2.index = ['Bob', 'Helen', 'Jeff', 'Ryan']
# --- DataFrame from a dict of equal-length lists -------------------------
# NOTE(review): the original note says columns default to name order;
# newer pandas versions keep the dict's insertion order instead — verify
# against the installed version.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
frame = DataFrame(data)
# --- Explicit column order -----------------------------------------------
DataFrame(data, columns=['year', 'state', 'pop'])
# --- Extra column and custom row labels ----------------------------------
# 'debt' is not a key of `data`, so that column is filled with NaN.
frame2 = DataFrame(
    data,
    columns=['year', 'state', 'pop', 'debt'],
    index=['one', 'two', 'three', 'four', 'five'],
)
frame2.columns
# --- Get a column by name ------------------------------------------------
frame2['state']
# --- Get a row by index label --------------------------------------------
# `.ix` was removed in pandas 1.0; `.loc` is the label-based row accessor.
frame2.loc['three']
# --- Assigning to a column -----------------------------------------------
# A scalar is broadcast to every row; an array must match the row count.
frame2['debt'] = 16.5
frame2['debt'] = np.arange(5.)
# A Series is aligned on the index: rows absent from `val` become NaN.
val = Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
# --- Creating and deleting a column --------------------------------------
# Assigning to a new key creates the column; `del` removes it again.
frame2['eastern'] = frame2.state == 'Ohio'
del frame2['eastern']
# --- Nested dict: outer keys -> columns, inner keys -> row labels --------
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
frame3 = DataFrame(pop)
# --- Transpose (swap rows and columns) -----------------------------------
frame3.T
# --- Explicit index with a nested dict -----------------------------------
# Unlike the default, the inner keys are not merged: only the listed
# labels are used, and 2003 (absent from `pop`) yields a row of NaN.
DataFrame(pop, index=[2001, 2002, 2003])
# --- DataFrame from a dict of Series -------------------------------------
# The slices are positional (made explicit with .iloc), so which years are
# selected depends on the row order of frame3.
pdata = {
    'Ohio': frame3['Ohio'].iloc[:-1],
    'Nevada': frame3['Nevada'].iloc[:2],
}
DataFrame(pdata)
# --- Naming the index and the columns axis -------------------------------
frame3.index.name = 'year'
frame3.columns.name = 'state'
frame3