# Python 清洗数据

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Build a labelled Series: three integers indexed by 'a', 'b', 'c'.
s = Series([1, 2, 3], index=list('abc'))
print(s)
# Expected output:
# a    1
# b    2
# c    3
# dtype: int64

# NumPy reductions can be applied to a Series directly.
print(np.max(s))

# Attach a name to the index and to the Series itself, then show the result.
s.index.name = 'name'
s.name = 'rank'
print(s)

# Create DataFrames from a dict whose values are equal-length column lists.
sdata1 = {'name': ['a', 'b', 'c'], 'rank': [1, 2, 3], 'score': [98, 89, 54]}
print(sdata1)  # the raw dict
df1 = DataFrame(sdata1)
print(df1)
# Expected output:
#   name  rank  score
# 0    a     1     98
# 1    b     2     89
# 2    c     3     54

# Passing `columns` selects/reorders the dict keys; the data stays aligned,
# only the column positions change.
df2 = DataFrame(sdata1, columns=['score', 'name', 'rank'])
print(df2)
# Expected output:
#    score name  rank
# 0     98    a     1
# 1     89    b     2
# 2     54    c     3

# A requested column that is not a key of the dict ('class') is created
# and filled with missing values; `index` supplies explicit row labels.
df3 = DataFrame(
    sdata1,
    columns=['score', 'name', 'rank', 'class'],
    index=['1', '2', '3'],
)
print(df3)
# Expected output:
#    score name  rank class
# 1     98    a     1   NaN
# 2     89    b     2   NaN
# 3     54    c     3   NaN

# Re-index onto a longer label list: the new label '4' gets an all-NaN row.
df4 = df3.reindex(['1', '2', '3', '4'])
print(df4)
# Expected output:
#    score name  rank class
# 1   98.0    a   1.0   NaN
# 2   89.0    b   2.0   NaN
# 3   54.0    c   3.0   NaN
# 4    NaN  NaN   NaN   NaN

# Column selection by name.
print(df4['score'])
# Row selection by label. `.ix` was deprecated in pandas 0.20 and removed in
# pandas 1.0, so the label-based `.loc` indexer is used instead.
print(df4.loc['1'])
# Boolean-mask filtering: rows of df2 whose score is greater than 60.
print(df2[df2['score'] > 60])
# Expected output:
#    score name  rank
# 0     98    a     1
# 1     89    b     2

# Remove the 'class' column from df3 in place (df4 was built earlier and
# keeps its own 'class' column).
del df3['class']
print(df3)

# Rebuild the sample frame: three rows labelled '1'..'3', with the extra
# all-missing 'class' column removed again straight away.
sdata1 = {
    'name': ['a', 'b', 'c'],
    'rank': [1, 2, 3],
    'score': [98, 89, 54],
}
print(sdata1)
df3 = DataFrame(
    sdata1,
    index=['1', '2', '3'],
    columns=['score', 'name', 'rank', 'class'],
)
df3.drop(columns='class', inplace=True)
print(df3)

# Re-index with an extra trailing label: first leaving the new row missing,
# then filling the new row with 0 instead.
print(df3.reindex(index=['1', '2', '3', '4']))
print(df3.reindex(index=['1', '2', '3', '4'], fill_value=0))
# Expected output of the fill_value=0 call:
#    score name  rank
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3
# 4      0    0     0

# Re-index with an extra leading label: the new row '0' is missing.
print(df3.reindex(index=['0', '1', '2', '3']))
# Expected output:
#    score name  rank
# 0    NaN  NaN   NaN
# 1   98.0    a   1.0
# 2   89.0    b   2.0
# 3   54.0    c   3.0

# Same labels with backward fill: row '0' takes its values from row '1'.
print(df3.reindex(index=['0', '1', '2', '3'], method='bfill'))
# Expected output:
#    score name  rank
# 0     98    a     1
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3

# drop returns a new frame; axis 0 refers to rows, axis 1 to columns.
print(df3.drop(index='1'))        # without the row labelled '1'
print(df3.drop(columns='score'))  # without the 'score' column

# Transpose: rows become columns and vice versa.
print(df3.transpose())

 

# posted @ 2018-07-17 19:54  Lzxanthony  阅读(263)  评论(1)  编辑  收藏  举报