# Pandas data structure DataFrame: basic techniques
# Viewing and transposing data / adding, modifying and deleting values / alignment / sorting
import numpy as np
import pandas as pd
# Inspecting and transposing data
# Build a 5x2 frame of random values scaled by 100, with columns 'a' and 'b'.
df = pd.DataFrame(np.random.rand(5, 2) * 100, columns=['a', 'b'])
print(df)
print(df.head(2))  # leading rows; head() shows 5 rows when no count is given
print(df.tail(3))  # trailing rows
print(df.transpose())  # swap rows and columns
a b
0 13.001170 52.302971
1 45.443235 17.136341
2 87.749437 61.681361
3 27.203306 54.923447
4 27.661213 28.096315
a b
0 13.001170 52.302971
1 45.443235 17.136341
a b
2 87.749437 61.681361
3 27.203306 54.923447
4 27.661213 28.096315
0 1 2 3 4
a 13.001170 45.443235 87.749437 27.203306 27.661213
b 52.302971 17.136341 61.681361 54.923447 28.096315
# Adding, modifying and deleting values
# Adding and modifying work much like assigning into a dict.
df = pd.DataFrame(np.random.rand(5, 2) * 100, columns=['a', 'b'])
df['c'] = 10  # add column 'c' with every value set to 10
df.loc[5] = 20  # add a row labelled 5 with every value set to 20
df.loc[1:3, 'a':'c'] = 30  # overwrite rows 1-3, columns a-c, with 30
print(df)
# Deleting is normally done with drop().
df1 = pd.DataFrame(np.random.rand(5, 2) * 100, columns=['a', 'b'])
# Dropping a column (the axis=1 direction); inplace=True changes df1 itself.
df1.drop(columns=['a'], inplace=True)
# Dropping a row (the default axis=0 direction); with the default
# inplace=False a new frame is returned and df is left unchanged.
df2 = df.drop(index=[0])
print(df1)
print(df2)
# A column can also be removed with `del df['a']`, but that is rarely used.
a b c
0 1.402976 7.213545 10
1 30.000000 30.000000 30
2 30.000000 30.000000 30
3 30.000000 30.000000 30
4 16.940650 71.386239 10
5 20.000000 20.000000 20
b
0 20.961412
1 86.214446
2 35.080610
3 9.396529
4 9.427302
a b c
1 30.00000 30.000000 30
2 30.00000 30.000000 30
3 30.00000 30.000000 30
4 16.94065 71.386239 10
5 20.00000 20.000000 20
# Alignment
# Two frames of different shapes: 10x4 (columns A-D) and 7x3 (columns A-C).
df1 = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
df2 = pd.DataFrame(np.random.randn(7, 3), columns=list('ABC'))
print(df1)
print(df2)
print(df1 + df2)
# Arithmetic between DataFrames aligns automatically on both the columns and
# the index (row labels); any value added to NaN is still NaN.
A B C D
0 -0.186700 0.654873 -0.675748 1.274324
1 -0.203601 0.522645 1.327030 -1.211796
2 1.210807 -0.333704 -0.068803 0.626071
3 0.381998 1.352354 -1.122596 -0.039185
4 -1.794919 -0.636484 -1.248661 0.595253
5 -0.724729 0.845360 -0.318300 0.154419
6 -1.363716 0.539871 -0.466797 -0.991755
7 -1.746204 -0.211044 0.265923 1.479545
8 0.964734 1.702910 -1.231199 -0.095801
9 0.027144 0.565912 0.494676 0.297138
A B C
0 -0.266384 -0.697483 -0.787006
1 -1.247451 1.418789 -0.728944
2 -1.750087 0.108112 0.721652
3 0.131859 -1.157439 0.169533
4 1.051047 -0.395463 1.161415
5 -0.232839 -0.203111 -0.135780
6 -1.933584 0.280714 -1.493124
A B C D
0 -0.453084 -0.042610 -1.462754 NaN
1 -1.451051 1.941434 0.598086 NaN
2 -0.539280 -0.225593 0.652849 NaN
3 0.513857 0.194915 -0.953063 NaN
4 -0.743872 -1.031947 -0.087246 NaN
5 -0.957568 0.642249 -0.454080 NaN
6 -3.297300 0.820585 -1.959921 NaN
7 NaN NaN NaN NaN
8 NaN NaN NaN NaN
9 NaN NaN NaN NaN
# Sorting
# 1. Sorting by value with sort_values() - single key
df1 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    columns=list('abcd'),
)
print(df1)
print(df1.sort_values(by=['a'], ascending=True))  # ascending order
print(df1.sort_values(by=['a'], ascending=False))  # descending order
print('------')
# The `ascending` parameter selects ascending or descending order;
# ascending is the default.
# Sorting by several keys: each later key is used as a fallback for the
# keys before it.
df2 = pd.DataFrame({
    'a': [1] * 4 + [2] * 4,
    'b': list(range(8)),
    'c': list(range(8, 0, -1)),
})
print(df2)
print(df2.sort_values(by=['a', 'c']))
# 2. Sorting by row index with sort_index()
# df3 has an integer index, df4 a string index; both start out unsorted.
df3 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    index=[5, 4, 3, 2],
    columns=list('abcd'),
)
df4 = pd.DataFrame(
    np.random.rand(16).reshape(4, 4) * 100,
    index=list('hsxg'),
    columns=list('abcd'),
)
print(df3)
print(df3.sort_index())
print(df4)
print(df4.sort_index())
a b c d
0 14.363296 24.468750 0.862332 78.414560
1 50.376623 70.058587 12.016014 43.824154
2 9.164948 6.009718 59.899663 34.608598
3 32.776729 5.877378 19.845187 18.427510
a b c d
2 9.164948 6.009718 59.899663 34.608598
0 14.363296 24.468750 0.862332 78.414560
3 32.776729 5.877378 19.845187 18.427510
1 50.376623 70.058587 12.016014 43.824154
a b c d
1 50.376623 70.058587 12.016014 43.824154
3 32.776729 5.877378 19.845187 18.427510
0 14.363296 24.468750 0.862332 78.414560
2 9.164948 6.009718 59.899663 34.608598
------
a b c
0 1 0 8
1 1 1 7
2 1 2 6
3 1 3 5
4 2 4 4
5 2 5 3
6 2 6 2
7 2 7 1
a b c
3 1 3 5
2 1 2 6
1 1 1 7
0 1 0 8
7 2 7 1
6 2 6 2
5 2 5 3
4 2 4 4
a b c d
5 31.649529 84.868273 35.960909 17.991508
4 70.713399 3.196341 1.838718 2.587589
3 22.504265 3.529035 29.175653 61.559803
2 10.746665 74.852949 49.412317 28.020466
a b c d
2 10.746665 74.852949 49.412317 28.020466
3 22.504265 3.529035 29.175653 61.559803
4 70.713399 3.196341 1.838718 2.587589
5 31.649529 84.868273 35.960909 17.991508
a b c d
h 77.066414 3.475510 64.363116 51.800866
s 40.764677 42.263241 9.385392 44.899110
x 12.105920 29.185573 59.567246 79.056572
g 67.852492 11.714584 84.943183 75.268707
a b c d
g 67.852492 11.714584 84.943183 75.268707
h 77.066414 3.475510 64.363116 51.800866
s 40.764677 42.263241 9.385392 44.899110
x 12.105920 29.185573 59.567246 79.056572