pandas中reindex函数用法
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Walk-through of pandas ``reindex`` and ``drop`` on Series and DataFrame.
# Each ``print`` is followed by the output it is expected to produce.

# Fix the seed so the random DataFrame below is reproducible.
np.random.seed(666)

# --- Series.reindex -------------------------------------------------------
s1 = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
print(s1)
# Expected output:
# A    1
# B    2
# C    3
# D    4
# dtype: int64

# Re-specify the index; labels missing from the original ('E' here) can be
# filled with ``fill_value`` instead of the default NaN.
print(s1.reindex(index=['A', 'B', 'C', 'D', 'E'], fill_value=10))
# Expected output:
# A     1
# B     2
# C     3
# D     4
# E    10
# dtype: int64

s2 = Series(['A', 'B', 'C'], index=[1, 5, 10])
print(s2)
# Expected output:
# 1     A
# 5     B
# 10    C
# dtype: object

# Grow the index of s2 to the 15 labels 0..14.
# Labels that did not exist before get NaN by default.
print(s2.reindex(index=range(15)))
# Expected output:
# 0     NaN
# 1       A
# 2     NaN
# 3     NaN
# 4     NaN
# 5       B
# 6     NaN
# 7     NaN
# 8     NaN
# 9     NaN
# 10      C
# 11    NaN
# 12    NaN
# 13    NaN
# 14    NaN
# dtype: object

# method='ffill' (forward fill): a newly introduced label takes the value of
# the closest preceding label that existed in the original index.  Label 0
# has no predecessor, so it stays NaN.
# NOTE: per the pandas docs, ``method`` requires a monotonic original index.
print(s2.reindex(index=range(15), method='ffill'))
# Expected output:
# 0     NaN
# 1       A
# 2       A
# 3       A
# 4       A
# 5       B
# 6       B
# 7       B
# 8       B
# 9       B
# 10      C
# 11      C
# 12      C
# 13      C
# 14      C
# dtype: object

# --- DataFrame.reindex ----------------------------------------------------
df1 = DataFrame(
    np.random.rand(25).reshape([5, 5]),
    index=['A', 'B', 'D', 'E', 'F'],
    columns=['c1', 'c2', 'c3', 'c4', 'c5'],
)
print(df1)
# Expected output:
#          c1        c2        c3        c4        c5
# A  0.700437  0.844187  0.676514  0.727858  0.951458
# B  0.012703  0.413588  0.048813  0.099929  0.508066
# D  0.200248  0.744154  0.192892  0.700845  0.293228
# E  0.774479  0.005109  0.112858  0.110954  0.247668
# F  0.023236  0.727321  0.340035  0.197503  0.909180

# Add a new row label ('C'); the new row is filled with NaN automatically.
print(df1.reindex(index=['A', 'B', 'C', 'D', 'E', 'F']))
# Expected output:
#          c1        c2        c3        c4        c5
# A  0.700437  0.844187  0.676514  0.727858  0.951458
# B  0.012703  0.413588  0.048813  0.099929  0.508066
# C       NaN       NaN       NaN       NaN       NaN
# D  0.200248  0.744154  0.192892  0.700845  0.293228
# E  0.774479  0.005109  0.112858  0.110954  0.247668
# F  0.023236  0.727321  0.340035  0.197503  0.909180

# Extending the columns works the same way: the new column 'c6' is all NaN.
print(df1.reindex(columns=['c1', 'c2', 'c3', 'c4', 'c5', 'c6']))
# Expected output:
#          c1        c2        c3        c4        c5  c6
# A  0.700437  0.844187  0.676514  0.727858  0.951458 NaN
# B  0.012703  0.413588  0.048813  0.099929  0.508066 NaN
# D  0.200248  0.744154  0.192892  0.700845  0.293228 NaN
# E  0.774479  0.005109  0.112858  0.110954  0.247668 NaN
# F  0.023236  0.727321  0.340035  0.197503  0.909180 NaN

# --- Shrinking the index --------------------------------------------------
# Reindexing with a subset of the labels acts like a selection.
print(s1.reindex(['A', 'B']))
# Expected output:
# A    1
# B    2
# dtype: int64

print(df1.reindex(index=['A', 'B']))
# Expected output (again a selection, here of rows A and B):
#          c1        c2        c3        c4        c5
# A  0.700437  0.844187  0.676514  0.727858  0.951458
# B  0.012703  0.413588  0.048813  0.099929  0.508066

# --- drop -----------------------------------------------------------------
# On a Series, ``drop`` removes the given index label(s).
print(s1.drop('A'))
# Expected output (only three entries remain):
# B    2
# C    3
# D    4
# dtype: int64

# On a DataFrame, drop('A') removes a whole row; axis=0 (rows) is the default.
print(df1.drop('A', axis=0))
# Expected output:
#          c1        c2        c3        c4        c5
# B  0.012703  0.413588  0.048813  0.099929  0.508066
# D  0.200248  0.744154  0.192892  0.700845  0.293228
# E  0.774479  0.005109  0.112858  0.110954  0.247668
# F  0.023236  0.727321  0.340035  0.197503  0.909180

# axis=1 drops a column instead: here column 'c1' is removed.
print(df1.drop('c1', axis=1))
# Expected output:
#          c2        c3        c4        c5
# A  0.844187  0.676514  0.727858  0.951458
# B  0.413588  0.048813  0.099929  0.508066
# D  0.744154  0.192892  0.700845  0.293228
# E  0.005109  0.112858  0.110954  0.247668
# F  0.727321  0.340035  0.197503  0.909180