pandas index functions (index, reindex, set_index, reset_index, reindex_like): reindex
reindex(
labels=None,
index=None,
columns=None,
axis=None,
method=None,
copy=True,
level=None,
fill_value=nan,
limit=None,
tolerance=None,
)
Docstring:
Conform DataFrame to new index with optional filling logic.
or Conform Series to new index with optional filling logic.
Parameters
----------
index : array-like, optional
    New labels / index to conform to, should be specified using keywords.
    Preferably an Index object to avoid duplicating data.
method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
    Method to use for filling holes in reindexed DataFrame.
    Please note: this is only applicable to DataFrames/Series with a
    monotonically increasing/decreasing index.

    * None (default): don't fill gaps
    * pad / ffill: Propagate last valid observation forward to next valid.
    * backfill / bfill: Use next valid observation to fill gap.
    * nearest: Use nearest valid observations to fill gap.
copy : bool, default True
    Return a new object, even if the passed indexes are the same.
level : int or name
    Broadcast across a level, matching Index values on the passed
    MultiIndex level.
fill_value : scalar, default np.NaN
    Value to use for missing values. Defaults to NaN, but can be any
    "compatible" value.
limit : int, default None
    Maximum number of consecutive elements to forward or backward fill.
tolerance : optional
    Maximum distance between original and new labels for inexact matches.
    The values of the index at the matching locations must satisfy the
    equation ``abs(index[indexer] - target) <= tolerance``.
    Tolerance may be a scalar value, which applies the same tolerance to
    all values, or list-like, which applies variable tolerance per element.
    List-like includes list, tuple, array, Series, and must be the same
    size as the index and its dtype must exactly match the index's type.
# Worked examples of ``reindex`` and ``drop`` on a pandas Series and DataFrame.
#
# Every call below returns a new object; ``s1``, ``s2`` and ``df1`` are never
# modified in place, so all the DataFrame outputs shown in the comments are
# views of the same seeded values.
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Fixed seed so that the random values in ``df1`` are reproducible.
np.random.seed(666)

# ---------------------------------------------------------------------------
# Series.reindex
# ---------------------------------------------------------------------------
s1 = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
print(s1)
# A    1
# B    2
# C    3
# D    4
# dtype: int64

# Re-specify the index; labels that did not exist before ('E') can be given
# a value through fill_value.
print(s1.reindex(index=['A', 'B', 'C', 'D', 'E'], fill_value=10))
# A     1
# B     2
# C     3
# D     4
# E    10
# dtype: int64

s2 = Series(['A', 'B', 'C'], index=[1, 5, 10])
print(s2)
# 1     A
# 5     B
# 10    C
# dtype: object

# Expand the index of s2 to the 15 labels 0..14.
# Labels that are not present in the original index get NaN by default.
print(s2.reindex(index=range(15)))
# 0     NaN
# 1       A
# 2     NaN
# 3     NaN
# 4     NaN
# 5       B
# 6     NaN
# 7     NaN
# 8     NaN
# 9     NaN
# 10      C
# 11    NaN
# 12    NaN
# 13    NaN
# 14    NaN
# dtype: object

# method='ffill' (forward fill): a new label takes the value of the closest
# preceding label of the original index. Label 0 has no predecessor, so it
# stays NaN.
print(s2.reindex(index=range(15), method='ffill'))
# 0     NaN
# 1       A
# 2       A
# 3       A
# 4       A
# 5       B
# 6       B
# 7       B
# 8       B
# 9       B
# 10      C
# 11      C
# 12      C
# 13      C
# 14      C
# dtype: object

# ---------------------------------------------------------------------------
# DataFrame.reindex
# ---------------------------------------------------------------------------
df1 = DataFrame(
    np.random.rand(25).reshape([5, 5]),
    index=['A', 'B', 'D', 'E', 'F'],
    columns=['c1', 'c2', 'c3', 'c4', 'c5'],
)
print(df1)
#          c1        c2        c3        c4        c5
# A  0.700437  0.844187  0.676514  0.727858  0.951458
# B  0.012703  0.413588  0.048813  0.099929  0.508066
# D  0.200248  0.744154  0.192892  0.700845  0.293228
# E  0.774479  0.005109  0.112858  0.110954  0.247668
# F  0.023236  0.727321  0.340035  0.197503  0.909180

# Add a new row label ('C'); the new row is filled with NaN automatically.
print(df1.reindex(index=['A', 'B', 'C', 'D', 'E', 'F']))
#          c1        c2        c3        c4        c5
# A  0.700437  0.844187  0.676514  0.727858  0.951458
# B  0.012703  0.413588  0.048813  0.099929  0.508066
# C       NaN       NaN       NaN       NaN       NaN
# D  0.200248  0.744154  0.192892  0.700845  0.293228
# E  0.774479  0.005109  0.112858  0.110954  0.247668
# F  0.023236  0.727321  0.340035  0.197503  0.909180

# Adding a column works the same way: the new column 'c6' is all NaN.
print(df1.reindex(columns=['c1', 'c2', 'c3', 'c4', 'c5', 'c6']))
#          c1        c2        c3        c4        c5  c6
# A  0.700437  0.844187  0.676514  0.727858  0.951458 NaN
# B  0.012703  0.413588  0.048813  0.099929  0.508066 NaN
# D  0.200248  0.744154  0.192892  0.700845  0.293228 NaN
# E  0.774479  0.005109  0.112858  0.110954  0.247668 NaN
# F  0.023236  0.727321  0.340035  0.197503  0.909180 NaN

# ---------------------------------------------------------------------------
# Reindexing to fewer labels acts like a selection
# ---------------------------------------------------------------------------
print(s1.reindex(['A', 'B']))
# A    1
# B    2
# dtype: int64

print(df1.reindex(index=['A', 'B']))
# Only rows A and B remain (same values as in df1 above).
#          c1        c2        c3        c4        c5
# A  0.700437  0.844187  0.676514  0.727858  0.951458
# B  0.012703  0.413588  0.048813  0.099929  0.508066

# ---------------------------------------------------------------------------
# drop: remove labels instead of listing the ones to keep
# ---------------------------------------------------------------------------
# For a Series, drop removes the given index label(s).
print(s1.drop('A'))
# Only three entries are left.
# B    2
# C    3
# D    4
# dtype: int64

# For a DataFrame, drop('A', axis=0) removes the row labelled 'A'
# (axis=0, i.e. rows, is also the default).
print(df1.drop('A', axis=0))
#          c1        c2        c3        c4        c5
# B  0.012703  0.413588  0.048813  0.099929  0.508066
# D  0.200248  0.744154  0.192892  0.700845  0.293228
# E  0.774479  0.005109  0.112858  0.110954  0.247668
# F  0.023236  0.727321  0.340035  0.197503  0.909180

# axis=1 removes a column instead: here column 'c1'.
print(df1.drop('c1', axis=1))
#          c2        c3        c4        c5
# A  0.844187  0.676514  0.727858  0.951458
# B  0.413588  0.048813  0.099929  0.508066
# D  0.744154  0.192892  0.700845  0.293228
# E  0.005109  0.112858  0.110954  0.247668
# F  0.727321  0.340035  0.197503  0.909180