pandas-重新索引
pandas-重新索引
reindex()
DataFrame.reindex(self, labels=None, index=None, columns=None, axis=None, method=None, copy=True, level=None, fill_value=nan, limit=None, tolerance=None)
支持两种调用
- (index=index_labels, columns=column_labels, ...)
- (labels, axis={'index', 'columns'}, ...)
使数据框符合新的索引(行标签和/或列标签),并返回重新索引后的数据框。默认情况下,新索引中在原数据框里没有对应记录的位置会被填充为 NaN。
import pandas as pd
import numpy as np
# Demo frame with N rows: a daily date column, an evenly spaced column and
# three randomly generated columns (their values change on every run).
N = 6
date_col = pd.date_range('2023-01-01', periods=N, freq='D')
ramp_col = np.linspace(0, N - 1, N)
# The random draws are made in the same order as the columns are listed.
uniform_col = np.random.rand(N)
level_col = np.random.choice(['Low', 'Medium', 'High'], N).tolist()
normal_col = np.random.normal(100, 10, size=N).tolist()
df = pd.DataFrame({
    'A': date_col,
    'x': ramp_col,
    'y': uniform_col,
    'C': level_col,
    'D': normal_col,
})
print(df)
# Sample output from one run:
# A x y C D
#0 2023-01-01 0.0 0.925854 High 104.851562
#1 2023-01-02 1.0 0.103372 Low 100.870139
#2 2023-01-03 2.0 0.667730 Medium 96.781621
#3 2023-01-04 3.0 0.149160 Medium 98.100630
#4 2023-01-05 4.0 0.606853 High 87.979547
#5 2023-01-06 5.0 0.450098 Low 104.103565

# Reindex the DataFrame: keep rows 0, 2 and 5 and columns 'A', 'C', 'B'.
# 'B' is not a column of df, so it appears in the result filled with NaN.
keep_rows = [0, 2, 5]
keep_cols = ['A', 'C', 'B']
df_reindexed = df.reindex(index=keep_rows, columns=keep_cols)
print(df_reindexed)
# A C B
#0 2023-01-01 High NaN
#2 2023-01-03 Medium NaN
#5 2023-01-06 Low NaN
通过将值传递给关键字参数 fill_value,可以用该值填充缺失的值
import pandas as pd
import numpy as np
# Browser names used as the row labels of the original frame.
index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']
df = pd.DataFrame(
    data={
        'http_status': [200, 200, 404, 404, 301],
        'response_time': [0.04, 0.02, 0.07, 0.08, 1.0],
    },
    index=index,
)
print(df)
# http_status response_time
#Firefox 200 0.04
#Chrome 200 0.02
#Safari 404 0.07
#IE10 404 0.08
#Konqueror 301 1.00

# Reindex the rows; labels absent from df ('Iceweasel', 'QQ') get the
# fill_value instead of the default NaN.
new_index = ['Safari', 'Iceweasel', 'QQ', 'IE10', 'Chrome']
df1 = df.reindex(labels=new_index, axis='index', fill_value='missing')
print(df1)
# http_status response_time
#Safari 404 0.07
#Iceweasel missing missing
#QQ missing missing
#IE10 404 0.08
#Chrome 200 0.02
set_index( )
使用现有的一列或多列作为 DataFrame 的索引(重新设置索引)
DataFrame.set_index(keys, drop=True, append=False, inplace=False, verify_integrity=False)
keys:列标签或列标签/数组列表,需要设置为索引的列
drop:默认为True,删除用作新索引的列
append:是否将列追加到现有索引,默认为False。
inplace:输入布尔值,表示当前操作是否对原数据生效,默认为False。
verify_integrity:检查新索引是否存在重复项。否则,将检查推迟到必要时进行。将其设置为 False 可提高该方法的性能,默认为 False。
import pandas as pd
import numpy as np
# Example frame with a default integer index and three ordinary columns.
df = pd.DataFrame({'month': [1, 4, 7, 10],
                   'year': [2012, 2014, 2013, 2014],
                   'sale': [55, 40, 84, 31]})
print(df)
# month year sale
#0 1 2012 55
#1 4 2014 40
#2 7 2013 84
#3 10 2014 31

# Use the 'month' column as the index; with the default drop=True the
# column is removed from the data columns.
df1 = df.set_index('month')
print(df1)
# year sale
#month
#1 2012 55
#4 2014 40
#7 2013 84
#10 2014 31

# Create a MultiIndex from the 'year' and 'month' columns:
df2 = df.set_index(['year', 'month'])
print(df2)
# sale
#year month
#2012 1 55
#2014 4 40
#2013 7 84
#2014 10 31
reset_index()
重置索引:返回一个新的 DataFrame,改用默认的整数索引,并将原索引作为新的一列添加到 DataFrame 中(drop=True 时则直接丢弃原索引)
DataFrame.reset_index(level=None, drop=False, inplace=False, col_level=0, col_fill='')
import pandas as pd
import numpy as np
# Example frame whose index holds animal names.
df = pd.DataFrame([('bird', 389.0),
                   ('bird', 24.0),
                   ('mammal', 80.5),
                   ('mammal', np.nan)],
                  index=['falcon', 'parrot', 'lion', 'monkey'],
                  columns=('class', 'max_speed'))
# Printed once; the original snippet printed this frame twice in a row.
print(df)
# class max_speed
#falcon bird 389.0
#parrot bird 24.0
#lion mammal 80.5
#monkey mammal NaN

# reset_index() moves the old index into a new column named 'index' and
# replaces it with the default integer index.
df1 = df.reset_index()
print(df1)
# index class max_speed
#0 falcon bird 389.0
#1 parrot bird 24.0
#2 lion mammal 80.5
#3 monkey mammal NaN

# Pass drop=True to avoid adding the old index as a column.
df2 = df.reset_index(drop=True)
print(df2)
# class max_speed
#0 bird 389.0
#1 bird 24.0
#2 mammal 80.5
#3 mammal NaN