pandas之数据合并
import numpy as np
import pandas as pd

# Walk-through of the main ways to combine DataFrames with pd.concat.
# Each step rebinds ``res`` and prints it; the expected console output is
# reproduced in the comment block at the end of this script.

# Three 3x4 frames filled with 0.0, 1.0 and 2.0. They share the columns
# a-d and the default 0..2 index.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])

# Vertical concatenation (stack rows). Each frame keeps its own index, so
# the labels 0, 1, 2 repeat three times.
res = pd.concat([df1, df2, df3], axis=0)
print(res)

# Same stacking, but discard the original labels and renumber rows 0..8.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
print(res)

# From here on df3/df4 are two frames that only partially overlap:
# columns a-d vs. b-e, and index 1-3 vs. 2-4.
df3 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'],
                   index=[1, 2, 3])
df4 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['b', 'c', 'd', 'e'],
                   index=[2, 3, 4])

# Horizontal (axis=1) outer join: rows are aligned on the union of both
# indexes and missing cells become NaN. With axis=1, ignore_index=True
# renumbers the *columns* 0..7; the row labels 1..4 are kept.
res = pd.concat([df3, df4], axis=1, join='outer', ignore_index=True)
print(res)

# Vertical inner join: only the columns present in both frames (b, c, d)
# survive, and the rows are renumbered 0..5.
res = pd.concat([df3, df4], axis=0, join='inner', ignore_index=True)
print(res)

# Align the side-by-side result to one specific index. The ``join_axes``
# argument of pd.concat was removed in pandas 1.0; reindexing the result
# with the desired index is the documented replacement.
res = pd.concat([df1, df2], axis=1).reindex(df1.index)
print(res)

# Append-style merge. ``DataFrame.append`` was removed in pandas 2.0;
# pd.concat with ignore_index=True produces the same frame.
res = pd.concat([df1, df2], ignore_index=True)
print(res)

# Output:
#
#      a    b    c    d
# 0  0.0  0.0  0.0  0.0
# 1  0.0  0.0  0.0  0.0
# 2  0.0  0.0  0.0  0.0
# 0  1.0  1.0  1.0  1.0
# 1  1.0  1.0  1.0  1.0
# 2  1.0  1.0  1.0  1.0
# 0  2.0  2.0  2.0  2.0
# 1  2.0  2.0  2.0  2.0
# 2  2.0  2.0  2.0  2.0
#
#      a    b    c    d
# 0  0.0  0.0  0.0  0.0
# 1  0.0  0.0  0.0  0.0
# 2  0.0  0.0  0.0  0.0
# 3  1.0  1.0  1.0  1.0
# 4  1.0  1.0  1.0  1.0
# 5  1.0  1.0  1.0  1.0
# 6  2.0  2.0  2.0  2.0
# 7  2.0  2.0  2.0  2.0
# 8  2.0  2.0  2.0  2.0
#
#      0    1    2    3    4    5    6    7
# 1  1.0  1.0  1.0  1.0  NaN  NaN  NaN  NaN
# 2  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
# 3  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
# 4  NaN  NaN  NaN  NaN  2.0  2.0  2.0  2.0
#
#      b    c    d
# 0  1.0  1.0  1.0
# 1  1.0  1.0  1.0
# 2  1.0  1.0  1.0
# 3  2.0  2.0  2.0
# 4  2.0  2.0  2.0
# 5  2.0  2.0  2.0
#
#      a    b    c    d    a    b    c    d
# 0  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
# 1  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
# 2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
#
#      a    b    c    d
# 0  0.0  0.0  0.0  0.0
# 1  0.0  0.0  0.0  0.0
# 2  0.0  0.0  0.0  0.0
# 3  1.0  1.0  1.0  1.0
# 4  1.0  1.0  1.0  1.0
# 5  1.0  1.0  1.0  1.0
import numpy as np
import pandas as pd

# Demonstrates Series.combine_first: take the caller's values and fall back
# to the argument's value wherever the caller holds NaN. The expected
# console output is reproduced in the comment block at the end.

# Two Series sharing the index f..a, each with NaN in different positions
# (both are NaN at label 'a').
a = pd.Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],
              index=['f', 'e', 'd', 'c', 'b', 'a'])
print(a)

b = pd.Series([1, np.nan, 3, 4, 5, np.nan],
              index=['f', 'e', 'd', 'c', 'b', 'a'])
print(b)

# Fill the missing values of b with data from a.
print(b.combine_first(a))

# Fill the missing values of a with data from b.
print(a.combine_first(b))

# Output:
#
# f    NaN
# e    2.5
# d    NaN
# c    3.5
# b    4.5
# a    NaN
# dtype: float64
#
# f    1.0
# e    NaN
# d    3.0
# c    4.0
# b    5.0
# a    NaN
# dtype: float64
#
# f    1.0
# e    2.5
# d    3.0
# c    4.0
# b    5.0
# a    NaN
# dtype: float64
#
# f    1.0
# e    2.5
# d    3.0
# c    3.5
# b    4.5
# a    NaN
# dtype: float64