pandas数据合并之concat
数据合并concat
#concat 函数
#参数解释
concat(
objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame],
axis: Axis = 0,
join: str = "outer",#合并方式:"outer"取并集(保留所有标签),"inner"取交集(只保留共有标签)
ignore_index: bool = False,#是否重置索引
keys=None,# 为每个待合并对象指定一个标签,作为结果多重索引的最外层
levels=None,
names=None,#设置结果多重索引中各层级的名称
verify_integrity: bool = False,#检查数据合并方向上是否有重复的索引标签值,如果有重复值则会报错
sort: bool = False,
copy: bool = True,
)
#针对Series
import pandas as pd
# Two integer Series, each carrying the default 0..3 row index.
a = pd.Series(data=[0, 3, 6, 9])
b = pd.Series(data=[12, 15, 18, 21])
# Stack b underneath a (axis=0 is the default). ignore_index=True discards
# both original indexes and relabels the result 0..7.
c = pd.concat(objs=[a, b], ignore_index=True)
'''
#输出结果
0 0
1 3
2 6
3 9
4 12
5 15
6 18
7 21
'''
# Place a and b side by side as columns (axis=1). ignore_index acts on the
# concatenation axis, so here it is the *column* labels that are reset to
# 0, 1; the row index is kept.
c = pd.concat(objs=[a, b], axis=1, ignore_index=True)
'''
输出结果
0 1
0 0 12
1 3 15
2 6 18
3 9 21
'''
# keys attaches one label per input; the labels become the outer level of a
# two-level (MultiIndex) row index: "s1" for a's rows, "s2" for b's rows.
c = pd.concat(objs=[a, b], keys=["s1", "s2"])
'''
#输出结果
s1 0 0
1 3
2 6
3 9
s2 0 12
1 15
2 18
3 21
'''
# With axis=1 the keys are used as the column names ("s1" and "s2").
c = pd.concat(objs=[a, b], axis=1, keys=["s1", "s2"])
'''
输出结果
s1 s2
0 0 12
1 3 15
2 6 18
3 9 21
'''
# names assigns a name to each level of the resulting MultiIndex:
# "s_name" for the keys level and "ID" for the original index level.
c = pd.concat(objs=[a, b], names=["s_name", "ID"], keys=["s1", "s2"])
"""
输出结果
s_name ID
s1 0 0
1 3
2 6
3 9
s2 0 12
1 15
2 18
3 21
"""
#在数据框中的应用
#对于索引相同的
# Two frames that share the same column names and the same default row index.
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "b": [13, 16, 19, 22],
    "c": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2)
# Stack the rows of df2 under df1 (axis 0) and renumber the row index.
a = pd.concat(objs=[df1, df2], ignore_index=True)
# Put the frames side by side (axis 1); rows are matched on the row index.
a = pd.concat(objs=[df1, df2], axis=1)
#对于索引不一致
# Two frames whose column names and row labels only partly overlap
# (shared column "a", shared row label 3).
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=[3, "x", "y", "z"])
# Row-wise concat aligns on column names; columns missing from one frame
# are filled with NaN for that frame's rows.
a = pd.concat(objs=[df1, df2])
# Same as above, but the row index is discarded and renumbered from 0.
a = pd.concat(objs=[df1, df2], ignore_index=True)
# Column-wise concat aligns on row labels; only label 3 exists in both frames.
a = pd.concat(objs=[df1, df2], axis=1)
"""
输出结果
a b c a f g
0 0.0 1.0 2.0 NaN NaN NaN
1 3.0 4.0 5.0 NaN NaN NaN
2 6.0 7.0 8.0 NaN NaN NaN
3 9.0 10.0 11.0 12.0 13.0 14.0
x NaN NaN NaN 15.0 16.0 17.0
y NaN NaN NaN 18.0 19.0 20.0
z NaN NaN NaN 21.0 22.0 23.0
"""
#concat函数中join参数:并集(outer)、交集(inner)
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=[3, "x", "y", "z"])
# join="inner" on the default axis keeps only the columns present in both
# frames (here just "a") while stacking all rows.
a = pd.concat(objs=[df1, df2], join="inner")
"""
a
0 0
1 3
2 6
3 9
3 12
x 15
y 18
z 21
"""
# Show both inputs, one after the other.
print(df1, df2, sep="\n")
# join="inner" with axis=1 keeps only the row labels present in both frames
# and places the columns of df1 and df2 side by side.
a = pd.concat(objs=[df1, df2], axis=1, join="inner")
print(a)
#保留某数据框的行索引标签与列索引标签
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=[3, "x", "y", "z"])
# Concatenate side by side, then use reindex (not reset_index) to keep only
# the rows whose labels belong to df1's index.
side_by_side = pd.concat(objs=[df1, df2], axis=1)
a = side_by_side.reindex(df1.index)
del side_by_side  # temporary only; keep the module namespace unchanged
"""
a b c a f g
0 0.0 1.0 2.0 NaN NaN NaN
1 3.0 4.0 5.0 NaN NaN NaN
2 6.0 7.0 8.0 NaN NaN NaN
3 9.0 10.0 11.0 12.0 13.0 14.0
"""
# Select df1's column labels from the axis=1 result. df2 also has a column
# named "a", so the selection returns both "a" columns followed by "b", "c".
a = pd.concat(objs=[df1, df2], axis=1)[df1.columns]
"""
#输出结果
a a b c
0 0.0 NaN 1.0 2.0
1 3.0 NaN 4.0 5.0
2 6.0 NaN 7.0 8.0
3 9.0 12.0 10.0 11.0
x NaN 15.0 NaN NaN
y NaN 18.0 NaN NaN
z NaN 21.0 NaN NaN
"""
#keys
# keys with axis=1 builds two-level column labels: "data1" above df1's
# columns and "data2" above df2's columns.
a = pd.concat(objs=[df1, df2], keys=["data1", "data2"], axis=1)
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=[3, "x", "y", "z"])
# Take column "b" of df1 as a Series.
s1 = df1["b"]
# Append a single column: with axis=1 the Series is added on the right of
# the DataFrame, using the Series name ("b") as the new column label.
a = pd.concat(objs=[df1, s1], axis=1)
"""
a b c b
0 0 1 2 1
1 3 4 5 4
2 6 7 8 7
3 9 10 11 10
"""
# Show both inputs, one after the other.
print(df1, df2, sep="\n")
# Selecting one row by label yields a Series whose index is df1's columns.
s2 = df1.loc[0]
print(s2)
# Turn the Series into a one-row DataFrame (to_frame gives one column, the
# transpose turns it into one row) so it can be appended as a row.
st = s2.to_frame().transpose()
print(st)
# Append the single row under df2; columns are aligned by name.
a = pd.concat(objs=[df2, st])
"""
a f g b c
3 12 13.0 14.0 NaN NaN
x 15 16.0 17.0 NaN NaN
y 18 19.0 20.0 NaN NaN
z 21 22.0 23.0 NaN NaN
0 0 NaN NaN 1.0 2.0
"""
#合并某一行
#由于Series天然是纵向(列方向)的,因此按行合并前需要先对Series进行转置操作
#concat还可将数据框组成的字典对象进行合并
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=["e", "x", "y", "z"])
# verify_integrity=True asks concat to check the combined row index for
# duplicate labels; 0..3 and "e", "x", "y", "z" do not overlap.
a = pd.concat(objs=[df1, df2], verify_integrity=True)
# A dict of DataFrames can be concatenated too: the dict keys ("x", "y")
# become the outer level of the row MultiIndex.
fracm = {"x": df1, "y": df2}
df = pd.concat(objs=fracm)
print(df)
"""
a b c f g
x 0 0 1.0 2.0 NaN NaN
1 3 4.0 5.0 NaN NaN
2 6 7.0 8.0 NaN NaN
3 9 10.0 11.0 NaN NaN
y e 12 NaN NaN 13.0 14.0
x 15 NaN NaN 16.0 17.0
y 18 NaN NaN 19.0 20.0
z 21 NaN NaN 22.0 23.0
"""
#
记录学习的点点滴滴