A B C D E F
0 1.0 2018-01-01 1.0 3 test foo
1 1.0 2018-01-01 1.0 3 train foo
2 1.0 2018-01-01 1.0 3 test foo
3 1.0 2018-01-01 1.0 3 train foo
A float64
B datetime64[ns]
C float32
D int32
E category
F object
dtype: object
A C D
count 4.0 4.0 4.0
mean 1.0 1.0 3.0
std 0.0 0.0 0.0
min 1.0 1.0 3.0
25% 1.0 1.0 3.0
50% 1.0 1.0 3.0
75% 1.0 1.0 3.0
max 1.0 1.0 3.0
0 1 2 \
A 1 1 1
B 2018-01-01 00:00:00 2018-01-01 00:00:00 2018-01-01 00:00:00
C 1 1 1
D 3 3 3
E test train test
F foo foo foo
3
A 1
B 2018-01-01 00:00:00
C 1
D 3
E train
F foo
F E D C B A
0 foo test 3 1.0 2018-01-01 1.0
1 foo train 3 1.0 2018-01-01 1.0
2 foo test 3 1.0 2018-01-01 1.0
3 foo train 3 1.0 2018-01-01 1.0
A B C D E F
0 1.0 2018-01-01 1.0 3 test foo
2 1.0 2018-01-01 1.0 3 test foo
1 1.0 2018-01-01 1.0 3 train foo
3 1.0 2018-01-01 1.0 3 train foo
Empty DataFrame
Columns: [A, B, C, D]
Index: []
A B C D
2018-01-01 0 1 2 3
2018-01-02 4 5 6 7
2018-01-03 8 9 10 11
A B C D
2018-01-03 8 9 10 11
2018-01-04 12 13 14 15
2018-01-05 16 17 18 19
# Look up one row by its label; the result is a Series built from that row.
print(df3.loc['20180102'])
A 4
B 5
C 6
D 7
Name: 2018-01-02 00:00:00, dtype: int32
# Column filtering: a single row label plus a list of columns yields a Series.
print(df3.loc['20180103', ['A', 'C']])
print()
# A row-label slice plus a column list yields a sub-DataFrame, roughly like
# SQL "SELECT A, C FROM df LIMIT ...". Label slices with .loc include both
# endpoints.
print(df3.loc['20180103':'20180105', ['A', 'C']])
print()
# Every row, restricted to columns A and B.
print(df3.loc[:, ['A', 'B']])
A 8
C 10
Name: 2018-01-03 00:00:00, dtype: int32
A C
2018-01-03 8 10
2018-01-04 12 14
2018-01-05 16 18
A B
2018-01-01 0 1
2018-01-02 4 5
2018-01-03 8 9
2018-01-04 12 13
2018-01-05 16 17
2018-01-06 20 21
A B C D
2018-01-01 0 1 2 3
2018-01-02 4 5 6 7
2018-01-03 8 9 10 11
2018-01-04 12 13 14 15
2018-01-05 16 17 18 19
2018-01-06 20 21 22 23
A B C D E
2018-01-01 0 1 2 3 NaN
2018-01-02 4 5 6 7 NaN
2018-01-03 8 9 10 11 NaN
2018-01-04 12 13 14 15 NaN
2018-01-05 16 17 18 19 NaN
2018-01-06 20 21 22 23 NaN
A B C D E F
2018-01-01 0 1 2 3 NaN NaN
2018-01-02 4 5 6 7 NaN 1.0
2018-01-03 8 9 10 11 NaN 2.0
2018-01-04 12 13 14 15 NaN 3.0
2018-01-05 16 17 18 19 NaN 4.0
2018-01-06 20 21 22 23 NaN 5.0
A int32
B int32
C int32
D int32
E float64
F float64
dtype: object
A B C D
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
A B C D
0 1.0 1.0 1.0 1.0
1 1.0 1.0 1.0 1.0
2 1.0 1.0 1.0 1.0
A B C D
0 2.0 2.0 2.0 2.0
1 2.0 2.0 2.0 2.0
2 2.0 2.0 2.0 2.0
A B C D
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
0 1.0 1.0 1.0 1.0
1 1.0 1.0 1.0 1.0
2 1.0 1.0 1.0 1.0
0 2.0 2.0 2.0 2.0
1 2.0 2.0 2.0 2.0
2 2.0 2.0 2.0 2.0
A B C D
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
5 1.0 1.0 1.0 1.0
6 2.0 2.0 2.0 2.0
7 2.0 2.0 2.0 2.0
8 2.0 2.0 2.0 2.0
# Row-wise concatenation of two frames whose columns only partly overlap
# (both have C and D; df0 alone has A/B, df1 alone has E/F).
df0 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['A', 'B', 'C', 'D'])
df1 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['E', 'F', 'C', 'D'])

# The default join is 'outer': keep the union of columns, filling gaps with NaN.
res = pd.concat([df0, df1], ignore_index=True)
print(res)
print()

# Same result with the join spelled out explicitly.
res = pd.concat([df0, df1], join='outer', ignore_index=True)
print(res)
print()

# 'inner' keeps only the columns present in every input (C and D here).
res = pd.concat([df0, df1], join='inner', ignore_index=True)
print(res)
print()
A B C D E F
0 0.0 0.0 0.0 0.0 NaN NaN
1 0.0 0.0 0.0 0.0 NaN NaN
2 0.0 0.0 0.0 0.0 NaN NaN
3 NaN NaN 1.0 1.0 1.0 1.0
4 NaN NaN 1.0 1.0 1.0 1.0
5 NaN NaN 1.0 1.0 1.0 1.0
A B C D E F
0 0.0 0.0 0.0 0.0 NaN NaN
1 0.0 0.0 0.0 0.0 NaN NaN
2 0.0 0.0 0.0 0.0 NaN NaN
3 NaN NaN 1.0 1.0 1.0 1.0
4 NaN NaN 1.0 1.0 1.0 1.0
5 NaN NaN 1.0 1.0 1.0 1.0
C D
0 0.0 0.0
1 0.0 0.0
2 0.0 0.0
3 1.0 1.0
4 1.0 1.0
5 1.0 1.0
# Horizontal (column-wise) concatenation of two frames whose row labels only
# partly overlap ('2' and '3' are shared).
df0 = pd.DataFrame(np.ones((3, 4)) * 0, index=['1', '2', '3'], columns=['A', 'B', 'C', 'D'])
df1 = pd.DataFrame(np.ones((3, 4)) * 1, index=['2', '3', '4'], columns=['A', 'B', 'C', 'D'])
print(df0)
print()
print(df1)
print()

# Default outer join on the index: union of row labels, NaN where a frame
# has no such row.
res = pd.concat([df0, df1], axis=1)
print(res)
print()

# Inner join keeps only the shared row labels; with axis=1, ignore_index
# renumbers the *columns* 0..n-1.
res = pd.concat([df0, df1], axis=1, join='inner', ignore_index=True)
print(res)
print()

# Keep exactly df0's row labels. The `join_axes` argument of concat was
# removed in pandas 1.0; reindexing the result is the documented replacement.
res = pd.concat([df0, df1], axis=1).reindex(df0.index)
print(res)
print()
A B C D
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0
A B C D
2 1.0 1.0 1.0 1.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
A B C D A B C D
1 0.0 0.0 0.0 0.0 NaN NaN NaN NaN
2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
4 NaN NaN NaN NaN 1.0 1.0 1.0 1.0
0 1 2 3 4 5 6 7
2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
A B C D A B C D
1 0.0 0.0 0.0 0.0 NaN NaN NaN NaN
2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
# Appending rows to a frame. DataFrame.append was removed in pandas 2.0, so
# the same results are produced with pd.concat.
df0 = pd.DataFrame(np.ones((3, 4)) * 0, index=['1', '2', '3'], columns=['A', 'B', 'C', 'D'])
df1 = pd.DataFrame(np.ones((3, 4)) * 1, index=['2', '3', '4'], columns=['A', 'B', 'C', 'D'])
print(df0)
print()
print(df1)
print()

# Stack df1 under df0 twice, keeping the original (now duplicated) row labels.
res = pd.concat([df0, df1, df1], ignore_index=False)
print(res)
print()

# Append a Series as a single new row: turn it into a one-row frame first.
# Labels missing on either side (D in the Series, E in df0) become NaN.
s = pd.Series([1, 2, 3, 4], index=['A', 'B', 'C', 'E'])
with_row = pd.concat([df0, s.to_frame().T], ignore_index=True)
print(with_row)
A B C D
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0
A B C D
2 1.0 1.0 1.0 1.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
A B C D
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0
2 1.0 1.0 1.0 1.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
2 1.0 1.0 1.0 1.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
A B C D E
0 0.0 0.0 0.0 0.0 NaN
1 0.0 0.0 0.0 0.0 NaN
2 0.0 0.0 0.0 0.0 NaN
3 1.0 2.0 3.0 NaN 4.0
# SQL-style merges on a single key column. K1 and K2 occur in both frames,
# K0 only in df1 and K3 only in df2.
df1 = pd.DataFrame({'key': ['K0', 'K1', 'K2'], 'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']})
df2 = pd.DataFrame({'key': ['K3', 'K1', 'K2'], 'C': ['C3', 'C1', 'C2'], 'D': ['D3', 'D1', 'D2']})
print(df1)
print()
print(df2)
print()

# Default how='inner': only keys present in both frames.
res = pd.merge(df1, df2, on='key')
print(res)
print()

# Outer: every key from either frame, NaN where one side has no match.
res = pd.merge(df1, df2, on='key', how='outer')
print(res)
print()

# Left: every key of df1.
res = pd.merge(df1, df2, on='key', how='left')
print(res)
print()

# Right: every key of df2.
res = pd.merge(df1, df2, on='key', how='right')
print(res)
print()
A B key
0 A0 B0 K0
1 A1 B1 K1
2 A2 B2 K2
C D key
0 C3 D3 K3
1 C1 D1 K1
2 C2 D2 K2
A B key C D
0 A1 B1 K1 C1 D1
1 A2 B2 K2 C2 D2
A B key C D
0 A0 B0 K0 NaN NaN
1 A1 B1 K1 C1 D1
2 A2 B2 K2 C2 D2
3 NaN NaN K3 C3 D3
A B key C D
0 A0 B0 K0 NaN NaN
1 A1 B1 K1 C1 D1
2 A2 B2 K2 C2 D2
A B key C D
0 A1 B1 K1 C1 D1
1 A2 B2 K2 C2 D2
2 NaN NaN K3 C3 D3
# Merging on a composite key (key1, key2). df2 contains the pair (K0, K0)
# twice, so the matching df1 row is repeated in the result.
df1 = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1'],
    'key2': ['K0', 'K1', 'K1'],
    'A': ['A0', 'A1', 'A2'],
    'B': ['B0', 'B1', 'B2'],
})
df2 = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K1', 'K2'],
    'C': ['C3', 'C1', 'C2', 'C4'],
    'D': ['D3', 'D1', 'D2', 'D4'],
})
print(df1)
print()
print(df2)
print()

# Inner merge: only key pairs found in both frames.
res = pd.merge(df1, df2, on=['key1', 'key2'])
print(res)
print()

# Outer merge; indicator='indi' adds a column named 'indi' recording whether
# each row came from both frames, the left only, or the right only.
res = pd.merge(df1, df2, on=['key1', 'key2'], how='outer', indicator='indi')
print(res)
print()
A B key1 key2
0 A0 B0 K0 K0
1 A1 B1 K0 K1
2 A2 B2 K1 K1
C D key1 key2
0 C3 D3 K0 K0
1 C1 D1 K0 K0
2 C2 D2 K1 K1
3 C4 D4 K2 K2
A B key1 key2 C D
0 A0 B0 K0 K0 C3 D3
1 A0 B0 K0 K0 C1 D1
2 A2 B2 K1 K1 C2 D2
A B key1 key2 C D indi
0 A0 B0 K0 K0 C3 D3 both
1 A0 B0 K0 K0 C1 D1 both
2 A1 B1 K0 K1 NaN NaN left_only
3 A2 B2 K1 K1 C2 D2 both
4 NaN NaN K2 K2 C4 D4 right_only
A B
index0 A0 B0
index1 A1 B1
index2 A2 B2
A D
index3 C3 D3
index1 C1 D1
index2 C2 D2
A_x B A_y D
index1 A1 B1 C1 D1
index2 A2 B2 C2 D2
A_b B A_g D
index0 A0 B0 NaN NaN
index1 A1 B1 C1 D1
index2 A2 B2 C2 D2
index3 NaN NaN C3 D3
# Without `on`, join aligns the two frames on their indexes. The suffixes
# disambiguate column names that exist in both frames.
res = df1.join(df2, how='outer', lsuffix='_left', rsuffix='_right')
print(res)
print()

# With `on`, the named column of df1 is matched against df2's index instead.
res = df1.join(df2, on='B', how='outer', lsuffix='_left', rsuffix='_right')
print(res)
print()
A_left B A_right D
index0 A0 B0 NaN NaN
index1 A1 B1 C1 D1
index2 A2 B2 C2 D2
index3 NaN NaN C3 D3
A_left B A_right D
index0 A0 B0 NaN NaN
index1 A1 B1 NaN NaN
index2 A2 B2 NaN NaN
index2 NaN index3 C3 D3
index2 NaN index1 C1 D1
index2 NaN index2 C2 D2