Merge合并语法
| # 第一个表: |
| df_ratings = pd.read_csv( |
| 'data/ratings.csv', |
| sep=',', |
| engine='python', |
| names='UserID,MovieID,Rating,Timestamp'.split(',') |
| ) |
| # 第二个表: |
| df_users = pd.read_csv( |
| 'data/user.csv', |
| sep=',', |
| engine='python', |
| names='UserID,Gender,Age,Occupation,Zip-code'.split(',') |
| ) |
| # 第三个表: |
| df_movies = pd.read_csv( |
| 'data/movies.dat', |
| sep=',', |
| engine='python', |
| names='MovieID,Title,Genres'.split(',') |
| ) |
| |
| # 1)第一个merge 两个表进行合并 |
| df_ratings_user = pd.merge( # 两边都有的数据才会显示 |
| df_ratings, df_users, left_on='UserID', right_on='UserID', how='inner' |
| ) |
| |
| # 2)第二个merge,将合并后的两个表和第三个表进行合并 |
| df_ratings_users_movies = pd.merge( |
| df_ratings_user, df_movies,left_on = 'MovieID', right_on = 'MovieID', how = 'inner' |
| ) |
| |
| # 3)理解merge时候数量的对齐关系 |
| # one-to-one:一对一关系,关联的key都是唯一的。结果条数:1*1 |
| # one-to-many:一对多关系,左边唯一key,右边不唯一key。结果条数:1*N |
| # many-to-many:多对多关系,左边右边都不是唯一的。结果条数:M*N |
| |
| # 4)one-to-one一对一关系的merge |
| left = pd.DataFrame({'sno': [11,12,13,14], |
| 'name':['name_a', 'name_b','name_c', 'name_d']}, |
| ) |
| right = pd.DataFrame({'sno': [11,12,13,14], |
| 'age':['21', '22','23', '24']}, |
| ) |
| pd.merge(left, right, on='sno') |
| |
| # 5)one-to-many一对多关系的merge 注意:数据会被复制,数量以多的为准 |
| left = pd.DataFrame({'sno': [11,12,13,14], |
| 'name':['name_a', 'name_b','name_c', 'name_d']}, |
| ) |
| right = pd.DataFrame({'sno': [11,11,11,12,12,13], |
| 'grade':['语文88', '数学90','英语76', '语文66', '数学55', '英语29']}, |
| ) |
| pd.merge(left, right, on='sno') |
| |
| # 6)many-to-many多对多关系的merge |
| left = pd.DataFrame({'sno': [11,11,12,12,13,14], |
| '爱好':['篮球', '羽毛球','乒乓球', '篮球', '足球', '篮球']}, |
| ) |
| right = pd.DataFrame({'sno': [11,11,11,12,12,13], |
| 'grade':['语文88', '数学90','英语76', '语文66', '数学55', '英语29']}, |
| ) |
| pd.merge(left, right, on='sno') |
| |
| # 7)理解left join,right join,inner join, outer join的区别 |
| left = pd.DataFrame({ |
| 'key':['K0', 'K1', 'K2', 'K3'], |
| 'A':['A0','A1','A2','A3'], |
| 'B':['B0', 'B1', 'B2', 'B3'] # left join左侧全保留 + 保留都有的部分,右面无法匹配为Null |
| }) |
| right = pd.DataFrame({ |
| 'key': ['K0', 'K1', 'K4', 'K5'], |
| 'C': ['C0', 'C1', 'C4', 'C5'], # right join右侧全保留 + 保留都有的部分,左面无法匹配为Null |
| 'D': ['D0', 'D1', 'D4', 'D5'] |
| }) |
| |
| # 8)inner join 默认 |
| pd.merge(left, right, how='inner') |
| |
| # 9)outer join左边,右边都会出现在结果中,如果无法匹配则为Null |
| pd.merge(left, right, how='outer') |
| |
| # 10)如果出现非Key的字段相同,怎么办? |
| left = pd.DataFrame({ |
| 'key':['K0', 'K1', 'K2', 'K3'], |
| 'A':['A0','A1','A2','A3'], |
| 'B':['B0', 'B1', 'B2', 'B3'] |
| }) |
| right = pd.DataFrame({ |
| 'key': ['K0', 'K1', 'K4', 'K5'], |
| 'A': ['A10', 'A11', 'A12', 'A13'], |
| 'D': ['D0', 'D1', 'D4', 'D5'] |
| }) |
| pd.merge(left, right, on='key') |
| # key A_x B A_y D |
| # 0 K0 A0 B0 A10 D0 |
| # 1 K1 A1 B1 A11 D1 |
| pd.merge(left, right, on='key', suffixes=('_left', '_right')) |
| # key A_left B A_right D |
| # 0 K0 A0 B0 A10 D0 |
| # 1 K1 A1 B1 A11 D1 |
Concat合并语法
| import warnings |
| warnings.filterwarnings('ignore') |
| |
| |
| |
| pd.concat([df1, df2]) |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| pd.concat([df1, s1], axis=1) |
| |
| |
| s2 = df1.apply(lambda s:s['A'] + '_GG', axis=1) |
| s2.name='G' |
| pd.concat([df1, s1, s2], axis=1) |
| |
| |
| pd.concat([s1, s2], axis=1) |
| |
| |
| pd.concat([s1, df1, s2], axis=1) |
| |
| |
| df1 = pd.DataFrame([[1,2], [3,4]], columns=list('AB')) |
| df2 = pd.DataFrame([[5,6], [7,8]], columns=list('AB')) |
| df1.append(df2) |
| |
| df1.append(df2, ignore_index=True) |
| |
| |
| df = pd.DataFrame(columns=['A']) |
| |
| |
| for i in range(5): |
| df = df.append({'A':i}, ignore_index=True) |
| |
| |
| pd.concat( |
| [pd.DataFrame([i], columns=['A']) for i in range(5)], |
| ignore_index=True |
| ) |
| |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律