R语言中merge函数

 

001、合并取交集(两个数据框的键列名不同,用by.x、by.y分别指定)

## Left table: one row per person, keyed by `name1`.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1 = name1, gender = gender, age = age)

## Right table: exam scores, keyed by `name2`.
## Only the keys "bb" and "dd" also occur in dat1.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2 = name2, math = math, eng = eng)

## Show both test data frames.
dat1
dat2

## The key columns have different names, so each side is named explicitly
## through by.x / by.y; only rows whose key occurs in both tables are kept.
merge(x = dat1, y = dat2, by.x = "name1", by.y = "name2")

 

002、合并取交集(两个数据框的键列名相同,直接用by指定)

## Left table: one row per person, keyed by `name`.
name <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name = name, gender = gender, age = age)

## Right table: exam scores. `name` is reassigned here on purpose so that
## both tables end up with a key column of the same name.
name <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name = name, math = math, eng = eng)

## Show both test data frames.
dat1
dat2

## The key column has the same name on both sides, so a single `by` suffices.
merge(x = dat1, y = dat2, by = "name")

 

003、合并取并集

## Left table: one row per person, keyed by `name1`.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

## Right table: exam scores, keyed by `name2`.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

## Show both test data frames.
dat1
dat2

## all = TRUE keeps the unmatched rows of both tables (union of the keys);
## columns that have no counterpart on the other side are filled with NA.
## TRUE is spelled out because `T` is an ordinary variable that can be
## reassigned, whereas TRUE is a reserved word.
merge(dat1, dat2, by.x = "name1", by.y = "name2", all = TRUE)

 

004、仅保留其中一个数据框的所有行(左连接 / 右连接)

(1)、保留dat1的所有行(左连接)

## Left table: one row per person, keyed by `name1`.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

## Right table: exam scores, keyed by `name2`.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

## Show both test data frames.
dat1
dat2

## Left join: every row of dat1 is kept; rows of dat2 without a match are
## dropped, and dat1 rows without a match get NA in the dat2 columns.
## TRUE / FALSE are spelled out because `T` and `F` can be reassigned.
merge(dat1, dat2, by.x = "name1", by.y = "name2", all.x = TRUE, all.y = FALSE)

 

 

(2)、保留dat2的所有行(右连接)

## Left table: one row per person, keyed by `name1`.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

## Right table: exam scores, keyed by `name2`.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

## Show both test data frames.
dat1
dat2

## Right join: every row of dat2 is kept; rows of dat1 without a match are
## dropped, and dat2 rows without a match get NA in the dat1 columns.
## TRUE / FALSE are spelled out because `T` and `F` can be reassigned.
merge(dat1, dat2, by.x = "name1", by.y = "name2", all.x = FALSE, all.y = TRUE)

 

005、两个数据框没有同名列且不指定by时,结果为笛卡尔积(dat1的每一行与dat2的每一行两两组合;若存在同名列而不指定by,merge默认按所有同名列合并)

## Left table: one row per person, keyed by `name1`.
name1 <- c("aa", "bb", "cc", "dd")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1 = name1, gender = gender, age = age)

## Right table: exam scores, keyed by `name2`.
name2 <- c("xx", "bb", "yy", "dd")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2 = name2, math = math, eng = eng)

## Show both test data frames.
dat1
dat2

## The tables share no column name and no `by` is given, so merge() pairs
## every row of dat1 with every row of dat2 (4 x 4 = 16 rows).
merge(x = dat1, y = dat2)

 

006、是否对合并后的数据排序

## Left table: keys deliberately not in alphabetical order.
name1 <- c("xx", "dd", "yy", "bb")
gender <- c("m", "m", "m", "f")
age <- c(13, 14, 14, 15)
dat1 <- data.frame(name1, gender, age)

## Right table: keys deliberately not in alphabetical order.
name2 <- c("xx", "yy", "cc", "bb")
math <- c(89, 67, 87, 68)
eng <- c(87, 88, 68, 65)
dat2 <- data.frame(name2, math, eng)

## Show both test data frames.
dat1
dat2

## sort = TRUE orders the merged rows by the key column.
## TRUE / FALSE are spelled out because `T` and `F` can be reassigned.
merge(dat1, dat2, by.x = "name1", by.y = "name2", sort = TRUE)

## sort = FALSE skips that ordering step; the merged rows are not sorted
## by key.
merge(dat1, dat2, by.x = "name1", by.y = "name2", sort = FALSE)

参考:https://blog.csdn.net/chongbaikaishi/article/details/115740560

 

posted @ 2022-05-24 21:01  小鲨鱼2018  阅读(929)  评论(0)  编辑  收藏  举报