# R data-analysis scripts written for Dachu (给大厨写的R数据分析代码)
###************************************** New vs. returning customers (新老客户统计) ***************************************###
# Input: one row per order with the buyer nickname (买家昵称) and the order
# date (下单日期).
# NOTE(review): read.csv documents the encoding value as "UTF-8"; confirm the
# lowercase spelling used here behaves as intended on the target machine.
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\qijiandiankehu.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = c("character", "Date")
)
str(dachu)
head(dachu, 20)

# Distribution of the number of orders per customer.
temp <- table(dachu$买家昵称)
plot(table(sort(temp)) / length(temp))

min(dachu$下单日期)
max(dachu$下单日期)

# Year-month key ("YYYY-MM") of every order; `ss` is the sorted set of months.
dachu$ym <- substr(dachu$下单日期, 1, 7)
head(dachu)
ss <- sort(unique(dachu$ym))

# New customer: exactly one order in the month AND no earlier purchase.
# Returning customer: two or more orders in the month, OR one order in the
# month plus an earlier purchase.
# Orders are selected by month key, so no "first day of next month" is needed
# and data covering a single month (or none) no longer fails.
newcusts <- integer(length(ss))
oldcusts <- integer(length(ss))
seen <- character(0) # every customer observed in earlier months
for (i in seq_along(ss)) {
  orders <- table(dachu$买家昵称[which(dachu$ym == ss[i])])
  once <- names(orders)[orders == 1]
  once_seen_before <- sum(once %in% seen)
  newcusts[i] <- length(once) - once_seen_before
  oldcusts[i] <- length(orders) - length(once) + once_seen_before
  seen <- union(seen, names(orders))
}
newcusts
oldcusts

(newcusts1 <- cbind(date = ss, newcusts))
(oldcusts1 <- cbind(date = ss, oldcusts))
write.csv(newcusts1, "C:\\Users\\hasee\\Desktop\\newcusts.csv", quote = FALSE)
write.csv(oldcusts1, "C:\\Users\\hasee\\Desktop\\oldcusts.csv", quote = FALSE)

win.graph()
opar <- par(no.readonly = TRUE)
par(lty = 1, pch = 1)
# The axis is built from the months actually present in the data instead of a
# hard-coded start date, so it stays aligned if the data range changes.
time <- as.Date(paste0(ss, "-01"))
plot(time, newcusts + oldcusts,
  xlab = "月份", ylab = "客户数",
  main = "薏凡特月度新老客户购买数量变化趋势", type = "o", col = 1
)
# plot() `type`: "p" points, "l" lines, "b" both, "c" empty points joined by
# lines, "o" overplotted points and lines, "s"/"S" stair steps,
# "h" histogram-like vertical lines, "n" nothing.
# pch: point symbol
# cex: point size
# lty: line type (0=blank, 1=solid (default), 2=dashed, 3=dotted, 4=dotdash, 5=longdash, 6=twodash)
# lwd: line width
lines(time, newcusts, type = "o", col = 2)
lines(time, oldcusts, type = "o", col = 3)
legend("topright", c("总体客户", "新客户", "老客户"), col = 1:3, lty = 1, pch = 1)
# Legend positions: "bottomright", "bottom", "bottomleft", "left", "topleft",
# "top", "topright", "right", "center"
par(opar)

###************************************** Monthly repurchase rate (当月回购率) ***************************************###
# At the start of each month, count the customers who bought exactly once in
# the preceding window (up to 12 month keys back); at month end, count how
# many of them bought again during the month.
# NOTE(review): this section reads the same file as the new/returning-customer
# section but addresses the date column as 下单时间 instead of 下单日期 -
# confirm which header the CSV really has.
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\qijiandiankehu.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = c("character", "Date")
)
str(dachu)
min(dachu$下单时间)
max(dachu$下单时间)
order_month <- substr(dachu$下单时间, 1, 7)
ss <- sort(unique(order_month))

# The first month has no history, so results start at the second month.
month_idx <- seq_along(ss)[-1]
counts <- integer(length(month_idx))
repurchase <- integer(length(month_idx))
for (j in seq_along(month_idx)) {
  i <- month_idx[j]
  data1 <- as.Date(paste0(ss[i], "-01")) # first day of month i
  data2 <- as.Date(paste0(ss[max(i - 12, 1)], "-01")) # start of look-back window
  in_window <- (dachu$下单时间 >= data2) & (dachu$下单时间 < data1)
  temp <- table(dachu$买家昵称[in_window])
  once_before <- names(temp)[temp == 1]
  buyers_now <- unique(dachu$买家昵称[which(order_month == ss[i])])
  counts[j] <- length(once_before)
  # Size of the intersection; replaces the inner merge() used only for counting.
  repurchase[j] <- sum(once_before %in% buyers_now)
}
new_customer <- data.frame(
  date = ss[month_idx],
  counts = counts,
  repurchase = repurchase
)
new_customer$rate <- new_customer$repurchase / new_customer$counts

win.graph()
opar <- par(mfrow = c(2, 2))
# Plot against real dates: a character x is rejected by plot() when
# data.frame() keeps strings as character (the default since R 4.0).
month_start <- as.Date(paste0(new_customer$date, "-01"))
plot(month_start, new_customer$counts, xlab = "date", ylab = "counts")
plot(month_start, new_customer$repurchase, xlab = "date", ylab = "repurchase")
plot(month_start, new_customer$rate, xlab = "date", ylab = "rate")
par(opar)
write.csv(new_customer, "C:\\Users\\hasee\\Desktop\\new_customer.csv")
###****************************************** Quarterly conversion rate (季度转化率)
# ****************************************###
# Quarterly conversion: for each start month, take the customers with exactly
# one purchase before that month and count how many of them buy again within
# the next three month keys.
dachu <- read.csv(
  "C:\\Users\\hasee\\Desktop\\qijiandiankehu.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = c("character", "Date")
)
str(dachu)
min(dachu$下单时间)
max(dachu$下单时间)
order_month <- substr(dachu$下单时间, 1, 7)
ss <- sort(unique(order_month))

# Start months run from the 2nd month to the 3rd-from-last one. seq_len() on a
# clamped length yields an empty index (instead of 0 / -1) when fewer than
# three months of data are available.
quarter_idx <- seq_len(max(length(ss) - 2, 0))[-1]
counts <- integer(length(quarter_idx))
repurchase <- integer(length(quarter_idx))
for (j in seq_along(quarter_idx)) {
  i <- quarter_idx[j]
  data1 <- as.Date(paste0(ss[i], "-01"))
  # Purchase history before the quarter starts.
  temp <- table(dachu$买家昵称[dachu$下单时间 < data1])
  once_before <- names(temp)[temp == 1]
  # Orders inside the quarter: the three month keys starting at month i.
  buyers_now <- unique(dachu$买家昵称[order_month %in% ss[i:(i + 2)]])
  counts[j] <- length(once_before)
  repurchase[j] <- sum(once_before %in% buyers_now)
}
new_customer <- data.frame(
  date = ss[quarter_idx],
  counts = counts,
  repurchase = repurchase
)
new_customer$rate <- new_customer$repurchase / new_customer$counts

win.graph()
opar <- par(mfrow = c(2, 2))
# Plot against real dates; a character x is rejected by plot() under R >= 4.0.
quarter_start <- as.Date(paste0(new_customer$date, "-01"))
plot(quarter_start, new_customer$counts, xlab = "date", ylab = "counts")
plot(quarter_start, new_customer$repurchase, xlab = "date", ylab = "repurchase")
plot(quarter_start, new_customer$rate, xlab = "date", ylab = "rate")
par(opar)
# NOTE(review): this is the same path the monthly repurchase section writes
# to, so running both overwrites the monthly result - confirm this is intended.
write.csv(new_customer, "C:\\Users\\hasee\\Desktop\\new_customer.csv")

###************************************ Customer attach rate (客户连带率); the author flagged this section as possibly incorrect ***********************************###
# Only one-time customers are considered.
# Monthly attach rate  = customers with attached purchases in the month /
#                        all purchasing customers in the month
# Product attach rate  = customers who bought the product together with
#                        others / all customers who bought the product
# All purchasing customers = single-order multi-item + single-order one-item
library(readxl)
dachu <- read_excel(
  "C:\\Users\\hasee\\Desktop\\liandailv.xlsx",
  sheet = 1,
  col_names = TRUE,
  col_types = c("text", "text", "text"),
  na = "",
  skip = 0
)
dachu$下单日期 <- as.Date(dachu$下单日期)
str(dachu)
unique(dachu$商品ID)
min(dachu$下单日期)
max(dachu$下单日期)
# Year-month keys ("YYYY-MM") present in the attach-rate data.
month_set <- sort(unique(substr(dachu$下单日期, 1, 7)))
order_month <- substr(dachu$下单日期, 1, 7)

# ---- Monthly attach rate ----
# count  = distinct buyers in the month
# count2 = buyers with more than one row in the month
n_months <- length(month_set)
buyers_total <- integer(n_months)
buyers_multi <- integer(n_months)
for (i in seq_len(n_months)) {
  per_buyer <- table(dachu$买家昵称[which(order_month == month_set[i])])
  buyers_total[i] <- length(per_buyer)
  buyers_multi[i] <- sum(per_buyer > 1)
}
month_associate_rate <- data.frame(
  month = month_set,
  count = buyers_total,
  count2 = buyers_multi,
  rate = buyers_multi / buyers_total
)
month_associate_rate

# ---- Product attach rate ----
# flag = 1 for every row of a buyer who has more than one row overall.
dachu$flag <- 0
head(dachu)
temp <- table(dachu$买家昵称)
multi_buyers <- as.character(names(temp)[temp > 1])
# rep() keeps the lookup table valid (zero rows) when no buyer qualifies.
temp2 <- merge(
  dachu,
  data.frame(x = multi_buyers, flag.x = rep(1, length(multi_buyers))),
  by.x = "买家昵称", by.y = "x", all.x = TRUE
)
temp2$flag[which(temp2$flag.x == 1)] <- 1
temp2$flag.x <- NULL
temp2

# One row per product: rows sold, rows sold to flagged buyers, and the ratio.
prod_set <- unique(dachu$商品ID)
prod_rows <- integer(length(prod_set))
prod_multi <- integer(length(prod_set))
for (j in seq_along(prod_set)) {
  flags <- temp2$flag[temp2$商品ID == prod_set[j]]
  prod_rows[j] <- length(flags)
  prod_multi[j] <- sum(flags == 1)
}
product_associate_rate <- data.frame(
  product = prod_set,
  count = prod_rows,
  count2 = prod_multi,
  rate = prod_multi / prod_rows
)
product_associate_rate <- product_associate_rate[
  order(product_associate_rate$count, decreasing = TRUE),
]
product_associate_rate$product <- as.character(product_associate_rate$product)
head(product_associate_rate)

# Spot check: all rows of the buyers of one specific product.
dachu[dachu$买家昵称 %in% dachu[dachu$商品ID == "42303520877", ]$买家昵称, ]

# ---- Monthly attach rate of the best-selling products ----
# Uses `temp2` from the product attach-rate step. At most five products are
# taken; head() avoids NA product ids when fewer than five exist, and the
# column names follow the number of products actually used.
prod_set <- head(product_associate_rate$product, 5)
temp2_month <- substr(temp2$下单日期, 1, 7)
top5_cols <- list(month = month_set)
for (j in seq_along(prod_set)) {
  is_prod <- temp2$商品ID == prod_set[j]
  n_rows <- integer(n_months)
  n_multi <- integer(n_months)
  for (i in seq_len(n_months)) {
    flags <- temp2$flag[which(is_prod & temp2_month == month_set[i])]
    n_rows[i] <- length(flags)
    n_multi[i] <- sum(flags == 1)
  }
  top5_cols[[paste0("top", j, "count")]] <- n_rows
  top5_cols[[paste0("top", j, "count2")]] <- n_multi
  # A product with no rows in a month gets rate 0 rather than NaN.
  top5_cols[[paste0("top", j, "rate")]] <- ifelse(n_rows == 0, 0, n_multi / n_rows)
}
product_associate_rate_top5 <- as.data.frame(top5_cols)
product_associate_rate_top5

# ---- Charts ----
win.graph()
opar <- par(mfrow = c(1, 2))
# Plot against real dates; a character x is rejected by plot() under R >= 4.0.
plot(as.Date(paste0(month_associate_rate$month, "-01")), month_associate_rate$rate,
  type = "l", col = "blue", main = "月度连带率", xlab = "月份", ylab = "连带率"
)
plot(product_associate_rate$rate, main = "产品连带率", xlab = "产品", ylab = "连带率")
par(opar)
write.csv(month_associate_rate, "C:\\Users\\hasee\\Desktop\\month_associate_rate.csv")
write.csv(product_associate_rate, "C:\\Users\\hasee\\Desktop\\product_associate_rate.csv")
write.csv(product_associate_rate_top5, "C:\\Users\\hasee\\Desktop\\product_associate_rate_top5.csv")

# dplyr provides the join verbs: inner_join, left_join, full_join, right_join, ...
library(dplyr)
library("nycflights13")
# Drop unimportant variables so it's easier to understand the join results.
flights2 <- flights %>%
  select(year:day, tailnum, carrier)
flights2 %>%
  left_join(airlines, by = "carrier")
# Base-R equivalent of a left join, for reference:
# merge(data.frame(x=1:3,y=0,z=2),data.frame(x=2:3,y=1:2),by=c("x"),all.x = TRUE)

###******************************************* Repurchase rate vs. first-order amount (回购率与首次消费金额关系) ********************************************###
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\suoyoukehushuju.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = c("character", "Date", "numeric")
)
str(dachu)
head(dachu, 20)
library(dplyr)

# Quick look at the data ordered by buyer, newest order first.
temp <- head(arrange(dachu, 买家昵称, desc(下单时间)), 100)
temp

# One row per buyer: the earliest order plus the buyer's total order count.
# ungroup() drops the per-buyer grouping so later column assignments operate
# on a plain tibble.
temp <- dachu %>%
  arrange(买家昵称, 下单时间) %>%
  group_by(买家昵称) %>%
  mutate(count = n()) %>%
  slice(1) %>%
  ungroup()

win.graph()
opar <- par(mfrow = c(1, 2))
# Amount paid on the first order vs. number of orders.
plot(temp$实付金额, temp$count)
# Frequency of each first-order amount.
plot(table(temp$实付金额))
par(opar)

# Bin the first-order amount: width-100 bins below 1000 (groups 0-9),
# 1000-2000 is group 10, 2000 and above is group 11. Computed in one
# vectorised pass instead of repeated row-subset assignments.
amount <- temp$实付金额
temp$group <- ifelse(
  amount < 1000,
  amount %/% 100,
  ifelse(amount < 2000, 10, 11)
)
head(temp, 20)

# Per bin: buyers with more than one order (n1), all buyers (n2), and the ratio.
temp2 <- temp %>%
  group_by(group) %>%
  summarise(n1 = sum(count > 1), n2 = n(), rate = n1 / n2)
win.graph()
# Repurchase rate per amount bin.
plot(temp2$group, temp2$rate)

###******************************************* Effect of multi-item orders on repurchase (客户联带对回购的影响) *******************************************###
t0 <- Sys.time()
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\AnalysisOrderDownLoad-订单信息-子订单(全量)-10027396-8025-107.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = c(
    rep("character", 4), rep("Date", 3), rep("character", 5),
    "integer", "numeric", "character", rep("numeric", 2)
  )
)
str(dachu)
# Keep only columns 4 and 5; the code below addresses them as 买家昵称 and 下单时间.
dachu <- dachu[, 4:5]
head(dachu)
# Strip the first two characters and the last character of every nickname.
dachu$买家昵称 <- substr(dachu$买家昵称, 3, nchar(dachu$买家昵称) - 1)
head(dachu, 20)
library(dplyr)
# The repurchase rate by number of items in the first order is computed by
# the pipeline that follows.
temp <- dachu %>%
  group_by(买家昵称, 下单时间) %>%
  summarise(count = n()) %>% # rows per buyer and order date
  arrange(买家昵称, 下单时间) %>%
  group_by(买家昵称) %>%
  mutate(count2 = n()) %>% # number of order dates per buyer
  slice(1) %>% # earliest order date of each buyer
  group_by(count) %>%
  mutate(n1 = n(), n2 = sum(count2 > 1), rate = n2 / n1) %>%
  slice(1) %>%
  select(count, n1, n2, rate)
temp
win.graph()
plot(temp$count, temp$rate,
  main = "首单购买件数与回购率", xlab = "首单购买件数",
  ylab = "回购客户占比", col = "red"
)

# Per month of first purchase: share of new customers whose first order had
# more than one row.
temp <- dachu %>%
  group_by(买家昵称, 下单时间) %>%
  summarise(count = n()) %>%
  mutate(
    year = as.integer(substr(下单时间, 1, 4)),
    month = as.integer(substr(下单时间, 6, 7))
  ) %>%
  arrange(买家昵称, 下单时间) %>%
  group_by(买家昵称) %>%
  mutate(count2 = n()) %>%
  slice(1) %>% # first appearance = new customer; relies on the sort above
  group_by(year, month) %>%
  mutate(n1 = n(), n2 = sum(count > 1), rate = n2 / n1) %>%
  slice(1) %>%
  select(下单时间, year, month, n1, n2, rate)
temp
win.graph()
# One axis point per result row, taken from that row's own year/month, so the
# axis stays aligned even if a month is missing from the data.
time <- as.Date(sprintf("%04d-%02d-01", temp$year, temp$month))
plot(time, temp$rate,
  main = "各月新客中连带客户占比", xlab = "月份",
  ylab = "首单购买多件客户占比", type = "l"
)

# Order-level attach rate: multi-row orders / all orders.
temp <- dachu %>%
  group_by(买家昵称, 下单时间) %>%
  summarise(count = n())
sum(temp$count > 1) / nrow(temp)
Sys.time() - t0

###############################################################################################################
setwd("D:\\Dasktop\\bigdata_game\\天池\\大厨")
# dplyr is attached before its first use in this section.
library(dplyr)
dat <- read.csv(
  "kehushuju.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = c("character", "Date", "integer", "numeric", "integer"),
  stringsAsFactors = FALSE
)
dat <- arrange(dat, 买家昵称, 下单日期)
head(dat)

## Number of purchases vs. probability of buying again
temp <- dat %>%
  group_by(买家昵称) %>%
  summarise(count = n())
head(temp)
# For k = 1 .. max_count - 1:
#   rr1[k] = buyers with exactly k + 1 orders
#   rr2[k] = buyers with at least k orders
# seq_len() keeps the range empty when no buyer has more than one order.
max_count <- max(temp$count)
steps <- seq_len(max(max_count - 1, 0))
rr1 <- vapply(steps, function(k) sum(temp$count == k + 1), integer(1))
rr2 <- vapply(steps, function(k) sum(temp$count >= k), integer(1))
rate <- rr1 / rr2
temp2 <- filter(temp, count >= 2)
head(temp2)
rrr <- cbind(rr1, rr2, rate)
rrr

## Repurchase interval
# Attach each buyer's total order count to every order row.
new_dat2 <- select(dat, 买家昵称, 下单日期, 下单时点)
new_dat2 <- merge(new_dat2, temp, by = c("买家昵称"))
head(new_dat2)

# Share of rows with count > 1 for each order hour (下单时点).
share_by_hour <- function(orders, hours) {
  share <- vapply(hours, function(h) {
    cnt <- orders$count[which(orders$下单时点 == h)]
    sum(cnt > 1) / length(cnt)
  }, numeric(1))
  out <- data.frame(as.numeric(hours), share)
  colnames(out) <- c("下单时点", "rate")
  out
}
order_hours <- sort(unique(dat$下单时点))

### Share of repeat buyers per order hour
# Ignoring order sequence: `count` is the buyer's total number of orders.
rate <- share_by_hour(new_dat2, order_hours)
rate

# Taking order sequence into account: `count` becomes the running order number
# (1, 2, 3, ...) within each buyer. The rows must be sorted by buyer and date.
new_dat3 <- arrange(new_dat2, 买家昵称, 下单日期)
head(new_dat3, 50)
t0 <- Sys.time()
new_dat3$count <- ave(seq_len(nrow(new_dat3)), new_dat3$买家昵称, FUN = seq_along)
Sys.time() - t0
head(new_dat3, 50)
tail(new_dat3, 50)

rate2 <- share_by_hour(new_dat3, order_hours)
rate2

# Days since the previous row; the first order of every buyer is reset to 0.
new_dat3$t_diff <- as.integer(
  new_dat3$下单日期 - c(new_dat3$下单日期[1], new_dat3$下单日期[-nrow(new_dat3)])
)
head(new_dat3)
new_dat3$t_diff[new_dat3$count == 1] <- 0
mydata <- new_dat3 %>%
  select(count, t_diff) %>%
  filter(count > 1) %>%
  rename(rebuy = count, redays = t_diff)
head(mydata)
plot(mydata)

# Flag orders placed fewer than 5 days after the buyer's previous order.
new_dat3$m5 <- (new_dat3$t_diff < 5)
new_dat3$m5[new_dat3$count == 1] <- 0

setwd("H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803/自我研究")
dat <- read.csv(
  "kehushuju.csv",
  header = TRUE,
  encoding = "utf-8",
  colClasses = c("character", "Date", "integer", "numeric", "integer")
)
head(dat)
library(dplyr)
dat1 <- arrange(dat, 下单日期)
head(dat1)
m <- 5 # repurchase window in days
# counts[i] is 1 when the buyer of row i has at least two orders dated no
# later than (date of row i + m), i.e. when the buyer's second-earliest order
# date is <= that limit. Computed per buyer instead of re-filtering the whole
# table for every row.
# NOTE(review): assumes columns 1 and 2 of the CSV are 买家昵称 and 下单日期,
# which the earlier positional indexing also relied on.
t0 <- Sys.time()
order_day <- as.numeric(dat1$下单日期)
second_day <- ave(order_day, dat1$买家昵称,
  FUN = function(d) rep(sort(d)[2], length(d))
)
counts <- as.numeric(
  !is.na(dat1$买家昵称) & ((second_day <= order_day + m) %in% TRUE)
)
tt <- Sys.time() - t0
head(counts)
end_dat5 <- cbind(dat1, counts)
write.csv(end_dat5, "H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803/自我研究/end_dat5.csv")