Fastest Way to Add New Variables to a Large data.frame

 # Benchmark five ways of adding two derived columns to the `hflights` data
 # frame:
 #   wday  - "weekend" when DayOfWeek is 6 or 7, otherwise "weekday"
 #   delay - ArrDelay + DepDelay
 pkgs <- c("hflights", "doParallel", "foreach", "dplyr", "rbenchmark",
           "data.table")

 # library() stops with an error when a package is missing, whereas require()
 # only returns FALSE and lets the script fail later with a confusing message.
 # invisible() suppresses the printed list returned by lapply().
 invisible(lapply(pkgs, library, character.only = TRUE))

 data(hflights)

 # Each named argument is one candidate; every candidate is evaluated 10 times.
 # Rows of the result are sorted by `user.self`, and the `relative` column is
 # the ratio of each candidate's `user.self` to the smallest one.
 benchmark(replications = 10, order = "user.self", relative = "user.self",
   transform = {
     ### THE GENERIC FUNCTION MODIFYING THE DATA.FRAME, SIMILAR TO DATA.FRAME() ###
     transform(
       hflights,
       wday = ifelse(DayOfWeek %in% c(6, 7), "weekend", "weekday"),
       delay = ArrDelay + DepDelay
     )
   },
   within = {
     ### EVALUATE THE EXPRESSION WITHIN THE LOCAL ENVIRONMENT ###
     within(hflights, {
       wday <- ifelse(DayOfWeek %in% c(6, 7), "weekend", "weekday")
       delay <- ArrDelay + DepDelay
     })
   },
   mutate = {
     ### THE SPECIFIC FUNCTION IN DPLYR PACKAGE TO ADD VARIABLES ###
     mutate(
       hflights,
       wday = ifelse(DayOfWeek %in% c(6, 7), "weekend", "weekday"),
       delay = ArrDelay + DepDelay
     )
   },
   foreach = {
     ### SPLIT AND THEN COMBINE IN PARALLEL ###
     # The backend registration is inside the timed expression, so it is
     # repeated (and measured) on every replication.
     registerDoParallel(cores = 2)
     v <- c(names(hflights), "wday", "delay")
     # One unevaluated expression per new column; each is evaluated by a
     # separate foreach task.
     f <- expression(
       ifelse(hflights$DayOfWeek %in% c(6, 7), "weekend", "weekday"),
       hflights$ArrDelay + hflights$DepDelay
     )
     # Starting from `hflights` (.init), every task result is appended as an
     # extra column through mutate(); the appended columns are unnamed here
     # and receive their final names from `v` below.
     df <- foreach(fn = iter(f), .combine = mutate, .init = hflights) %dopar% {
       eval(fn)
     }
     names(df) <- v
   },
   data.table = {
     ### DATA.TABLE ###
     # data.table(hflights) builds a new data.table; `:=` then adds both
     # columns to that new object by reference.
     data.table(hflights)[, c("wday", "delay") := list(
       ifelse(hflights$DayOfWeek %in% c(6, 7), "weekend", "weekday"),
       hflights$ArrDelay + hflights$DepDelay
     )]
   }
 )

 # Recorded output of the benchmark above. The rows are ordered by `user.self`,
 # which does not include the `user.child` time that is non-zero only for the
 # `foreach` run. Judged by `elapsed`, `within` and `mutate` are the fastest
 # and `foreach` is slower than both.
 #
 #         test replications elapsed relative user.self sys.self user.child
 # 4    foreach           10   1.442    1.000     0.240    0.144      0.848
 # 2     within           10   0.667    2.783     0.668    0.000      0.000
 # 3     mutate           10   0.679    2.833     0.680    0.000      0.000
 # 5 data.table           10   0.955    3.983     0.956    0.000      0.000
 # 1  transform           10   1.732    7.200     1.728    0.000      0.000

 

posted @ 2016-11-01 10:17  payton数据之旅  阅读(149)  评论(0)  编辑  收藏  举报