I have a large data.table in R with several columns containing dollar values. In a different column I have an inflation adjustment number. I am trying to figure out how to update each of the dollar-value columns by multiplying it by the inflation adjustment column.
This approach is also quite convenient, but likely slower than using set():
# Build a reproducible example table: three income columns plus a deflator.
library(data.table)
library(magrittr)
set.seed(42)
DT <- data.table(
  id = 1:1000,
  year = round(runif(1000) * 10),
  inc1 = runif(1000),
  inc2 = runif(1000),
  inc3 = runif(1000),
  deflator = rnorm(1000)
)
# Anchor the pattern so only names that START with "inc" are selected; an
# unanchored "inc" would also match unrelated columns such as "principal".
vars <- names(DT) %>% grep("^inc", ., value = TRUE)
# Multiply every selected column by the deflator column; `:=` updates DT
# in place, so no reassignment is needed.
DT[, (vars) := .SD * deflator, .SDcols = vars]
# Trailing [] forces the updated table to print.
DT[]
id year inc1 inc2 inc3 deflator
1: 1 9 0.212563676 0.24806366 0.06860638 0.2505781
2: 2 9 -0.017438715 -0.12186792 -0.26241497 -0.2779240
3: 3 3 -1.414016119 -1.20714809 -0.76920337 -1.7247357
4: 4 8 -1.082336969 -1.78411512 -1.08720698 -2.0067049
5: 5 6 -0.644638321 -1.07757416 -0.20895576 -1.2918083
---
996: 996 1 -0.573551720 -1.93996157 -0.50171303 -2.1569621
997: 997 5 -0.007899417 -0.01561619 -0.05708009 -0.0920275
998: 998 1 -0.090975121 -0.30475714 -0.27291825 -0.3974001
999: 999 5 -0.045984079 -0.01563942 -0.07868934 -0.1383273
1000: 1000 0 -0.785962308 -0.63266975 -0.29247974 -0.8257650
You could try
# Multiply each income column by the deflator and assign the results back
# to the same columns. `deflator` is resolved as a column of DT inside `j`.
DT[, (inc_cols) := lapply(.SD, function(x) x * deflator), .SDcols = inc_cols]
# Inspect the table that was just modified (the update above acts on DT).
head(DT, 2)
# id year inc1 inc2 inc3 deflator
#1: 1 3 0.614838304 0.009796974 0.3236051 0.7735552
#2: 2 2 -0.001583579 -0.082289606 -0.1365115 -0.6644330
Or if you need a loop
# Loop variant: rescale one income column per iteration.
for (col_name in inc_cols) {
  # get() looks up the column whose name is stored in col_name.
  DT[, (col_name) := get(col_name) * deflator]
}
head(DT, n = 2)
# id year inc1 inc2 inc3 deflator
#1: 1 3 0.614838304 0.009796974 0.3236051 0.7735552
#2: 2 2 -0.001583579 -0.082289606 -0.1365115 -0.6644330
Or a possible option using `set()`, which should be very fast because the overhead of `[.data.table` is avoided (suggested by @Arun):
# Positions of the income columns. The anchored pattern matches only names
# that start with "inc", in line with how inc_cols is selected.
indx <- grep("^inc", colnames(DT))
# The deflator does not change inside the loop, so extract it once.
defl <- DT[["deflator"]]
for (j in indx) {
  # set() assigns into column j directly, without going through
  # `[.data.table`.
  set(DT, i = NULL, j = j, value = DT[[j]] * defl)
}
head(DT, 2)
# id year inc1 inc2 inc3 deflator
#1: 1 3 0.614838304 0.009796974 0.3236051 0.7735552
#2: 2 2 -0.001583579 -0.082289606 -0.1365115 -0.6644330
where `inc_cols` and `DT` are defined as:
# Reproducible example data. DT is created first so that the column lookup
# below does not depend on a DT left over from earlier code.
set.seed(24)
DT <- data.table(
  id = 1:1000,
  year = round(runif(1000) * 10),
  inc1 = runif(1000),
  inc2 = runif(1000),
  inc3 = runif(1000),
  deflator = rnorm(1000)
)
# Names of the columns to adjust: those whose name starts with "inc".
inc_cols <- grep("^inc", colnames(DT), value = TRUE)
Since you can use dplyr on data.tables, you could also do:
library(dplyr)
# across() replaces the deprecated mutate_each()/funs() pair. mutate() returns
# the modified table rather than changing DT in place, so assign the result
# if it is needed later.
DT %>% mutate(across(starts_with("inc"), ~ .x * deflator))
This multiplies each column of DT whose name starts with "inc" by the "deflator" column.