I have a df where columns 2 and beyond are dollar amounts such as $1004.23, ($1482.40), $2423.94 etc. Similar to the example below:
> df
id desc price
I might approach this more like the following:
dat <- read.table(text = "id desc price
1 0 apple $1.00
2 1 banana ($2.25)
3 2 grapes $1.97",sep = "",header = TRUE,stringsAsFactors = FALSE)
dat$neg <- ifelse(grepl("^\\(.+\\)$",dat$price),-1,1)
dat$price1 <- with(dat,as.numeric(gsub("[^0-9.]","",price)) * neg)
> dat
id desc price neg price1
1 0 apple $1.00 1 1.00
2 1 banana ($2.25) -1 -2.25
3 2 grapes $1.97 1 1.97
...where if you're doing this for multiple columns, you probably wouldn't store the +/- info in the data frame each time, but you get the basic idea.
Another possible solution which builds on your own attempt and takes into account that you need to transform more columns than in the example:
d[,-c(1:2)] <- lapply(d[,-c(1:2)],
function(x) as.numeric(gsub('[$,]', '', sub(")", "", sub("(", "-", x, fixed=TRUE), fixed=TRUE))))
which gives:
> d
id desc price price2
1 0 apple 1.00 -5.90
2 1 banana -2.25 2.39
3 2 grapes 1.97 -0.95
Or using a for-loop:
for(i in 3:ncol(d)){
d[[i]] <- as.numeric(gsub('[$,]', '', sub(")", "", sub("(", "-", d[[i]], fixed=TRUE), fixed=TRUE)))
}
Or using the data.table
package:
library(data.table)
cols <- names(d)[-c(1:2)]
setDT(d)[, (cols) := lapply(.SD, function(x) as.numeric(gsub('[$,]', '', sub(")", "", sub("(", "-", x, fixed=TRUE), fixed=TRUE)))),
.SDcols = cols]
Or using the dplyr
package:
library(dplyr)
d %>%
mutate_all(funs(as.numeric(gsub('[$,]', '', sub(")", "", sub("(", "-", ., fixed=TRUE), fixed=TRUE)))), -c(1:2))
which will all give you the same result.
Used data:
d <- structure(list(id = 0:2, desc = c("apple", "banana", "grapes"),
price = c("$1.00", "($2.25)", "$1.97"),
price2 = c("($5.9)", "$2.39", "($0.95)")),
.Names = c("id", "desc", "price", "price2"), class = "data.frame", row.names = c("1", "2", "3"))
for(i in 1:nrow(df)){
df[i,3] <- as.character(sub(")", "", sub("(", "-", as.character(df[i,3]), fixed=TRUE), fixed=TRUE))
df[i,3] <- as.numeric(gsub('[$,]', '', df[i,3]))
}
This is similar to Matt's answer, but it is vectorized (no loop over the rows needed). It further combines Procrastinatus Maximus' approach to treat several columns, and it also works if the values are originally stored as factors:
df1[3:ncol(df1)] <- apply(df1[3:ncol(df1)], 2, function(x)
as.numeric(gsub("(", "-", gsub(")", "", gsub("$", "",
as.character(x), fixed=TRUE)), fixed=TRUE)))
#> df1
# id desc price price2
#1 0 apple 1.00 -5.90
#2 1 banana -2.25 2.39
#3 2 grapes 1.97 -0.95
data
df1 <- structure(list(id = 0:2, desc = structure(1:3, .Label = c("apple",
"banana", "grapes"), class = "factor"), price = structure(c(1L, 3L, 2L),
.Label = c("$1.00", "$1.97", "($2.25)"), class = "factor"),
price2 = structure(c(3L, 2L, 1L),
.Label = c("($0.95)", "$2.39", "($5.90"),
class = "factor")), .Names = c("id", "desc", "price", "price2"),
class = "data.frame", row.names = c("1", "2", "3"))