I have a dataframe like this
# Example input: one row per id, where `features` holds a comma-separated
# list of labels to be expanded into 0/1 indicator columns.
# stringsAsFactors = FALSE keeps `features` as character on every R version
# (it is only the default from R 4.0 onward).
df <- data.frame(
  id = c(1, 2),
  value = c(25, 24),
  features = c("A,B,D,F", "C,B,E"),
  stringsAsFactors = FALSE
)
print(df)
i
Another one using `splitstackshape` and `data.table` (installation instructions here):
# library() stops with an error if a package is missing; require() would only
# return FALSE and let the script fail later with a less obvious message.
library(splitstackshape)
library(data.table) # v1.9.5+
# Split the comma-separated `features` column into long form: one row per
# (id, value, feature) combination.
ans <- cSplit(df, "features", sep = ",", direction = "long")
# Cast back to wide, counting the occurrences of each feature per id/value.
dcast(ans, id + value ~ features, fun.aggregate = length)
#    id value A B C D E F
# 1:  1    25 1 1 0 1 0 1
# 2:  2    24 0 1 1 0 1 0
If you're using data.table v1.9.4, then replace `dcast` with `dcast.data.table`.
Alternatively, you can use `cSplit_e`, like this:
# Expand `features` into one indicator column per distinct label. The original
# `features` column is kept (see output below) and labels that are absent from
# a row are filled with 0.
cSplit_e(
  data = df,
  split.col = "features",
  sep = ",",
  type = "character",
  fill = 0
)
## id value features features_A features_B features_C features_D features_E features_F
## 1 1 25 A,B,D,F 1 1 0 1 0 1
## 2 2 24 C,B,E 0 1 1 0 1 0
A dplyr/tidyr solution
library(dplyr)
library(tidyr)
# Reshape to one row per (id, value, feature), then spread the features into
# one 0/1 indicator column each. This makes no assumption about the maximum
# number of features per row or about which feature labels exist.
df %>%
  separate_rows(features, sep = ",") %>%
  # A label repeated within one row would otherwise produce duplicate cells.
  distinct() %>%
  mutate(n = 1) %>%
  pivot_wider(
    names_from = features,
    values_from = n,
    values_fill = 0,   # features absent from a row become 0 instead of NA
    names_sort = TRUE  # alphabetical indicator columns (A, B, C, ...)
  ) %>%
  # Return a plain data.frame, like the other solutions in this thread.
  as.data.frame()
This is yet another use case for `merge` after a suitable transformation.
library(reshape2)
# Long form: split each feature string, name the pieces by id, and stack them
# into a two-column data frame (`values` = feature, `ind` = id).
f <- stack(
  setNames(
    strsplit(as.character(df$features), ","),
    df$id
  )
)
# Wide form: one row per id, one column per feature, cells are counts.
d <- dcast(f, ind ~ values, fun.aggregate = length, value.var = "ind")
# Attach the counts to the id/value columns of the original data.
out <- merge(df[, 1:2], d, by.x = "id", by.y = "ind")
print(out)
#   id value A B C D E F
# 1  1    25 1 1 0 1 0 1
# 2  2    24 0 1 1 0 1 0
This can also be done using only default libraries (without `reshape2`) in a variety of slightly messier ways. In the above, you can substitute the `d` and `out` lines with the following instead:
# Base-R cross-tabulation: add a constant count of 1 to every (id, feature)
# pair in `f` and sum it per id and feature.
d <- xtabs(
  count ~ ind + values,
  data = transform(f, count = 1)
)
# Convert the contingency table to a data frame (ids become row names) and
# join it to the id/value columns on those row names.
out <- merge(
  df[, 1:2],
  as.data.frame.matrix(d),
  by.x = "id",
  by.y = "row.names"
)
You can do:
library(splitstackshape)
library(qdapTools)
# Split `features` into separate columns without type conversion.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
df1 <- data.frame(cSplit(df, "features", sep = ",", type.convert = FALSE))
# Transpose the split columns so that each original row becomes one column,
# tabulate the labels per column with mtabulate(), and bind the resulting
# counts to the id/value columns.
cbind(df1[1:2], mtabulate(as.data.frame(t(df1[-c(1, 2)]))))
# id value A B C D E F
#1: 1 25 1 1 0 1 0 1
#2: 2 24 0 1 1 0 1 0