I have the following data structure
ID Type Values
1 A 5; 7; 8
2 A 6
3 B 2; 3
and I would like to reshape it to a long format, with one row per value.
This should work but maybe there's a better approach:
# Recreate the example data set.
dat <- data.frame(
  ID = 1:3,
  Type = c("A", "A", "B"),
  Values = c("5; 7; 8", "6", "2; 3")
)
# Split each Values cell on the literal ";" separator. as.character() guards
# against the column being a factor.
a <- strsplit(as.character(dat$Values), ";", fixed = TRUE)
# Remove the extra whitespace left around each piece.
a <- lapply(a, trimws)
# Number of pieces in each cell; used to repeat each row that many times.
# lengths() always returns an integer vector, unlike sapply(a, length).
lens <- lengths(a)
# Repeat the ID/Type columns once per piece and reset the row names.
# seq_len() stays correct for a zero-row data frame, where 1:nrow() is c(1, 0).
dat2 <- dat[rep(seq_len(nrow(dat)), lens), 1:2]
rownames(dat2) <- NULL
# Add the flattened values back as a numeric column.
dat2$Value <- as.numeric(unlist(a))
My shot:
# Example data: one row per id, several ";"-separated values per cell.
a <- data.frame(
  id = 1:3,
  type = c("A", "A", "B"),
  values = c("5; 7; 8", "6", "2; 3")
)
# Split every cell on the literal ";" separator.
g <- strsplit(as.character(a$values), ";", fixed = TRUE)
# Pieces per row, as a plain integer vector for rep()'s `times` argument
# (instead of the list that lapply(g, length) would produce).
n_values <- lengths(g)
# Repeat id/type once per piece. as.numeric() ignores the space that follows
# each ";", so the values come out as numbers rather than strings like " 7".
reshaped <- data.frame(
  id = rep(a$id, n_values),
  type = rep(a$type, n_values),
  values = as.numeric(unlist(g))
)
reshaped
A data.table approach, for coding elegance:
library(data.table)
# Turn the example data frame into a data.table.
DT <- data.table(dat)
# Within each (ID, Type) group, split the Values string on "; " and return the
# pieces one per row in a column named Value.
DT[,
  .(Value = unlist(strsplit(as.character(Values), '; '))),
  by = .(ID, Type)
]
The answers so far are great. Here's yet another.
# Example data; note the inconsistent spacing around the ";" separators.
DF <- data.frame(
  ID = 1:3,
  Type = c("A", "A", "B"),
  Values = c(" 5; 7; 8", "6", " 2;3")
)
This solution uses the colsplit() function from the "reshape2" package. One downside is that it expects you to know in advance how many columns the split will produce.
# library() stops with an error when reshape2 is not installed; require()
# would only return FALSE and let the colsplit() call below fail instead.
library(reshape2)
# Split Values on ";" into three named columns and bind them to the ID/Type
# columns (DF[-3] drops the original Values column).
DF2 <- data.frame(DF[-3], colsplit(DF$Values, ";", c("V.1", "V.2", "V.3")))
# Stack V.1..V.3 into variable/value pairs, then drop rows containing NA
# (row 5 is absent from the output below).
na.omit(melt(DF2, id.vars = c("ID", "Type")))
# ID Type variable value
# 1 1 A V.1 5
# 2 2 A V.1 6
# 3 3 B V.1 2
# 4 1 A V.2 7
# 6 3 B V.2 3
# 7 1 A V.3 8
From here you can sort and drop columns as required to get your final desired output.
Not a beautiful answer but it could be useful
DF <- data.frame(
  ID = 1:3,
  Type = c("A", "A", "B"),
  Values = c(" 5; 7; 8", "6", " 2;3")
) # this is your df
# Split each cell on the literal ";" and coerce the pieces to numeric.
# The columns are reached through DF$ because bare Values/ID/Type are not
# defined as variables unless DF has been attach()ed.
List <- lapply(
  strsplit(as.character(DF$Values), ";", fixed = TRUE),
  as.numeric
)
# Number of values extracted from each original row.
n_values <- lengths(List)
# The desired output: ID/Type repeated once per extracted value.
long_df <- data.frame(
  ID = rep(DF$ID, n_values),
  Type = rep(DF$Type, n_values),
  Values = unlist(List)
)
long_df
ID Type Values
1 1 A 5
2 1 A 7
3 1 A 8
4 2 A 6
5 3 B 2
6 3 B 3
Since you asked for a plyr solution, here you go:
# For each Type group, split the ";"-separated Values strings and return one
# row per value. ID is not carried over: rows are grouped by Type only.
# NOTE(review): `df` is assumed to be the question's data frame - confirm.
ddply(df, .(Type), function(foo) {
  # as.character() rather than c(): c() does not turn a factor column into
  # strings, and strsplit() rejects non-character input.
  pieces <- strsplit(as.character(foo$Values), ";", fixed = TRUE)
  # Strip the whitespace that surrounds the pieces after splitting.
  values <- trimws(unlist(pieces))
  data.frame(Type = rep(unique(foo$Type), length(values)), Values = values)
})