In the last question I did they pointed out that less data would be easy to read and understand as part of the reproducible example. On the way to asking again I tried to sh
All of that extra funk is from your `factor` levels. If you know your problem will still be reproducible after dropping the unused levels, then you can consider (wait for it) `droplevels`:
> dput(droplevels(head(data)))
structure(list(GOterm = structure(1:6, .Label = c("GO:0000746",
"GO:0000910", "GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399"
), class = "factor"), GOdesc = structure(c(1L, 2L, 4L, 3L, 5L,
6L), .Label = c("conjugation", "cytokinesis", "DNA metabolic process",
"generation of precursor metabolites and energy", "transcription",
"tRNA metabolic process"), class = "factor"), GSA_p33_SC = c(NA,
-1, NA, NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA),
GSA_p38_SC = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), GSA_p52_SC = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p52_X33 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_SC = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_X33 = c(1,
NA, NA, NA, NA, NA), GSA_SC_X33 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_)), .Names = c("GOterm", "GOdesc",
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC",
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA,
6L), class = "data.frame")
This is more easily demonstrated in the following example:
x <- factor("A", levels = LETTERS)
x
# [1] A
# Levels: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
dput(x)
# structure(1L, .Label = c("A", "B", "C", "D", "E", "F", "G", "H",
# "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
# "V", "W", "X", "Y", "Z"), class = "factor")
dput(droplevels(x))
# structure(1L, .Label = "A", class = "factor")
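Another way to shorten it up would be to convert the columns to character before calling `dput`. The data can then be read back in with `as.data.frame`, which rebuilds the factors with only the levels that are actually in use (this relies on `stringsAsFactors = TRUE`, the default before R 4.0.0; on newer versions pass it explicitly). Note that the numeric columns are also converted to character by this approach, so they have to be converted back (e.g. with `as.numeric`) if their type matters for the example.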
First subset
> data2 <- data[sample(nrow(data), 4), ]
Then `dput` the columns as characters:
> d <- dput(lapply(data2, as.character))
structure(list(GOterm = c("GO:0000746", "GO:0070647", "GO:0006914",
"GO:0007010"), GOdesc = c("conjugation", NA, NA, "cytoskeleton organization and biogenesis"
), GSA_p33_SC = c(NA_character_, NA_character_, NA_character_,
NA_character_), GSA_p33_X33 = c(NA, NA, "1", "1"), GSA_p38_SC = c(NA_character_,
NA_character_, NA_character_, NA_character_), GSA_p38_X33 = c(NA_character_,
NA_character_, NA_character_, NA_character_), GSA_p52_SC = c(NA,
"-1", NA, NA), GSA_p52_X33 = c(NA, NA, NA, "1"), GSA_p64_SC = c(NA,
NA, NA, "1"), GSA_p64_X33 = c("1", NA, NA, NA), GSA_SC_X33 = c(NA,
NA, NA, "1")), .Names = c("GOterm", "GOdesc", "GSA_p33_SC", "GSA_p33_X33",
"GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", "GSA_p52_X33", "GSA_p64_SC",
"GSA_p64_X33", "GSA_SC_X33"))
And read back in
> as.data.frame(d)