问题
I need to do what tidyr::spread() does, but for multiple value columns
at once.
If I have a data set like this:
# Example data: ten observations cycling through three syllables, each with
# three numeric measurements. Built as a bare tibble-classed list so that no
# package is needed to recreate it.
te <- structure(
  list(
    Syllable = rep_len(c("[pa]", "[ta]", "[ka]"), 10L),
    PA = c(
      15.9252335141423, 2.17504491982172, 5.26727958979289,
      4.48590068583509, 2.1316282072803e-13, 14.1415335887116,
      3.51720477328246, 0.839953301362556, 5.74712643678048,
      7.01396701583887
    ),
    transient_mean = c(
      4.43699436235785, 4.8733556527069, 5.52844792982797,
      3.63255704032305, 4.99835680315547, 5.5387775503751,
      3.19517346916471, 4.40360523945946, 4.14203491258186,
      3.51900453101706
    ),
    transient_sd = c(
      0.871280094068596, 1.51392328075964, 2.65764846931951,
      1.25416942799974, 1.13391173514884, 1.75904804912773,
      1.54594113209317, 1.69526308849507, 1.73693971862859,
      1.31626295142865
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -10L)
)
which looks like this (for those of you just reading this):
> te
# A tibble: 10 x 4
Syllable PA transient_mean transient_sd
<chr> <dbl> <dbl> <dbl>
1 [pa] 1.59e+ 1 4.44 0.871
2 [ta] 2.18e+ 0 4.87 1.51
3 [ka] 5.27e+ 0 5.53 2.66
4 [pa] 4.49e+ 0 3.63 1.25
5 [ta] 2.13e-13 5.00 1.13
6 [ka] 1.41e+ 1 5.54 1.76
7 [pa] 3.52e+ 0 3.20 1.55
8 [ta] 8.40e- 1 4.40 1.70
9 [ka] 5.75e+ 0 4.14 1.74
10 [pa] 7.01e+ 0 3.52 1.32
I would like to make new columns from the values of the Syllable
column, so that I get a wider tibble with column names "[pa]_PA", "[pa]_transient_mean", "[pa]_transient_sd", "[ta]_PA", "[ta]_transient_mean", ... and so on.
I have tried this of course:
> te %>%
+ spread(Syllable,PA:transient_sd)
Error: `var` must evaluate to a single number or a column name, not an integer vector
Call `rlang::last_error()` to see a backtrace
but I get a complaint then, presumably due to me selecting multiple columns.
Any ideas on how this data wrangling can be achieved?
回答1:
Your data is probably lacking a time variable that counts the repeated observations of each syllable ("[pa]", "[ta]", "[ka]"). You could add one with ave().
# Number the observations within each syllable (1st [pa], 2nd [pa], ...).
# Seeding ave() with an integer vector keeps the counter integer; seeding it
# with as.character(Syllable) would store it as text, which sorts
# lexicographically ("10" before "2") once a syllable occurs ten or more times.
te$time <- with(te, ave(seq_along(Syllable), Syllable, FUN = seq_along))
te
# # A tibble: 10 x 5
# Syllable PA transient_mean transient_sd time
# <chr> <dbl> <dbl> <dbl> <int>
# 1 [pa] 1.59e+ 1 4.44 0.871 1
# 2 [ta] 2.18e+ 0 4.87 1.51 1
# 3 [ka] 5.27e+ 0 5.53 2.66 1
# 4 [pa] 4.49e+ 0 3.63 1.25 2
# 5 [ta] 2.13e-13 5.00 1.13 2
# 6 [ka] 1.41e+ 1 5.54 1.76 2
# 7 [pa] 3.52e+ 0 3.20 1.55 3
# 8 [ta] 8.40e- 1 4.40 1.70 3
# 9 [ka] 5.75e+ 0 4.14 1.74 3
# 10 [pa] 7.01e+ 0 3.52 1.32 4
After that you could use base R's reshape() to go from long to wide.
# Go wide: one row per occurrence index (`time`), and one set of value
# columns per distinct `Syllable`.
reshape(
  as.data.frame(te),
  direction = "wide",
  idvar = "time",
  timevar = "Syllable"
)
# time PA.[pa] transient_mean.[pa] transient_sd.[pa] PA.[ta]
# 1 1 15.925234 4.436994 0.8712801 2.175045e+00
# 4 2 4.485901 3.632557 1.2541694 2.131628e-13
# 7 3 3.517205 3.195173 1.5459411 8.399533e-01
# 10 4 7.013967 3.519005 1.3162630 NA
# transient_mean.[ta] transient_sd.[ta] PA.[ka] transient_mean.[ka]
# 1 4.873356 1.513923 5.267280 5.528448
# 4 4.998357 1.133912 14.141534 5.538778
# 7 4.403605 1.695263 5.747126 4.142035
# 10 NA NA NA NA
# transient_sd.[ka]
# 1 2.657648
# 4 1.759048
# 7 1.736940
# 10 NA
回答2:
library(tidyverse)
# Stack the value columns, label each value as "<Syllable>_<measure>", number
# the rows within each label, then spread the labels back out into columns.
# The helper row index only keeps rows distinct for spread() and is dropped
# at the end.
te %>%
  gather(measure, value, -Syllable) %>%
  mutate(measure = paste0(Syllable, "_", measure)) %>%
  select(-Syllable) %>%
  group_by(measure) %>%
  mutate(row_id = row_number()) %>%
  spread(measure, value) %>%
  select(-row_id)
# A tibble: 4 x 9
`[ka]_PA` `[ka]_transient_~ `[ka]_transient_~ `[pa]_PA` `[pa]_transient~ `[pa]_transient~ `[ta]_PA` `[ta]_transient~ `[ta]_transient~
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 5.27 5.53 2.66 15.9 4.44 0.871 2.18e+ 0 4.87 1.51
2 14.1 5.54 1.76 4.49 3.63 1.25 2.13e-13 5.00 1.13
3 5.75 4.14 1.74 3.52 3.20 1.55 8.40e- 1 4.40 1.70
4 NA NA NA 7.01 3.52 1.32 NA NA NA
来源:https://stackoverflow.com/questions/55887750/multiple-column-spread