问题
I have a data frame:
# Example data: a tibble of 96 rows grouped by `date` (4 dates x 24 hourly rows).
# Columns: `date` (Date, stored as day counts 17563-17566), `hour` (character
# "00"-"23"), and the integer counts `offered` and `answered`.
df <- structure(list(date = structure(c(17563, 17563, 17563, 17563,
17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563,
17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563, 17563,
17563, 17563, 17564, 17564, 17564, 17564, 17564, 17564, 17564,
17564, 17564, 17564, 17564, 17564, 17564, 17564, 17564, 17564,
17564, 17564, 17564, 17564, 17564, 17564, 17564, 17564, 17565,
17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565,
17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565, 17565,
17565, 17565, 17565, 17565, 17565, 17566, 17566, 17566, 17566,
17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566,
17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566, 17566,
17566, 17566), class = "Date"), hour = c("00", "01", "02", "03",
"04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "23", "00", "01",
"02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23",
"00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
"11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
"22", "23", "00", "01", "02", "03", "04", "05", "06", "07", "08",
"09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20", "21", "22", "23"), offered = c(30L, 28L, 15L, 21L, 11L,
14L, 18L, 35L, 42L, 36L, 37L, 38L, 54L, 45L, 37L, 52L, 40L, 66L,
84L, 69L, 75L, 51L, 39L, 38L, 25L, 21L, 18L, 20L, 7L, 14L, 14L,
28L, 37L, 50L, 46L, 31L, 45L, 45L, 39L, 31L, 48L, 69L, 91L, 117L,
74L, 66L, 60L, 37L, 20L, 31L, 15L, 26L, 18L, 12L, 21L, 42L, 107L,
118L, 138L, 137L, 93L, 109L, 102L, 91L, 102L, 76L, 76L, 70L,
68L, 74L, 55L, 54L, 28L, 19L, 23L, 12L, 16L, 12L, 18L, 39L, 96L,
119L, 111L, 95L, 65L, 81L, 67L, 76L, 64L, 64L, 68L, 71L, 54L,
65L, 51L, 41L), answered = c(30L, 28L, 15L, 21L, 11L, 14L, 18L,
35L, 42L, 36L, 37L, 38L, 54L, 45L, 37L, 51L, 40L, 66L, 83L, 68L,
74L, 51L, 39L, 38L, 25L, 21L, 18L, 20L, 7L, 14L, 14L, 28L, 37L,
49L, 46L, 31L, 43L, 45L, 39L, 31L, 47L, 65L, 81L, 83L, 61L, 65L,
58L, 37L, 20L, 31L, 15L, 25L, 17L, 12L, 21L, 42L, 106L, 115L,
134L, 127L, 93L, 107L, 97L, 88L, 94L, 74L, 74L, 66L, 65L, 69L,
52L, 51L, 28L, 19L, 23L, 12L, 16L, 12L, 17L, 39L, 91L, 115L,
104L, 95L, 65L, 79L, 67L, 73L, 64L, 64L, 68L, 70L, 53L, 64L,
48L, 38L)), row.names = c(NA, -96L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(date = structure(c(17563,
17564, 17565, 17566), class = "Date"), .rows = list(1:24, 25:48,
49:72, 73:96)), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
Which looks like this:
> head(df)
# A tibble: 6 x 4
# Groups: date [1]
date hour offered answered
<date> <chr> <int> <int>
1 2018-02-01 00 30 30
2 2018-02-01 01 28 28
3 2018-02-01 02 15 15
4 2018-02-01 03 21 21
5 2018-02-01 04 11 11
6 2018-02-01 05 14 14
How can I spread out the `hour` column and preserve both the `offered` and `answered` values for each date?
I have tried using `tidyr::spread()`, but I either keep getting error messages or get lots of `NA` values in the results. Also, I notice that it expects a single `value` column, which is confusing.
How can I achieve this?
UPDATE:
Having thought about the problem some more, I now realise that it would be easier to spread the date values and just keep either `answered` or `offered`.
I won't change the question as there are some helpful solutions posted that may assist others, but instead I will add to it.
Ultimately, something like this would be ideal:
回答1:
Here's a way using `gather` and `spread` from `tidyr`:
# Reshape in two steps: stack `offered`/`answered` into key-value rows
# (keeping `date` and `hour` as identifiers), then fan the hours out so each
# hour becomes its own column.
spread(
  gather(df, key = variable, value = value, -date, -hour),
  key = hour,
  value = value
)
# A tibble: 8 x 26
date variable `00` `01` `02` `03` `04` `05` `06` `07` `08` `09` `10` `11` `12` `13` `14` `15` `16`
<date> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 2018-02-01 answered 30 28 15 21 11 14 18 35 42 36 37 38 54 45 37 51 40
2 2018-02-01 offered 30 28 15 21 11 14 18 35 42 36 37 38 54 45 37 52 40
3 2018-02-02 answered 25 21 18 20 7 14 14 28 37 49 46 31 43 45 39 31 47
4 2018-02-02 offered 25 21 18 20 7 14 14 28 37 50 46 31 45 45 39 31 48
5 2018-02-03 answered 20 31 15 25 17 12 21 42 106 115 134 127 93 107 97 88 94
6 2018-02-03 offered 20 31 15 26 18 12 21 42 107 118 138 137 93 109 102 91 102
7 2018-02-04 answered 28 19 23 12 16 12 17 39 91 115 104 95 65 79 67 73 64
8 2018-02-04 offered 28 19 23 12 16 12 18 39 96 119 111 95 65 81 67 76 64
# ... with 7 more variables: `17` <int>, `18` <int>, `19` <int>, `20` <int>, `21` <int>, `22` <int>, `23` <int>
I'd suggest prefixing the hour values with a letter (e.g. `h00`) before spreading, so that the resulting column names are syntactically valid and don't need backticks.
回答2:
I'm not sure how wide you want the output to be, but here are two options with data.table `melt` and `dcast`:
library(data.table)

# Turn df into a data.table so the data.table methods of melt()/dcast() apply.
setDT(df)

# Stack `offered`/`answered` into long form (ids: date, hour), then cast back
# out with one "hour_XX" column per hour and one row per date/variable pair.
dcast(
  melt(df, id.vars = c("date", "hour")),
  date + variable ~ paste0("hour_", hour)
)
#
# date variable hour_00 hour_01 hour_02 hour_03 hour_04 hour_05 hour_06 hour_07
# 1: 2018-02-01 offered 30 28 15 21 11 14 18 35
# 2: 2018-02-01 answered 30 28 15 21 11 14 18 35
# 3: 2018-02-02 offered 25 21 18 20 7 14 14 28
# 4: 2018-02-02 answered 25 21 18 20 7 14 14 28
# 5: 2018-02-03 offered 20 31 15 26 18 12 21 42
# 6: 2018-02-03 answered 20 31 15 25 17 12 21 42
# 7: 2018-02-04 offered 28 19 23 12 16 12 18 39
# 8: 2018-02-04 answered 28 19 23 12 16 12 17 39
# hour_08 hour_09 hour_10 hour_11 hour_12 hour_13 hour_14 hour_15 hour_16 hour_17
# 1: 42 36 37 38 54 45 37 52 40 66
# 2: 42 36 37 38 54 45 37 51 40 66
# 3: 37 50 46 31 45 45 39 31 48 69
# 4: 37 49 46 31 43 45 39 31 47 65
# 5: 107 118 138 137 93 109 102 91 102 76
# 6: 106 115 134 127 93 107 97 88 94 74
# 7: 96 119 111 95 65 81 67 76 64 64
# 8: 91 115 104 95 65 79 67 73 64 64
# hour_18 hour_19 hour_20 hour_21 hour_22 hour_23
# 1: 84 69 75 51 39 38
# 2: 83 68 74 51 39 38
# 3: 91 117 74 66 60 37
# 4: 81 83 61 65 58 37
# 5: 76 70 68 74 55 54
# 6: 74 66 65 69 52 51
# 7: 68 71 54 65 51 41
# 8: 68 70 53 64 48 38
Or if you want a separate column for offered and answered
# One row per date; casting both measures at once produces a column per
# measure/hour pair (offered_00 ... offered_23, answered_00 ... answered_23).
# Expects df to already be a data.table (see setDT(df) earlier in the answer).
dcast(
  df,
  formula = date ~ hour,
  value.var = c("offered", "answered")
)
# date offered_00 offered_01 offered_02 offered_03 offered_04 offered_05
# 1: 2018-02-01 30 28 15 21 11 14
# 2: 2018-02-02 25 21 18 20 7 14
# 3: 2018-02-03 20 31 15 26 18 12
# 4: 2018-02-04 28 19 23 12 16 12
# offered_06 offered_07 offered_08 offered_09 offered_10 offered_11 offered_12
# 1: 18 35 42 36 37 38 54
# 2: 14 28 37 50 46 31 45
# 3: 21 42 107 118 138 137 93
# 4: 18 39 96 119 111 95 65
# offered_13 offered_14 offered_15 offered_16 offered_17 offered_18 offered_19
# 1: 45 37 52 40 66 84 69
# 2: 45 39 31 48 69 91 117
# 3: 109 102 91 102 76 76 70
# 4: 81 67 76 64 64 68 71
# offered_20 offered_21 offered_22 offered_23 answered_00 answered_01 answered_02
# 1: 75 51 39 38 30 28 15
# 2: 74 66 60 37 25 21 18
# 3: 68 74 55 54 20 31 15
# 4: 54 65 51 41 28 19 23
# answered_03 answered_04 answered_05 answered_06 answered_07 answered_08 answered_09
# 1: 21 11 14 18 35 42 36
# 2: 20 7 14 14 28 37 49
# 3: 25 17 12 21 42 106 115
# 4: 12 16 12 17 39 91 115
# answered_10 answered_11 answered_12 answered_13 answered_14 answered_15 answered_16
# 1: 37 38 54 45 37 51 40
# 2: 46 31 43 45 39 31 47
# 3: 134 127 93 107 97 88 94
# 4: 104 95 65 79 67 73 64
# answered_17 answered_18 answered_19 answered_20 answered_21 answered_22 answered_23
# 1: 66 83 68 74 51 39 38
# 2: 65 81 83 61 65 58 37
# 3: 74 74 66 65 69 52 51
# 4: 64 68 70 53 64 48 38
来源:https://stackoverflow.com/questions/55923514/how-can-i-spread-a-data-frame-from-long-to-wide-and-preserve-two-fields-data