Use cut to create 24 categories for a time variable

前端未结

关注

 1  470

无人共我

Here I import the data, do some manipulations to it (this is likely not going to be where the issue/fix lies)

The first two lines set my parameters for my cut.

相关标签:

1条回答

花落未央

2021-01-27 11:32

Instead of saving parts of the data.frame as separate files and doing the same operations on them, you can just group by multiple variables. You can use lubridate::month to extract the month as a number from each date (in base R you could use strptime(df$date, '%Y-%m-%d')$mon + 1), which lets you simply use ifelse to create a new grouping variable instead of cut with repeated labels (which will cause an error in R >= 3.4.0). Once you set all the grouping variables, summarizing is simple and DRY.

library(dplyr)

# Summarise temperature per canopy/understory level, season, and hour of day.
# `time` is an HHMM integer, so the hourly bins must be left-closed: with
# `right = FALSE`, a reading at 06:00 (600) falls in [600,700) instead of being
# lumped into the 05:00 bin. `include.lowest = TRUE` closes the final bin, and
# `dig.lab = 4` keeps four-digit breaks out of scientific notation in the labels.
df %>% group_by(canopy_understory,    # Group by canopy/understory factor
                # Extract numeric month from date. If less than 5, make `season` "s" else "w", and group by it.
                season = ifelse(lubridate::month(date) < 5, 's', 'w'), 
                # Cut time into 24 hourly bins [0,100), [100,200), ..., [2300,2400], and group by the factor returned.
                hour = cut(time, seq(0, 2400, 100), right = FALSE,
                           include.lowest = TRUE, dig.lab = 4)) %>% 
    summarise(temp_mean = mean(temp),    # For each group, calc mean and sd of temp.
              temp_sd = sd(temp))

#> # A tibble: 20 x 5
#> # Groups: canopy_understory, season [?]
#>    canopy_understory season        hour temp_mean temp_sd
#>               <fctr>  <chr>      <fctr>     <dbl>   <dbl>
#>  1                 c      w     [0,100)      21.5      NA
#>  2                 c      w   [600,700)      20.1      NA
#>  3                 c      w   [800,900)      25.5      NA
#>  4                 c      w [1000,1100)      29.0      NA
#>  5                 c      w [1200,1300)      28.0      NA
#>  6                 c      w [1400,1500)      28.5      NA
#>  7                 c      w [1600,1700)      27.5      NA
#>  8                 c      w [1800,1900)      25.5      NA
#>  9                 c      w [2000,2100)      23.5      NA
#> 10                 c      w [2200,2300)      22.5      NA
#> 11                 u      s   [100,200)      23.6      NA
#> 12                 u      s   [300,400)      24.1      NA
#> 13                 u      s   [500,600)      24.1      NA
#> 14                 u      s   [700,800)      24.6      NA
#> 15                 u      s  [900,1000)      24.6      NA
#> 16                 u      s [1100,1200)      26.1      NA
#> 17                 u      s [1300,1400)      26.6      NA
#> 18                 u      s [1500,1600)      25.6      NA
#> 19                 u      s [1700,1800)      24.1      NA
#> 20                 u      s [1900,2000)      24.1      NA

Standard deviations for the sample data are NA because there's only one observation in each group, but it should work fine on larger data.

Data

# Sample data: 20 temperature-logger readings, ten from each of two traps.
# The first ten rows belong to the canopy trap ("c"), the last ten to the
# understory trap ("u"). Factor columns are built from integer codes plus
# their levels, mirroring what dput() emits; `time` is an integer and
# `humidity` is a one-level factor whose only level is the string "<NA>".
df <- structure(
    list(
        trap = structure(
            rep(1:2, each = 10L),
            levels = c("LS_trap_10c", "LS_trap_10u"),
            class = "factor"
        ),
        serial_no = structure(
            rep(1:2, each = 10L),
            levels = c("7C000000395C1641", "9F00000039641541"),
            class = "factor"
        ),
        file_name = structure(
            rep(1:2, each = 10L),
            levels = c("trap10c_7C000000395C1641_150809.csv",
                       "trap10u_9F00000039641541_160110.csv"),
            class = "factor"
        ),
        canopy_understory = structure(
            rep(1:2, each = 10L),
            levels = c("c", "u"),
            class = "factor"
        ),
        # Nine readings on the first date, one on the second, ten on the third.
        date = structure(
            rep(1:3, times = c(9L, 1L, 10L)),
            levels = c("2015-05-28", "2015-05-29", "2016-01-01"),
            class = "factor"
        ),
        time = c(
            600L, 800L, 1000L, 1200L, 1400L, 1601L, 1803L, 2001L, 2200L, 0L,
            159L, 359L, 559L, 759L, 959L, 1159L, 1359L, 1559L, 1759L, 1959L
        ),
        temp = c(
            20.1, 25.5, 29, 28, 28.5, 27.5, 25.5, 23.5, 22.5, 21.5,
            23.6, 24.1, 24.1, 24.6, 24.6, 26.1, 26.6, 25.6, 24.1, 24.1
        ),
        humidity = structure(
            rep(1L, 20L),
            levels = "<NA>",
            class = "factor"
        )
    ),
    class = "data.frame",
    row.names = as.character(1:20)
)

0 讨论(0)