Here I import the data and do some manipulations on it (this is unlikely to be where the issue/fix lies).
The first two lines set the parameters for my cut.
Instead of saving parts of the data.frame as separate files and doing the same operations on each of them, you can just group by multiple variables. You can use `lubridate::month` to extract the month as a number from each date (in base R you could use `strptime(df$date, '%Y-%m-%d')$mon + 1`), which lets you simply use `ifelse` to create a new grouping variable instead of `cut` with repeated labels (which will cause an error in R >= 3.4.0). Once you set all the grouping variables, summarizing is simple and DRY.
library(dplyr)

# Mean and standard deviation of `temp` for every combination of
# canopy/understory level, season and time-of-day bin.
df %>%
  mutate(
    # Numeric month of each date: months before May become "s", the rest "w".
    season = ifelse(lubridate::month(date) < 5, "s", "w"),
    # Bin `time` (presumably an HHMM clock reading) into 100-wide intervals:
    # [0,100], (100,200], ..., (2300,2400].
    hour = cut(time, breaks = seq(0, 2400, by = 100), include.lowest = TRUE)
  ) %>%
  group_by(canopy_understory, season, hour) %>%
  summarise(
    temp_mean = mean(temp),
    temp_sd = sd(temp) # NA for any group holding a single observation
  )
#> # A tibble: 20 x 5
#> # Groups: canopy_understory, season [?]
#> canopy_understory season hour temp_mean temp_sd
#> <fctr> <chr> <fctr> <dbl> <dbl>
#> 1 c w [0,100] 21.5 NA
#> 2 c w (500,600] 20.1 NA
#> 3 c w (700,800] 25.5 NA
#> 4 c w (900,1e+03] 29.0 NA
#> 5 c w (1.1e+03,1.2e+03] 28.0 NA
#> 6 c w (1.3e+03,1.4e+03] 28.5 NA
#> 7 c w (1.6e+03,1.7e+03] 27.5 NA
#> 8 c w (1.8e+03,1.9e+03] 25.5 NA
#> 9 c w (2e+03,2.1e+03] 23.5 NA
#> 10 c w (2.1e+03,2.2e+03] 22.5 NA
#> 11 u s (100,200] 23.6 NA
#> 12 u s (300,400] 24.1 NA
#> 13 u s (500,600] 24.1 NA
#> 14 u s (700,800] 24.6 NA
#> 15 u s (900,1e+03] 24.6 NA
#> 16 u s (1.1e+03,1.2e+03] 26.1 NA
#> 17 u s (1.3e+03,1.4e+03] 26.6 NA
#> 18 u s (1.5e+03,1.6e+03] 25.6 NA
#> 19 u s (1.7e+03,1.8e+03] 24.1 NA
#> 20 u s (1.9e+03,2e+03] 24.1 NA
Standard deviations for the sample data are `NA` because there's only one observation in each group, but it should work fine on larger data.
Data
# Sample data: 20 logger readings, the first ten from trap "LS_trap_10c"
# (canopy_understory "c") and the last ten from "LS_trap_10u" ("u").
# Identifier columns and `date` are factors, `time` is an integer, `temp` is
# a double, and `humidity` is a factor whose only level is the literal
# string "<NA>" (not a missing value).
df <- structure(
  list(
    trap = factor(rep(c("LS_trap_10c", "LS_trap_10u"), each = 10L)),
    serial_no = factor(
      rep(c("7C000000395C1641", "9F00000039641541"), each = 10L)
    ),
    file_name = factor(
      rep(
        c(
          "trap10c_7C000000395C1641_150809.csv",
          "trap10u_9F00000039641541_160110.csv"
        ),
        each = 10L
      )
    ),
    canopy_understory = factor(rep(c("c", "u"), each = 10L)),
    date = factor(
      rep(
        c("2015-05-28", "2015-05-29", "2016-01-01"),
        times = c(9L, 1L, 10L)
      )
    ),
    time = c(
      600L, 800L, 1000L, 1200L, 1400L, 1601L, 1803L, 2001L, 2200L, 0L,
      159L, 359L, 559L, 759L, 959L, 1159L, 1359L, 1559L, 1759L, 1959L
    ),
    temp = c(
      20.1, 25.5, 29, 28, 28.5, 27.5, 25.5, 23.5, 22.5, 21.5,
      23.6, 24.1, 24.1, 24.6, 24.6, 26.1, 26.6, 25.6, 24.1, 24.1
    ),
    humidity = factor(rep("<NA>", 20L))
  ),
  class = "data.frame",
  # Keep the character row names "1".."20" rather than automatic integer ones.
  row.names = as.character(1:20)
)