Here I import the data and apply some manipulations to it (this is unlikely to be where the issue, or its fix, lies).
The first two lines set the parameters for my cut.
Instead of saving parts of the data.frame as separate files and repeating the same operations on each, you can group by several variables at once. Use `lubridate::month()` to extract the month as a number from each date (in base R, `strptime(df$date, '%Y-%m-%d')$mon + 1` does the same). That lets you build the season grouping variable with a simple `ifelse()` instead of calling `cut()` with repeated labels (which raises an error in R >= 3.4.0). Once all the grouping variables are set, summarizing is simple and DRY.
# Mean and standard deviation of `temp` for every combination of
# canopy/understory, season, and 100-unit bin of `time`.
# Uses `%>%`, `group_by()` and `summarise()` unqualified, so dplyr is assumed
# to be attached; lubridate only needs to be installed.
df %>%
  group_by(
    # Group by the canopy/understory factor.
    canopy_understory,
    # Extract the numeric month from `date`; months before May become "s",
    # all others "w".
    season = ifelse(lubridate::month(date) < 5, "s", "w"),
    # Bin `time` at 0, 100, 200, ..., 2400 and group by the resulting factor.
    # `dig.lab = 4` keeps four-digit breaks readable: "(1100,1200]" instead of
    # the default "(1.1e+03,1.2e+03]".
    # NOTE(review): bins are right-closed, so a reading at exactly 600 falls
    # in (500,600]; pass `right = FALSE` if on-the-hour readings should open
    # the next bin instead -- confirm the intended convention.
    hour = cut(time, seq(0, 2400, 100), include.lowest = TRUE, dig.lab = 4)
  ) %>%
  summarise(
    # For each group, compute the mean and standard deviation of `temp`.
    temp_mean = mean(temp),
    temp_sd = sd(temp)
  )
#> # A tibble: 20 x 5
#> # Groups: canopy_understory, season [?]
#>    canopy_understory season hour        temp_mean temp_sd
#>  1 c                 w      [0,100]          21.5      NA
#>  2 c                 w      (500,600]        20.1      NA
#>  3 c                 w      (700,800]        25.5      NA
#>  4 c                 w      (900,1000]       29.0      NA
#>  5 c                 w      (1100,1200]      28.0      NA
#>  6 c                 w      (1300,1400]      28.5      NA
#>  7 c                 w      (1600,1700]      27.5      NA
#>  8 c                 w      (1800,1900]      25.5      NA
#>  9 c                 w      (2000,2100]      23.5      NA
#> 10 c                 w      (2100,2200]      22.5      NA
#> 11 u                 s      (100,200]        23.6      NA
#> 12 u                 s      (300,400]        24.1      NA
#> 13 u                 s      (500,600]        24.1      NA
#> 14 u                 s      (700,800]        24.6      NA
#> 15 u                 s      (900,1000]       24.6      NA
#> 16 u                 s      (1100,1200]      26.1      NA
#> 17 u                 s      (1300,1400]      26.6      NA
#> 18 u                 s      (1500,1600]      25.6      NA
#> 19 u                 s      (1700,1800]      24.1      NA
#> 20 u                 s      (1900,2000]      24.1      NA
The standard deviations for the sample data are `NA` because there is only one observation in each group; with more than one observation per group, `sd()` returns actual values.
# Sample data for the summary: 20 readings, the first 10 from trap
# "LS_trap_10c" (canopy_understory "c") and the last 10 from trap
# "LS_trap_10u" (canopy_understory "u"). Text columns are stored as factors,
# built here from integer codes plus their level labels.
df <- structure(
  list(
    trap = structure(
      rep(1:2, each = 10L),
      levels = c("LS_trap_10c", "LS_trap_10u"),
      class = "factor"
    ),
    serial_no = structure(
      rep(1:2, each = 10L),
      levels = c("7C000000395C1641", "9F00000039641541"),
      class = "factor"
    ),
    file_name = structure(
      rep(1:2, each = 10L),
      levels = c(
        "trap10c_7C000000395C1641_150809.csv",
        "trap10u_9F00000039641541_160110.csv"
      ),
      class = "factor"
    ),
    canopy_understory = structure(
      rep(1:2, each = 10L),
      levels = c("c", "u"),
      class = "factor"
    ),
    # Nine readings on the first date, one on the second, ten on the third.
    date = structure(
      rep(1:3, times = c(9L, 1L, 10L)),
      levels = c("2015-05-28", "2015-05-29", "2016-01-01"),
      class = "factor"
    ),
    time = c(
      600L, 800L, 1000L, 1200L, 1400L, 1601L, 1803L, 2001L, 2200L, 0L,
      159L, 359L, 559L, 759L, 959L, 1159L, 1359L, 1559L, 1759L, 1959L
    ),
    temp = c(
      20.1, 25.5, 29, 28, 28.5, 27.5, 25.5, 23.5, 22.5, 21.5,
      23.6, 24.1, 24.1, 24.6, 24.6, 26.1, 26.6, 25.6, 24.1, 24.1
    ),
    # Every humidity value is the single empty-string level.
    humidity = structure(rep(1L, 20L), levels = "", class = "factor")
  ),
  class = "data.frame",
  row.names = as.character(1:20)
)