Given a situation such as the following
library(dplyr)
myData <- tbl_df(data.frame( var1 = rnorm(100),
var2 = letters[1:3] %
Inspired by the answers by Gregor and dimitris_ps, I wrote a dplyr style function that runs summarise for all combinations of group variables.
summarise_combo <- function(data, ...) {
groupVars <- group_vars(data) %>% map(as.name)
groupCombos <- map( 0:length(groupVars), ~combn(groupVars, ., simplify=FALSE) ) %>%
unlist(recursive = FALSE)
results <- groupCombos %>%
map(function(x) {data %>% group_by(!!! x) %>% summarise(...)} ) %>%
bind_rows()
results %>% select(!!! groupVars, everything())
}
Example
library(tidyverse)
mtcars %>% group_by(cyl, vs) %>% summarise_combo(cyl_n = n(), mean(mpg))
This seems convulated, and there's probably a way to simplify or fancy it up with a do
, but it works. Using your myData
and myGroups
,
results = lapply(myGroups, FUN = function(x) {
do.call(what = group_by_, args = c(list(myData), x)) %>%
summarise( n = length(var1),
avgVar1 = mean(var1))
}
)
> results[[1]]
Source: local data frame [3 x 3]
var2 n avgVar1
1 a 31 0.38929738
2 b 31 -0.07451717
3 c 38 -0.22522129
> results[[4]]
Source: local data frame [9 x 4]
Groups: var2
var2 var3 n avgVar1
1 a A 11 -0.1159160
2 a B 11 0.5663312
3 a C 9 0.7904056
4 b A 7 0.0856384
5 b B 13 0.1309756
6 b C 11 -0.4192895
7 c A 15 -0.2783099
8 c B 10 -0.1110877
9 c C 13 -0.2517602
> results[[7]]
# I won't paste them here, but it has all 27 rows, grouped by var2, var3 and var4.
I changed your summarise
call to average var1
since var2
isn't numeric.
I have created a function based on the answer of @Gregor and the comments that followed:
library(magrittr)
myData <- tbl_df(data.frame( var1 = rnorm(100),
var2 = letters[1:3] %>%
sample(100, replace = TRUE) %>%
factor(),
var3 = LETTERS[1:3] %>%
sample(100, replace = TRUE) %>%
factor(),
var4 = month.abb[1:3] %>%
sample(100, replace = TRUE) %>%
factor()))
combSummarise
combSummarise <- function(data, variables=..., summarise=...){
# Get all different combinations of selected variables (credit to @Michael)
myGroups <- lapply(seq_along(variables), function(x) {
combn(c(variables), x, simplify = FALSE)}) %>%
unlist(recursive = FALSE)
# Group by selected variables (credit to @konvas)
df <- eval(parse(text=paste("lapply(myGroups, function(x){
dplyr::group_by_(data, .dots=x) %>%
dplyr::summarize_( \"", paste(summarise, collapse="\",\""),"\")})"))) %>%
do.call(plyr::rbind.fill,.)
groupNames <- c(myGroups[[length(myGroups)]])
newNames <- names(df)[!(names(df) %in% groupNames)]
df <- cbind(df[, groupNames], df[, newNames])
names(df) <- c(groupNames, newNames)
df
}
combSummarise
combSummarise (myData, var=c("var2", "var3", "var4"),
summarise=c("length(var1)", "mean(var1)", "max(var1)"))
or
combSummarise (myData, var=c("var2", "var4"),
summarise=c("length(var1)", "mean(var1)", "max(var1)"))
or
combSummarise (myData, var=c("var2", "var4"),
summarise=c("length(var1)"))
etc
Using unite to create a new column is the simplest way
library(tidyverse)
df = tibble(
a = c(1,1,2,2,1,1,2,2),
b = c(3,4,3,4,3,4,3,4),
val = c(1,2,3,4,5,6,7,8)
)
print(df)#output1
df_2 = unite(df, 'combined_header', a, b, sep='_', remove=FALSE) #remove=F doesn't remove existing columns
print(df_2)#output2
df_2 %>% group_by(combined_header) %>%
summarize(avg_val=mean(val)) %>% print()#output3
#avg 1_3 = mean(1,5)=3 avg 1_4 = mean(2, 6) = 4
RESULTS
Output:
output1
a b val
<dbl> <dbl> <dbl>
1 1 3 1
2 1 4 2
3 2 3 3
4 2 4 4
5 1 3 5
6 1 4 6
7 2 3 7
8 2 4 8
output2
combined_header a b val
<chr> <dbl> <dbl> <dbl>
1 1_3 1 3 1
2 1_4 1 4 2
3 2_3 2 3 3
4 2_4 2 4 4
5 1_3 1 3 5
6 1_4 1 4 6
7 2_3 2 3 7
8 2_4 2 4 8
output3
combined_header avg_val
<chr> <dbl>
1 1_3 3
2 1_4 4
3 2_3 5
4 2_4 6