问题
I have a table with 6 columns, Z1 to Z6, and I want to calculate the absolute value of the difference between each pair of these columns.
So far, I enumerate all the differences in a mutate command:
# Add one column per unordered pair of the columns Z1..Z6, each holding the
# absolute difference of the two columns. Six columns give choose(6, 2) = 15
# pairs, so 15 new columns are created.
FactArray <- FactArray %>%
  mutate(
    diff12 = abs(Z1 - Z2),
    diff13 = abs(Z1 - Z3),
    diff14 = abs(Z1 - Z4),
    diff15 = abs(Z1 - Z5),
    diff16 = abs(Z1 - Z6),
    diff23 = abs(Z2 - Z3),
    diff24 = abs(Z2 - Z4),
    diff25 = abs(Z2 - Z5),
    diff26 = abs(Z2 - Z6),
    diff34 = abs(Z3 - Z4),
    diff35 = abs(Z3 - Z5),
    diff36 = abs(Z3 - Z6),
    # The (Z4, Z5) pair was absent from the hand-written list.
    diff45 = abs(Z4 - Z5),
    diff46 = abs(Z4 - Z6),
    diff56 = abs(Z5 - Z6)
  )
But I realise this is error prone and will have to be rewritten if I have a different number of columns.
Is there any way to do this "automatically", i.e. in a way that adjusts itself to any number of columns?
Best,
Damien
回答1:
You can generate all possible pairwise combinations of the columns using combn and subtract them.
# Names of the columns to compare.
cols <- paste0("Z", 1:6)
# For every unordered pair of column names, compute the absolute difference
# of the two columns of `df`; combn() hands each pair to the function.
combn(cols, 2, function(pair) {
  abs(df[[pair[1]]] - df[[pair[2]]])
})
Here's using a small reproducible example also adding appropriate column names.
set.seed(123)
# Three columns, each holding four values sampled from 1:10.
df <- data.frame(
  Z1 = sample(10, 4),
  Z2 = sample(10, 4),
  Z3 = sample(10, 4)
)
cols <- paste0("Z", 1:3)
# Label each new column with the pair of columns it compares, e.g. "Z1_Z2".
new_cols <- combn(cols, 2, function(pair) paste0(pair, collapse = "_"))
# Append one absolute-difference column per pair of original columns.
df[new_cols] <- combn(cols, 2, function(pair) {
  abs(df[[pair[1]]] - df[[pair[2]]])
})
df
# Z1 Z2 Z3 Z1_Z2 Z1_Z3 Z2_Z3
#1 3 6 6 3 3 0
#2 10 5 9 5 1 4
#3 2 4 2 2 0 2
#4 8 10 3 2 5 7
回答2:
Here is a function that can be used in a magrittr
pipe.
#' Absolute differences between every pair of selected columns
#'
#' @param X A data frame.
#' @param cols Column indices or column names of `X` to compare; at least
#'   two are required.
#' @param pref Prefix for the names of the result columns.
#' @param sep Separator placed between `pref` and the pair label.
#' @param collapse Separator placed between the two column identifiers that
#'   make up a pair label.
#' @return A data frame with one column per unordered pair of `cols` and as
#'   many rows as `X`, named `<pref><sep><col_i><collapse><col_j>`.
fun <- function(X, cols, pref = "diff", sep = "", collapse = "") {
  stopifnot(length(cols) >= 2L)
  pair_diff <- function(x) abs(x[[1]] - x[[2]])
  # simplify = FALSE keeps one list element per pair. The simplified result
  # collapses to a 1-d array when X has a single row (and misbehaves for zero
  # rows), which would then not convert to one column per pair.
  diffs <- combn(X[cols], 2, pair_diff, simplify = FALSE)
  # Placeholder names avoid any name checking or mangling during conversion;
  # the real names are assigned verbatim afterwards.
  out <- as.data.frame(diffs, col.names = paste0("V", seq_along(diffs)))
  nms <- combn(cols, 2, paste, collapse = collapse)
  names(out) <- paste(pref, nms, sep = sep)
  out
}
library(dplyr)
# Data frame containing only the pairwise absolute-difference columns.
df1 %>% fun(cols = 1:6)
# Original columns of df1 followed by the difference columns.
df1 %>% fun(cols = 1:6) %>% bind_cols(df1, .)
Data
set.seed(2020)
# Six random permutations of 1:10, one per column; converting the resulting
# matrix to a data frame yields the default column names V1..V6.
df1 <- as.data.frame(replicate(6, sample(10)))
回答3:
It's easiest if you first make your data tidy. The approach below is easily generalised and robust.
library(tidyverse)

# Generate test data: an id column plus six numeric columns z1..z6
df <- tibble(
  id = 1:5,
  z1 = rnorm(5), z2 = rnorm(5), z3 = rnorm(5),
  z4 = rnorm(5), z5 = rnorm(5), z6 = rnorm(5)
)

# Tidy data: one row per (id, column index) with the column's value.
# The index is stored as an integer so that pairs can be ordered below.
tidyDF1 <- df %>%
  pivot_longer(
    cols = starts_with("z"),
    names_to = "Index1",
    names_pattern = "z(\\d+)",
    values_to = "Value1"
  ) %>%
  mutate(Index1 = as.integer(Index1))

# Copy the tidy data under the names used for the second member of a pair
tidyDF2 <- tidyDF1 %>%
  rename(Value2 = Value1, Index2 = Index1)

# Column indices actually present in the data (instead of a hard-coded 1:6)
all_indices <- unique(tidyDF2$Index2)

# Create a copy of each value, one for each column in the original data
tidyDF1 <- tidyDF1 %>%
  expand(nesting(id, Index1, Value1), Index2 = all_indices)

joinDF <- tidyDF1 %>%
  left_join(tidyDF2, by = c("id", "Index2")) %>% # All pairwise combinations
  mutate(Diff = abs(Value1 - Value2)) %>% # Absolute differences
  # Keep each unordered pair once; this also drops comparisons with self.
  filter(Index1 < Index2)

# Present the results in "wide format": one DiffIJ column per pair (I < J)
joinDF %>%
  pivot_wider(
    id_cols = "id",
    values_from = "Diff",
    names_from = c("Index1", "Index2"),
    names_prefix = "Diff",
    names_sep = ""
  )
来源:https://stackoverflow.com/questions/62572690/calculate-all-the-absolute-differences-between-6-columns-of-a-table-using-mutate