问题
I would like to return the significant matches from the following result shown in Fig. 1
library("corrplot")
# Correlation matrix of all mtcars columns (cor() defaults to Pearson).
M <- cor(mtcars)
# Reference for the cor.mtest helper that follows (STHDA correlogram tutorial):
# http://www.sthda.com/english/wiki/visualize-correlation-matrix-using-correlogram
# Compute the matrix of pairwise correlation-test p-values.
#
# Arguments:
#   mat  A numeric matrix, or an object coercible with as.matrix() (e.g. a
#        data frame); each column is treated as one variable.
#   ...  Further arguments forwarded unchanged to cor.test().
#
# Returns:
#   A symmetric ncol(mat) x ncol(mat) matrix. Entry [i, j] is the p-value of
#   cor.test() applied to columns i and j; the diagonal is set to 0. Row and
#   column names are taken from colnames(mat).
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat <- matrix(NA, n, n)
  diag(p.mat) <- 0
  # With fewer than two columns there are no pairs to test. The guard avoids
  # the descending sequences that `1:(n - 1)` / `(i + 1):n` would produce,
  # which previously caused an out-of-bounds column access.
  if (n >= 2L) {
    for (i in seq_len(n - 1L)) {
      for (j in seq(i + 1L, n)) {
        tmp <- cor.test(mat[, i], mat[, j], ...)
        # The test is symmetric in its two inputs, so fill both triangles.
        p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
      }
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}
# p-values for every pair of mtcars columns, then a preview of the first rows
# restricted to all but the last column.
p.mat <- cor.mtest(mtcars)
N <- ncol(mtcars) - 1
ids <- seq_len(N)
head(p.mat[, seq_len(N)])

# Upper-triangle correlogram, clustered ordering; p.mat together with
# sig.level tells corrplot which coefficients to flag as not significant.
corrplot(
  M,
  method = "circle", type = "upper", order = "hclust",
  diag = FALSE,
  tl.pos = "td", tl.cex = 0.5, tl.col = "black",
  p.mat = p.mat, sig.level = 0.0000005
)
Fig. 1 Output
Expected output
cyl: wt hp
disp: wt hp cyl
...
R: 3.3.1
OS: Debian 8.5
回答1:
The usual warnings regarding multiple testing apply here.
I would write a vectorized version of cor.test (there is a reason such a function is not provided by the stats package: see the multiple-testing warning above).
# Two-sided Pearson correlation-test p-values for every pair of columns of DF,
# computed in one vectorized pass.
#
# Based on code from stats:::cor.test.default; see license() for the license.
# The p-values are NOT adjusted for multiple testing and missing values are
# not handled.
#
# Returns a matrix with the same shape and dimnames as cor(DF).
cor.test.all <- function(DF) {
  rho <- cor(DF)
  dof <- nrow(DF) - 2L
  # t statistic of the correlation coefficient under the null of zero correlation
  t_stat <- sqrt(dof) * rho / sqrt(1 - rho^2)
  lower_tail <- pt(t_stat, dof)
  upper_tail <- pt(t_stat, dof, lower.tail = FALSE)
  2 * pmin(lower_tail, upper_tail)
}
# p-values for all column pairs; blank the diagonal so that a variable is
# never reported as a match for itself.
cor.p <- cor.test.all(mtcars)
diag(cor.p) <- NA

# (row, col) positions of the entries below the threshold; NA entries are
# dropped by which().
res <- which(cor.p < 0.0000005, arr.ind = TRUE)

# Group the column names of the significant entries by their row name.
split(
  colnames(cor.p)[res[, "col"]],
  rownames(cor.p)[res[, "row"]]
)
#$am
#[1] "gear"
#
#$cyl
#[1] "mpg" "disp" "hp" "wt" "vs"
#
#$disp
#[1] "mpg" "cyl" "hp" "wt"
#
#$gear
#[1] "am"
#
#$hp
#[1] "mpg" "cyl" "disp"
#
#$mpg
#[1] "cyl" "disp" "hp" "wt"
#
#$vs
#[1] "cyl"
#
#$wt
#[1] "mpg" "cyl" "disp"
回答2:
What about iterating over each pair of variable names using cor.test? E.g.:
# For every variable in mtcars, list the other variables whose correlation
# test against it yields a p-value below the threshold.
#
# lapply() over a self-named vector always yields a named list, and vapply()
# always yields a logical vector, so the result type no longer depends on how
# many matches each variable happens to have (sapply() would simplify to a
# vector or matrix whenever all results had equal length).
lapply(setNames(nm = names(mtcars)), function(n1) {
  others <- setdiff(names(mtcars), n1)
  p <- vapply(others, function(n2) {
    cor.test(mtcars[[n1]], mtcars[[n2]])$p.value < 0.0000005
  }, logical(1))
  # which() drops NA comparisons (e.g. from a constant column) instead of
  # turning them into NA names.
  names(p)[which(p)]
})
# $mpg
# [1] "cyl" "disp" "hp" "wt"
#
# $cyl
# [1] "mpg" "disp" "hp" "wt" "vs"
#
# $disp
# [1] "mpg" "cyl" "hp" "wt"
#
# $hp
# [1] "mpg" "cyl" "disp"
#
# $drat
# character(0)
#
# $wt
# [1] "mpg" "cyl" "disp"
#
# $qsec
# character(0)
#
# $vs
# [1] "cyl"
#
# $am
# [1] "gear"
#
# $gear
# [1] "am"
#
# $carb
# character(0)
来源:https://stackoverflow.com/questions/40522547/how-to-return-significant-matches-in-r-corrplot