问题
I adapted a heatmap plot for a confusion matrix from this answer.
However, I would like to tweak it. The diagonal (from top left to bottom right)
contains the matches (correct classifications). My aim is to plot this diagonal in a yellow color palette, and the mismatches (all tiles except those on the diagonal) in a red color palette.
In my plot.cm
function I can get the diagonal with
cm_d$diag <- cm_d$Prediction == cm_d$Reference # Get the Diagonal
cm_d$ndiag <- cm_d$Prediction != cm_d$Reference # Not the Diagonal
And with the correct geom_tile
aesthetics I can get only the diagonal (in the desired yellow-ish) color scheme
geom_tile( data = cm_d[!is.na(cm_d$diag), ],aes(color = Freq)) +
scale_fill_gradient(guide = FALSE,low=alpha("lightyellow",0.75), high="yellow",na.value = 'white')
However I am not able to get the second color scheme on the elements of cm_d$ndiag
I found the package ggnewscale that offers new_scale()
as well as new_scale_fill()
.
I tried to implement it with the help of this blog. However, the result is only dark-gray filled tiles for the rest of the heatmap.
# adapted from https://stackoverflow.com/a/60150826/7318488
library(ggplot2) # to plot
library(gridExtra) # to put more
library(grid) # plot together
library(likert) # for reversing the factor order
library(ggnewscale)
# Plot the confusion table stored in `cm$table` as a heatmap and return the
# ggplot object. Only `cm$table` is read; it is expected to convert into a
# data frame with `Prediction`, `Reference` and `Freq` columns.
# This is the question's version: the diagonal is drawn with the yellow
# gradient, while the second (red) scale is still commented out.
plot.cm <- function(cm){
# extract the confusion matrix values as data.frame
cm_d <- as.data.frame(cm$table)
cm_d$diag <- cm_d$Prediction == cm_d$Reference # Get the Diagonal
cm_d$ndiag <- cm_d$Prediction != cm_d$Reference # Not the Diagonal
# The comparison below runs over every column, not just Freq. Because
# FALSE == 0 is TRUE, the FALSE entries of diag/ndiag are turned into NA too,
# which is what makes the !is.na() row filters further down work.
cm_d[cm_d == 0] <- NA # Replace 0 with NA for white tiles
cm_d$Reference <- reverse.levels(cm_d$Reference) # diagonal starts at top left
# plotting the matrix
cm_d_p <- ggplot(data = cm_d, aes(x = Prediction , y = Reference, fill = Freq))+
scale_x_discrete(position = "top") +
# Diagonal tiles only. This layer maps `color` (the tile outline); its fill
# comes from the plot-level aes() above.
geom_tile( data = cm_d[!is.na(cm_d$diag), ],aes(color = Freq)) +
scale_fill_gradient(guide = FALSE,low=alpha("lightyellow",0.75), high="yellow",na.value = 'white') +
# Attempted second fill scale for the off-diagonal tiles, left disabled:
# THIS DOESNT WORK
# new_scale("fill") +
# geom_tile( data = cm_d[!is.na(cm_d$ndiag), ],aes(color = Freq)) +
# scale_fill_gradient(guide = FALSE,low=alpha("red",0.75), high="darkred",na.value = 'white') +
# Cell counts as text labels; zero counts were replaced by NA above.
geom_text(aes(label = Freq), color = 'black', size = 6) +
theme_light() +
# Strip grid lines, legend, border, background and axis lines.
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
legend.position = "none",
panel.border = element_blank(),
plot.background = element_blank(),
axis.line = element_blank())
return(cm_d_p)
}
Sample Data:
Simulated Caret Confusion Matrix
library(caret)
# Simulated data: 100 random predictions and 100 random references drawn from
# seven classes; the seed keeps the example reproducible.
set.seed(23)
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
pred <- factor(sample(1:7, 100, replace = TRUE))
ref <- factor(sample(1:7, 100, replace = TRUE))
cm <- caret::confusionMatrix(pred, ref)
# Build the heatmap and print it.
g <- plot.cm(cm)
g
回答1:
I believe the issue is simply that you're specifying aes(color = Freq)
instead of aes(fill = Freq)
. Is this plot what you were aiming for? You could also simplify all of this by using a divergent color scale and creating a new variable that marks Freq as negative if it's off the diagonal. See the second example below.
# adapted from https://stackoverflow.com/a/60150826/7318488
library(ggplot2) # to plot
library(gridExtra) # to put more
library(grid) # plot together
library(likert) # for reversing the factor order
#> Loading required package: xtable
library(ggnewscale)
#' Plot a confusion matrix as a heatmap with two color palettes
#'
#' Correct classifications (the diagonal) are filled with a yellow gradient,
#' misclassifications with a red gradient. Cells with a count of zero are
#' drawn white and carry no label.
#'
#' @param cm An object whose `table` element converts via `as.data.frame()`
#'   into `Prediction`, `Reference` and `Freq` columns, e.g. the result of
#'   `caret::confusionMatrix()`.
#' @return A ggplot object.
plot.cm <- function(cm) {
  # extract the confusion matrix values as data.frame
  cm_d <- as.data.frame(cm$table)

  # Mark the diagonal explicitly. Labels are compared as text so the test does
  # not depend on how the two factors order their levels.
  is_diag <- as.character(cm_d$Prediction) == as.character(cm_d$Reference)

  # Blank out zero counts only. A data-frame-wide `cm_d[cm_d == 0] <- NA`
  # would also turn FALSE flags into NA and wipe a class labelled "0".
  cm_d$Freq[cm_d$Freq == 0] <- NA

  cm_d$Reference <- reverse.levels(cm_d$Reference) # diagonal starts at top left

  # plotting the matrix
  ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
    scale_x_discrete(position = "top") +
    # matches: yellow palette
    geom_tile(data = cm_d[is_diag, ], aes(fill = Freq)) +
    scale_fill_gradient(
      guide = "none", low = alpha("lightyellow", 0.75), high = "yellow",
      na.value = "white"
    ) +
    # open a second, independent fill scale for the layers added below
    new_scale("fill") +
    # mismatches: red palette. Fill is mapped on the layer itself; with only
    # aes(color = Freq) here the tiles came out dark gray.
    geom_tile(data = cm_d[!is_diag, ], aes(fill = Freq)) +
    scale_fill_gradient(
      guide = "none", low = alpha("red", 0.75), high = "red",
      na.value = "white"
    ) +
    # NA labels (zero counts) are dropped by geom_text with a warning
    geom_text(aes(label = Freq), color = "black", size = 6) +
    theme_light() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      legend.position = "none",
      panel.border = element_blank(),
      plot.background = element_blank(),
      axis.line = element_blank()
    )
}
library(caret)
#> Loading required package: lattice
# Simulated data: 100 random predictions and 100 random references drawn from
# seven classes; the seed keeps the example reproducible.
set.seed(23)
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
pred <- factor(sample(1:7, 100, replace = TRUE))
ref <- factor(sample(1:7, 100, replace = TRUE))
cm <- caret::confusionMatrix(pred, ref)
# Build the heatmap and print it.
g <- plot.cm(cm)
g
#> Warning: Removed 8 rows containing missing values (geom_text).
Created on 2020-04-29 by the reprex package (v0.3.0)
# adapted from https://stackoverflow.com/a/60150826/7318488
library(ggplot2) # to plot
library(gridExtra) # to put more
library(grid) # plot together
library(likert) # for reversing the factor order
#> Loading required package: xtable
library(ggnewscale)
#' Plot a confusion matrix as a heatmap on one diverging color scale
#'
#' Counts are given a sign (positive on the diagonal, negative off it) and
#' filled with a single red-to-yellow diverging gradient centred on zero.
#' Cells with a count of zero are drawn white and carry no label.
#'
#' @param cm An object whose `table` element converts via `as.data.frame()`
#'   into `Prediction`, `Reference` and `Freq` columns, e.g. the result of
#'   `caret::confusionMatrix()`.
#' @return A ggplot object.
plot.cm <- function(cm) {
  # extract the confusion matrix values as data.frame
  cm_d <- as.data.frame(cm$table)

  # Mark the diagonal explicitly. Labels are compared as text so the test does
  # not depend on how the two factors order their levels.
  is_diag <- as.character(cm_d$Prediction) == as.character(cm_d$Reference)

  # Blank out zero counts only. A data-frame-wide `cm_d[cm_d == 0] <- NA`
  # would also turn FALSE flags into NA and wipe a class labelled "0".
  cm_d$Freq[cm_d$Freq == 0] <- NA

  cm_d$Reference <- reverse.levels(cm_d$Reference) # diagonal starts at top left

  # Signed count: matches stay positive, mismatches become negative, so the
  # two groups land on opposite arms of the diverging scale.
  cm_d$ref_freq <- cm_d$Freq * ifelse(is_diag, 1, -1)

  # plotting the matrix
  ggplot(data = cm_d, aes(x = Prediction, y = Reference, fill = Freq)) +
    scale_x_discrete(position = "top") +
    geom_tile(aes(fill = ref_freq)) +
    scale_fill_gradient2(
      guide = "none", low = "red", high = "yellow", midpoint = 0,
      na.value = "white"
    ) +
    # labels show the unsigned count; NA labels are dropped with a warning
    geom_text(aes(label = Freq), color = "black", size = 6) +
    theme_light() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      legend.position = "none",
      panel.border = element_blank(),
      plot.background = element_blank(),
      axis.line = element_blank()
    )
}
library(caret)
#> Loading required package: lattice
# Simulated data: 100 random predictions and 100 random references drawn from
# seven classes; the seed keeps the example reproducible.
set.seed(23)
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
pred <- factor(sample(1:7, 100, replace = TRUE))
ref <- factor(sample(1:7, 100, replace = TRUE))
cm <- caret::confusionMatrix(pred, ref)
# Build the heatmap and print it.
g <- plot.cm(cm)
g
#> Warning: Removed 8 rows containing missing values (geom_text).
Created on 2020-04-29 by the reprex package (v0.3.0)
来源:https://stackoverflow.com/questions/61504970/ggplot2-heatmap-2-different-color-schemes-confusion-matrix-matches-in-differe