问题
I am looking to generate a ternary plot with binned polygons (either triangle or hex, preferably in a ggplot framework) where the color of the polygon is a binned mean or median of selected values.
This script gets very close, but triangle cell color is representative of a number of observations, rather than a mean value of observations contained within the triangle cell.
So rather than soley providing X,Y, and Z; I would provide a fourth fill/value variable is provided from which binned means or medians are calculated and represented as a color on a gradient.
Akin to the below image, though in a ternary framework with an additional axis. Image of stat_summary_hex() plot with color as binned mean value
I appreciate the help. Thank you.
Dummy data to begin with:
#load libraries
devtools::install_git('https://bitbucket.org/nicholasehamilton/ggtern')
library(ggtern)
library(ggplot)
# example data
sig <- matrix(c(3,0,0,2),2,2)
data <- data.frame(mvrnorm(n=10000, rep(2, 2), sig))
data$X1 <- data$X1/max(data$X1)
data$X2 <- data$X2/max(data$X2)
data$X1[which(data$X1<0)] <- runif(length(data$X1[which(data$X1<0)]))
data$X2[which(data$X2<0)] <- runif(length(data$X2[which(data$X2<0)]))
data$X3 <- with(data, 1-X1-X2)
data <- data[data$X3 >= 0,]
data$X4 <- rnorm(dim(data)[1])
data <- data.frame(X = data$X1, Y = data$X2, Z = data$X3, fill_variable = data$X4)
str(data)
# simple ternary plot where color of point is the fill variable value
ggtern(data,aes(X,Y,Z, color = fill_variable))+geom_point()
# 2D example, not a ternary though. Keep in mind in geom_hex Z is the fill, not the additional axis like ggtern
ggplot(data,aes(X,Y))+stat_summary_hex(aes(z = fill_variable))
回答1:
This code isn't cleaned up, but it's a good jumping off point. Credit for original goes the OP referenced in the first question.
I made some minor adjustments to the count_bin function to instead of doing bin counts, it does bin medians. Use at your own risk and please point out any bugs. For my implementation this reports 0 for NA bins.
Example:
Function for binned median (pardon the name, just saves time):
count_bin <- function(data, minT, maxT, minR, maxR, minL, maxL) {
ret <- data
ret <- with(ret, ret[minT <= X1 & X1 < maxT,])
ret <- with(ret, ret[minL <= X2 & X2 < maxL,])
ret <- with(ret, ret[minR <= X3 & X3 < maxR,])
if(is.na(median(ret$VAR))) {
ret <- 0
} else {
ret <- median(ret$VAR)
}
ret
}
Modified heatmap function:
heatmap3d <- function(data, inc, logscale=FALSE, text=FALSE, plot_corner=TRUE) {
# When plot_corner is FALSE, corner_cutoff determines where to stop plotting
corner_cutoff = 1
# When plot_corner is FALSE, corner_number toggles display of obervations in the corners
# This only has an effect when text==FALSE
corner_numbers = TRUE
count <- 1
points <- data.frame()
for (z in seq(0,1,inc)) {
x <- 1- z
y <- 0
while (x>0) {
points <- rbind(points, c(count, x, y, z))
x <- round(x - inc, digits=2)
y <- round(y + inc, digits=2)
count <- count + 1
}
points <- rbind(points, c(count, x, y, z))
count <- count + 1
}
colnames(points) = c("IDPoint","T","L","R")
#str(points)
#str(count)
# base <- ggtern(data=points,aes(L,T,R)) +
# theme_bw() + theme_hidetitles() + theme_hidearrows() +
# geom_point(shape=21,size=10,color="blue",fill="white") +
# geom_text(aes(label=IDPoint),color="blue")
# print(base)
polygons <- data.frame()
c <- 1
# Normal triangles
for (p in points$IDPoint) {
if (is.element(p, points$IDPoint[points$T==0])) {
next
} else {
pL <- points$L[points$IDPoint==p]
pT <- points$T[points$IDPoint==p]
pR <- points$R[points$IDPoint==p]
polygons <- rbind(polygons,
c(c,p),
c(c,points$IDPoint[abs(points$L-pL) < inc/2 & abs(points$R-pR-inc) < inc/2]),
c(c,points$IDPoint[abs(points$L-pL-inc) < inc/2 & abs(points$R-pR) < inc/2]))
c <- c + 1
}
}
#str(c)
# Upside down triangles
for (p in points$IDPoint) {
if (!is.element(p, points$IDPoint[points$T==0])) {
if (!is.element(p, points$IDPoint[points$L==0])) {
pL <- points$L[points$IDPoint==p]
pT <- points$T[points$IDPoint==p]
pR <- points$R[points$IDPoint==p]
polygons <- rbind(polygons,
c(c,p),
c(c,points$IDPoint[abs(points$T-pT) < inc/2 & abs(points$R-pR-inc) < inc/2]),
c(c,points$IDPoint[abs(points$L-pL) < inc/2 & abs(points$R-pR-inc) < inc/2]))
c <- c + 1
}
}
}
#str(c)
# IMPORTANT FOR CORRECT ORDERING.
polygons$PointOrder <- 1:nrow(polygons)
colnames(polygons) = c("IDLabel","IDPoint","PointOrder")
df.tr <- merge(polygons,points)
Labs = ddply(df.tr,"IDLabel",function(x){c(c(mean(x$T),mean(x$L),mean(x$R)))})
colnames(Labs) = c("Label","T","L","R")
#str(Labs)
#triangles <- ggtern(data=df.tr,aes(L,T,R)) +
# geom_polygon(aes(group=IDLabel),color="black",alpha=0.25) +
# geom_text(data=Labs,aes(label=Label),size=4,color="black") +
# theme_bw()
# print(triangles)
bins <- ddply(df.tr, .(IDLabel), summarize,
maxT=max(T),
maxL=max(L),
maxR=max(R),
minT=min(T),
minL=min(L),
minR=min(R))
#str(bins)
count <- ddply(bins, .(IDLabel), summarize,
N=count_bin(data, minT, maxT, minR, maxR, minL, maxL)
#N=mean(data)
)
df <- join(df.tr, count, by="IDLabel")
str(count)
Labs = ddply(df,.(IDLabel,N),function(x){c(c(mean(x$T),mean(x$L),mean(x$R)))})
colnames(Labs) = c("Label","N","T","L","R")
if (plot_corner==FALSE){
corner <- ddply(df, .(IDPoint, IDLabel), summarize, maxperc=max(T,L,R))
corner <- corner$IDLabel[corner$maxperc>=corner_cutoff]
df$N[is.element(df$IDLabel, corner)] <- 0
if (text==FALSE & corner_numbers==TRUE) {
Labs$N[!is.element(Labs$Label, corner)] <- ""
text=TRUE
}
}
heat <- ggtern(data=df,aes(L,T,R)) +
geom_polygon(aes(fill=N,group=IDLabel),color="black",alpha=1, size = 0.1,show.legend = F)
if (logscale == TRUE) {
heat <- heat + scale_fill_gradient(name="Observations", trans = "log",
low=palette[2], high=palette[4])
} else {
heat <- heat + scale_fill_distiller(name="Median Value",
palette = "Spectral")
}
heat <<- heat +
Tlab("x") +
Rlab("y") +
Llab("z") +
theme_bw() +
theme(axis.tern.arrowsep=unit(0.02,"npc"), #0.01npc away from ticks ticklength
axis.tern.arrowstart=0.25,axis.tern.arrowfinish=0.75,
axis.tern.text=element_text(size=12),
axis.tern.arrow.text.T=element_text(vjust=-1),validate = F,
axis.tern.arrow.text.R=element_text(vjust=2),
axis.tern.arrow.text.L=element_text(vjust=-1),
#axis.tern.arrow.text=element_text(size=12),
axis.tern.title=element_text(size=15),
axis.tern.text=element_blank(),
axis.tern.arrow.text=element_blank())
if (text==FALSE) {
print(heat)
} else {
print(heat + geom_text(data=Labs,aes(label=N),size=3,color="white"))
}
}
Dummy example:
# dummy example
sig <- matrix(c(3,3,3,3),3,3)
data <- data.frame(mvrnorm(n=10000, rep(2, 2), sig))
data$X1[which(data$X1<0)] <- runif(length(data$X1[which(data$X1<0)]))
data$X2[which(data$X2<0)] <- runif(length(data$X2[which(data$X2<0)]))
data$X3 <- with(data, 1-X1-X2)
data <- data[data$X3 >= 0,]
data$VAR <- rnorm(dim(data)[1])
data <- data.frame(X = data$X1, Y = data$X2, Z = data$X3, fill_variable = data$X4)
str(data)
ggtern(data,aes(X1,
X2,
X3, color = VAR))+geom_point(size = 5)+scale_color_distiller(palette = "Spectral")
heatmap3d(data,.05)
来源:https://stackoverflow.com/questions/47801768/ternary-plots-with-binned-means-medians