问题
I would like to draw a Sankey plot based on the data structure generated below, where the edges between nodes correspond to the N3
column, and their presence and thickness depend on the Value
column. For the dummy data below, the plot would look like this (but with edge thickness corresponding to the value in the Value
column). I haven't seen any examples of Sankey plots built like this. I've tried different options using the riverplot package, but as it doesn't seem to be able to handle the N3
column, it removes all duplicated edges, e.g., the multiple edges between A and C.
# Build a reproducible dummy count matrix: rows A-D are the nodes and columns
# a-e are the categories that end up in the N3 column of the edge table.
set.seed(123)
mat <- matrix(
  rbinom(20, 100, 0.01),
  nrow = 4, ncol = 5,
  dimnames = list(LETTERS[1:4], letters[1:5])
)
mat
#   a b c d e
# A 0 3 1 1 0
# B 2 0 1 1 0
# C 1 1 3 0 0
# D 2 2 1 2 3

# Assign every row of `mat` to one of the two node groups an edge connects.
rowKey <- c("A" = "N1", "B" = "N1", "C" = "N2", "D" = "N2")

# One candidate edge per (N1 node, N2 node, N3 category) combination.
edges <- expand.grid(c(split(names(rowKey), rowKey), list(N3 = colnames(mat))))

# expand.grid() returns factors; convert to character so the values can be
# used to index `mat` by its dimnames.
n1 <- as.character(edges$N1)
n2 <- as.character(edges$N2)
n3 <- as.character(edges$N3)

# 2 x nrow(edges) matrix: row 1 holds the count of the N1 node and row 2 the
# count of the N2 node, each taken from the edge's N3 column of `mat`.
# A two-column character matrix index picks one (row, column) cell per edge,
# which also works for a zero-row edge table (unlike 1:nrow(edges)).
edges2 <- rbind(mat[cbind(n1, n3)], mat[cbind(n2, n3)])

# An edge exists only when *both* endpoints have a positive count in that
# category; its value is then the sum of the two counts, otherwise 0.
edges$Value <- colSums(edges2) * (colSums(edges2 > 0) == nrow(edges2))
edges
#    N1 N2 N3 Value
# 1   A  C  a     0
# 2   B  C  a     3
# 3   A  D  a     0
# 4   B  D  a     4
# 5   A  C  b     4
# 6   B  C  b     0
# 7   A  D  b     5
# 8   B  D  b     0
# 9   A  C  c     4
# 10  B  C  c     4
# 11  A  D  c     2
# 12  B  D  c     2
# 13  A  C  d     0
# 14  B  C  d     0
# 15  A  D  d     3
# 16  B  D  d     3
# 17  A  C  e     0
# 18  B  C  e     0
# 19  A  D  e     0
# 20  B  D  e     0
# Plotting a sankey plot using the riverplot package.
# library() is used instead of require(): a missing package then stops the
# script right here instead of returning FALSE and failing further down.
library(riverplot)
library(RColorBrewer)

# One node per distinct endpoint found in the N1 and N2 columns of `edges`.
nodes <- data.frame(
  ID = unique(c(as.character(edges$N1), as.character(edges$N2))),
  stringsAsFactors = FALSE
)
# Hard-coded layout for the four nodes of the dummy data: the first two nodes
# go in column x = 1, the last two in column x = 2, at heights y = 0 and 1.
nodes$x <- c(rep(1, 2), rep(2, 2))
nodes$y <- c(0:1, 0:1)

# Append "60" to each Set1 hex colour as an alpha channel (semi-transparent).
palette <- paste0(brewer.pal(3, "Set1"), "60")
# One style per node, coloured by its y position (y is 0-based, hence n + 1).
styles <- lapply(nodes$y, function(n) {
  list(col = palette[n + 1], lty = 0, textcol = "black")
})
names(styles) <- nodes$ID

# Only N1, N2 and Value are passed on; N3 is dropped by name, not position.
rp <- list(
  nodes = nodes,
  edges = edges[, setdiff(names(edges), "N3")],
  styles = styles
)
class(rp) <- c(class(rp), "riverplot")
plot(rp, plot_area = 0.95, yscale = 0.06, srt = 0)
# Without N3, rows sharing the same N1/N2 pair are treated as duplicates and
# removed -- this is the problem the question is about:
# Warning message:
# In checkedges(x2$edges, names(x2)) :
# duplicated edge information, removing 16 edges
回答1:
Here's a solution using geom_parallel_sets()
from the ggforce package:
# geom_parallel_sets() is provided by ggforce. Install it only when it is
# missing, rather than re-installing from GitHub on every run.
if (!requireNamespace("ggforce", quietly = TRUE)) {
  devtools::install_github("thomasp85/ggforce")
}
# Installing does not attach a package: without these calls ggplot(), aes(),
# gather_set_data() and the geom_parallel_sets*() functions are not found.
library(ggplot2)
library(ggforce)

# Reshape the edge table on its first two columns (N1, N2); the result supplies
# the x, y and id columns referenced in aes() below.
edges1 <- gather_set_data(edges, 1:2)

# Ribbons are sized by Value and coloured by N3, so parallel edges between the
# same pair of nodes stay distinct.
ggplot(edges1, aes(x, id = id, split = y, value = Value)) +
  geom_parallel_sets(aes(fill = N3), alpha = 0.3, axis.width = 0.1) +
  geom_parallel_sets_axes(axis.width = 0.1) +
  geom_parallel_sets_labels(colour = "white")
来源:https://stackoverflow.com/questions/47097941/sankey-plot-where-edges-between-nodes-correspond-to-an-n3-column