Where each datapoint has a pairing of A and B and there multiple entries in A and multiple entires in B. IE multiple syndromes and multiple diagnoses, although for each data
Since your data is bipartite, I would suggest plotting points in the first factor on one side, points in the other factor on the other, with lines between them, like so:
The code I used to generate this was:
## Make up data.
data <- data.frame(X1=sample(state.region, 10),
X2=sample(state.region, 10))
## Set up plot window.
plot(0, xlim=c(0,1), ylim=c(0,1),
type="n", axes=FALSE, xlab="", ylab="")
factor.to.int <- function(f) {
(as.integer(f) - 1) / (length(levels(f)) - 1)
}
segments(factor.to.int(data$X1), 0, factor.to.int(data$X2), 1,
col=data$X1)
axis(1, at = seq(0, 1, by = 1 / (length(levels(data$X1)) - 1)),
labels = levels(data$X1))
axis(3, at = seq(0, 1, by = 1 / (length(levels(data$X2)) - 1)),
labels = levels(data$X2))
This is what I do. A darker colour indicates a more important combination of A and B.
dataset <- data.frame(A = sample(LETTERS[1:5], 200, prob = runif(5), replace = TRUE), B = sample(LETTERS[1:5], 200, prob = runif(5), replace = TRUE))
Counts <- as.data.frame(with(dataset, table(A, B)))
library(ggplot2)
ggplot(Counts, aes(x = A, y = B, fill = Freq)) + geom_tile() + scale_fill_gradient(low = "white", high = "black")
Or if you prefer lines
library(ggplot2)
dataset <- data.frame(A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE), B = sample(letters[1:5], 200, prob = runif(5), replace = TRUE))
Counts <- as.data.frame(with(dataset, table(A, B)))
Counts$X <- 0
Counts$Xend <- 1
Counts$Y <- as.numeric(Counts$A)
Counts$Yend <- as.numeric(Counts$B)
ggplot(Counts, aes(x = X, xend = Xend, y = Y, yend = Yend, size = Freq)) +
geom_segment() + scale_x_continuous(breaks = 0:1, labels = c("A", "B")) +
scale_y_continuous(breaks = 1:5, labels = letters[1:5])
This third options add labels to the data points using geom_text().
library(ggplot2)
dataset <- data.frame(
A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE),
B = sample(LETTERS[20:26], 200, prob = runif(7), replace = TRUE)
)
Counts <- as.data.frame(with(dataset, table(A, B)))
Counts$X <- 0
Counts$Xend <- 1
Counts$Y <- as.numeric(Counts$A)
Counts$Yend <- as.numeric(Counts$B)
ggplot(Counts, aes(x = X, xend = Xend, y = Y, yend = Yend)) +
geom_segment(aes(size = Freq)) +
scale_x_continuous(breaks = 0:1, labels = c("A", "B")) +
scale_y_continuous(breaks = -1) +
geom_text(aes(x = X, y = Y, label = A), colour = "red", size = 7, hjust = 1, vjust = 1) +
geom_text(aes(x = Xend, y = Yend, label = B), colour = "red", size = 7, hjust = 0, vjust = 0)
Thanks! I think that the connectivity between elements in each class is best visualized by the link graph examples given by both Jonathon and Thierry. Thierry's 2nd which shows the magnitude is definitely where i will start.
update thanks everyone for you ideas and tips!
I came acrossthe bipartite package that has functions to visualize this kind of data. I think its a clean visualization of the relationships I am trying to show.
did:
library(bipartite)
dataset <- data.frame(
A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE),
B = sample(LETTERS[20:26], 200, prob = runif(7), replace = TRUE)
)
datamat <- as.matrix(table(dataset$A, dataset$B))
visweb(datamat, text = "interaction", textsize = .8)
giving: visweb result
couldnt put image in as a new user :(
Maybe mosaicplot:
X <- structure(list(
ID = 1:50,
A = structure(c(6L, 1L, 2L, 4L, 4L, 3L, 7L, 1L, 3L, 4L, 1L, 1L, 4L, 4L, 1L, 3L, 5L, 5L, 2L, 6L, 6L, 1L, 1L, 1L, 3L, 3L, 5L, 6L, 3L, 2L, 8L, 5L, 2L, 6L, 5L, 2L, 8L, 3L, 5L, 1L, 1L, 6L, 2L, 8L, 8L, 4L, 1L, 2L, 6L, 2L), .Label = c("AA","BB", "CC", "DD", "FF", "GG", "HH", "II"), class = "factor"),
B = structure(c(3L, 2L, 6L, 2L, 3L, 6L, 8L, 3L, 1L, 8L, 6L, 3L, 2L, 6L, 7L, 8L, 2L, 6L, 5L, 5L, 2L, 1L, 3L, 2L, 5L, 3L, 1L, 4L, 3L, 8L, 3L, 1L, 2L, 7L, 1L, 5L, 1L, 7L, 5L, 6L, 8L, 5L, 4L, 4L, 2L, 2L, 4L, 5L, 3L, 3L), .Label = c("RR", "SS", "TT", "UU", "VV", "XX", "YY", "ZZ"), class = "factor")
), .Names = c("ID", "A", "B"), class = "data.frame", row.names = c(NA, -50L)
)
mosaicplot(with(X,table(A,B)))
For you example dataset: