visualize associations between two groups of data

前端 未结 4 941
清酒与你
清酒与你 2021-01-06 20:11

Where each datapoint has a pairing of A and B, and there are multiple entries in A and multiple entries in B. I.e., multiple syndromes and multiple diagnoses, although for each data…

相关标签:
4条回答
  • 2021-01-06 20:31

    Since your data is bipartite, I would suggest plotting points in the first factor on one side, points in the other factor on the other, with lines between them, like so:

    enter image description here

    The code I used to generate this was:

    ## Make up data: ten random draws per column from the built-in
    ## `state.region` factor, so X1 and X2 share the same set of levels.
    data <- data.frame(X1 = sample(state.region, 10),
                       X2 = sample(state.region, 10))

    ## Set up an empty unit-square plot window (no axes, no axis titles);
    ## everything else is drawn on top of it.
    plot(0, xlim = c(0, 1), ylim = c(0, 1),
         type = "n", axes = FALSE, xlab = "", ylab = "")

    ## Map each element of factor `f` to an evenly spaced position in [0, 1]:
    ## the first level lands on 0 and the last level on 1.
    ## A factor with fewer than two levels cannot be spread out, so its
    ## zero-based codes are returned unscaled (0 for the single level)
    ## instead of dividing by zero and producing NaN.
    factor.to.int <- function(f) {
      n_levels <- nlevels(f)
      position <- as.integer(f) - 1
      if (n_levels > 1L) {
        position <- position / (n_levels - 1)
      }
      position
    }

    ## One segment per row: bottom edge (y = 0) at the X1 position, top edge
    ## (y = 1) at the X2 position, coloured by the X1 level.
    segments(factor.to.int(data$X1), 0, factor.to.int(data$X2), 1,
             col = data$X1)
    ## Tick marks use `length.out` so they match factor.to.int() exactly,
    ## including the single-level case where the only tick sits at 0.
    axis(1, at = seq(0, 1, length.out = nlevels(data$X1)),
         labels = levels(data$X1))
    axis(3, at = seq(0, 1, length.out = nlevels(data$X2)),
         labels = levels(data$X2))
    
    0 讨论(0)
  • 2021-01-06 20:39

    This is what I do. A darker colour indicates a more important combination of A and B.

    library(ggplot2)

    # Simulate 200 (A, B) pairs; each column is drawn with its own random
    # category weights.
    n_obs <- 200
    categories <- LETTERS[1:5]
    a_draws <- sample(categories, n_obs, replace = TRUE, prob = runif(5))
    b_draws <- sample(categories, n_obs, replace = TRUE, prob = runif(5))
    dataset <- data.frame(A = a_draws, B = b_draws)

    # Cross-tabulate into long form: one row per (A, B) cell plus its count
    # in the `Freq` column.
    Counts <- as.data.frame(table(A = dataset$A, B = dataset$B))

    # Heat map: one tile per combination, shaded from white (low count)
    # to black (high count).
    ggplot(Counts, aes(x = A, y = B, fill = Freq)) +
      geom_tile() +
      scale_fill_gradient(low = "white", high = "black")
    

    Or if you prefer lines

    library(ggplot2)

    # Fix the category set up front. Without explicit levels, a letter that
    # happens never to be sampled would be missing from the table, shifting
    # the integer codes so that the hard-coded y-axis labels no longer match.
    categories <- letters[1:5]
    dataset <- data.frame(
      A = factor(sample(categories, 200, prob = runif(5), replace = TRUE),
                 levels = categories),
      B = factor(sample(categories, 200, prob = runif(5), replace = TRUE),
                 levels = categories)
    )

    # Long-form contingency table: one row per (A, B) pair with its count.
    Counts <- as.data.frame(with(dataset, table(A, B)))
    # Drop pairs that never occur; otherwise they would still be drawn (at
    # the thinnest line width) and suggest an association that is not there.
    Counts <- Counts[Counts$Freq > 0, ]

    # Each pair becomes a segment from x = 0 (A side) to x = 1 (B side);
    # the y coordinates are the integer level codes of A and B.
    Counts$X <- 0
    Counts$Xend <- 1
    Counts$Y <- as.numeric(Counts$A)
    Counts$Yend <- as.numeric(Counts$B)

    # Line thickness encodes the count.
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
    # geoms; `size` is kept for compatibility with older versions.
    ggplot(Counts, aes(x = X, xend = Xend, y = Y, yend = Yend, size = Freq)) +
      geom_segment() +
      scale_x_continuous(breaks = 0:1, labels = c("A", "B")) +
      scale_y_continuous(breaks = seq_along(categories), labels = categories)
    

    This third option adds labels to the data points using geom_text().

    library(ggplot2)

    # Simulate 200 pairs: A from five lowercase letters, B from seven
    # uppercase letters, each with random category weights.
    dataset <- data.frame(
      A = sample(letters[1:5], 200, prob = runif(5), replace = TRUE),
      B = sample(LETTERS[20:26], 200, prob = runif(7), replace = TRUE)
    )

    # Long-form contingency table: one row per (A, B) pair with its count.
    Counts <- as.data.frame(with(dataset, table(A, B)))
    # Drop pairs that never occur; otherwise they would still be drawn (at
    # the thinnest line width) and suggest an association that is not there.
    Counts <- Counts[Counts$Freq > 0, ]

    # Each pair becomes a segment from x = 0 (A side) to x = 1 (B side);
    # the y coordinates are the integer level codes of A and B.
    Counts$X <- 0
    Counts$Xend <- 1
    Counts$Y <- as.numeric(Counts$A)
    Counts$Yend <- as.numeric(Counts$B)

    # One label row per category. Labelling straight from `Counts` would print
    # every label once per pair it takes part in, stacking identical text on
    # top of itself.
    labels_a <- unique(Counts[c("X", "Y", "A")])
    labels_b <- unique(Counts[c("Xend", "Yend", "B")])

    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
    # geoms; `size` is kept for compatibility with older versions.
    ggplot(Counts, aes(x = X, y = Y)) +
      geom_segment(aes(xend = Xend, yend = Yend, size = Freq)) +
      scale_x_continuous(breaks = 0:1, labels = c("A", "B")) +
      # The only break (-1) lies outside the plotted codes, so no y-axis tick
      # labels appear; the text labels below identify the categories instead.
      scale_y_continuous(breaks = -1) +
      geom_text(data = labels_a, aes(x = X, y = Y, label = A),
                colour = "red", size = 7, hjust = 1, vjust = 1) +
      geom_text(data = labels_b, aes(x = Xend, y = Yend, label = B),
                colour = "red", size = 7, hjust = 0, vjust = 0)
    
    0 讨论(0)
  • 2021-01-06 20:43

    Thanks! I think that the connectivity between elements in each class is best visualized by the link graph examples given by both Jonathon and Thierry. Thierry's 2nd, which shows the magnitude, is definitely where I will start.

    Update: thanks, everyone, for your ideas and tips!

    I came across the bipartite package, which has functions to visualize this kind of data. I think it's a clean visualization of the relationships I am trying to show.

    did:

        library(bipartite)

        # Simulate 200 pairs: A from five lowercase letters, B from seven
        # uppercase letters, each with random category weights.
        n_obs <- 200
        a_values <- sample(letters[1:5], n_obs, replace = TRUE, prob = runif(5))
        b_values <- sample(LETTERS[20:26], n_obs, replace = TRUE, prob = runif(7))
        dataset <- data.frame(A = a_values, B = b_values)

        # Contingency table of co-occurrences: rows are A, columns are B.
        pair_counts <- table(dataset$A, dataset$B)
        datamat <- as.matrix(pair_counts)

        # Draw the interaction web.
        # NOTE(review): text = "interaction" presumably prints each cell's
        # count inside the cell -- confirm against the bipartite documentation.
        visweb(datamat, text = "interaction", textsize = .8)
    

    giving: visweb result

    Couldn't put the image in as a new user :(

    0 讨论(0)
  • 2021-01-06 20:46

    Maybe mosaicplot:

    # Example data: 50 observations, each pairing one category of A with one
    # category of B. The integer codes index into the level vectors below.
    a_levels <- c("AA", "BB", "CC", "DD", "FF", "GG", "HH", "II")
    b_levels <- c("RR", "SS", "TT", "UU", "VV", "XX", "YY", "ZZ")
    a_codes <- c(
      6L, 1L, 2L, 4L, 4L, 3L, 7L, 1L, 3L, 4L,
      1L, 1L, 4L, 4L, 1L, 3L, 5L, 5L, 2L, 6L,
      6L, 1L, 1L, 1L, 3L, 3L, 5L, 6L, 3L, 2L,
      8L, 5L, 2L, 6L, 5L, 2L, 8L, 3L, 5L, 1L,
      1L, 6L, 2L, 8L, 8L, 4L, 1L, 2L, 6L, 2L
    )
    b_codes <- c(
      3L, 2L, 6L, 2L, 3L, 6L, 8L, 3L, 1L, 8L,
      6L, 3L, 2L, 6L, 7L, 8L, 2L, 6L, 5L, 5L,
      2L, 1L, 3L, 2L, 5L, 3L, 1L, 4L, 3L, 8L,
      3L, 1L, 2L, 7L, 1L, 5L, 1L, 7L, 5L, 6L,
      8L, 5L, 4L, 4L, 2L, 2L, 4L, 5L, 3L, 3L
    )
    X <- data.frame(
      ID = 1:50,
      A = factor(a_levels[a_codes], levels = a_levels),
      B = factor(b_levels[b_codes], levels = b_levels)
    )

    # Mosaic plot of the A-by-B contingency table. The call is written as a
    # single expression on purpose: mosaicplot() uses the deparsed argument
    # as the default plot title.
    mosaicplot(with(X, table(A, B)))
    

    For your example dataset:

    mosaicplot

    0 讨论(0)
提交回复
热议问题