Joining a dendrogram and a heatmap

后端 未结 2 412
梦谈多话
梦谈多话 2021-02-03 23:17

I have a heatmap (gene expression from a set of samples):

set.seed(10)
mat <- matrix(rnorm(24*10,mean=1,sd=2),nrow=24,ncol=10,dimnames=list(paste         


        
2条回答
  •  栀梦
    栀梦 (楼主)
    2021-02-03 23:53

    Here is a (tentative) solution with the gene and sample dendrograms. It is a rather lacking solution, because I haven't managed to find a good way to get plot_grid to properly align all subplots, while automatically adjusting the figure proportions and distances between the sub-plots. In this version, the way to produce the overall figure was to add "padding subplots" (the flanking NULL entries in the call to plot_grid) and also to manually fine-tune the margins of the sub-plots (which strangely seem to be coupled in the various subplots). Once again, this is a rather lacking solution, hopefully I can manage to post a definitive version soon.

    library(plyr)
    library(reshape2)
    library(dplyr)
    library(ggplot2)
    library(ggdendro)
    library(gridExtra)
    library(dendextend)
    
    set.seed(10)
    
    # The source data
    mat <- matrix(rnorm(24 * 10, mean = 1, sd = 2), 
                  nrow = 24, ncol = 10, 
                  dimnames = list(paste("g", 1:24, sep = ""), 
                                  paste("sample", 1:10, sep = "")))
    
    getDendrogram <- function(data_mat, depth_cutoff) {
    
        # Obtain the dendrogram
        full_dend <- as.dendrogram(hclust(dist(data_mat)))
    
        # Cut the dendrogram
        h_c_cut <- cut(full_dend, h = depth_cutoff)
        dend_cut <- as.dendrogram(h_c_cut$upper)
        dend_cut <- hang.dendrogram(dend_cut)
        # Format to extend the branches (optional)
        dend_cut <- hang.dendrogram(dend_cut, hang = -1) 
        dend_data_cut <- dendro_data(dend_cut)
    
        # Extract the names assigned to the clusters (e.g., "Branch 1", "Branch 2", ...)
        cluster_names <- as.character(dend_data_cut$labels$label)
        # Extract the entries that belong to each group (using the 'labels' function)
        lst_entries_in_clusters <- h_c_cut$lower %>% 
            lapply(labels) %>% 
            setNames(cluster_names)
    
        # The dendrogram data for plotting
        segment_data <- segment(dend_data_cut)
    
        # Extract the positions of the clusters (by getting the positions of the 
        # leafs); data is already in the same order as in the cluster name
        cluster_positions <- segment_data[segment_data$yend == 0, "x"]
        cluster_pos_table <- data.frame(position = cluster_positions, 
                                        cluster = cluster_names)
    
        list(
            full_dend = full_dend, 
            dend_data_cut = dend_data_cut, 
    
            lst_entries_in_clusters = lst_entries_in_clusters, 
            segment_data = segment_data, 
            cluster_pos_table = cluster_pos_table
        )
    }
    
    # Cut the dendrograms
    gene_depth_cutoff <- 11
    sample_depth_cutof <- 12
    
    # Obtain the dendrograms
    gene_dend_data <- getDendrogram(mat, gene_depth_cutoff)
    sample_dend_data <- getDendrogram(t(mat), sample_depth_cutof)
    
    # Specify the positions for the genes and samples, accounting for the clusters
    gene_pos_table <- gene_dend_data$lst_entries_in_clusters %>%
        ldply(function(ss) data.frame(gene = ss), .id = "gene_cluster") %>%
        mutate(y_center = 1:nrow(.), 
               height = 1)
    # > head(gene_pos_table, 3)
    #    cluster gene y_center height
    # 1 Branch 1  g11        1      1
    # 2 Branch 1  g20        2      1
    # 3 Branch 1  g12        3      1
    
    # Specify the positions for the samples, accounting for the clusters
    sample_pos_table <- sample_dend_data$lst_entries_in_clusters %>%
        ldply(function(ss) data.frame(sample = ss), .id = "sample_cluster") %>%
        mutate(x_center = 1:nrow(.), 
               width = 1)
    
    # Neglecting the gap parameters
    heatmap_data <- mat %>% 
        reshape2::melt(value.name = "expr", varnames = c("gene", "sample")) %>%
        left_join(gene_pos_table) %>%
        left_join(sample_pos_table)
    
    # Limits for the vertical axes (genes / clusters)
    axis_spacing <- 0.1 * c(-1, 1)
    gene_axis_limits <- with(
        gene_pos_table, 
        c(min(y_center - 0.5 * height), max(y_center + 0.5 * height))) + axis_spacing
    
    sample_axis_limits <- with(
        sample_pos_table, 
        c(min(x_center - 0.5 * width), max(x_center + 0.5 * width))) + axis_spacing
    
    # For some reason, the margin of the various sub-plots end up being "coupled"; 
    # therefore, for now this requires some manual fine-tuning, 
    # which is obviously not ideal...
    # margin: top, right, bottom, and left
    margin_specs_hmap <- 1 * c(-2, -1, -1, -2)
    margin_specs_gene_dendr <- 1.7 * c(-1, -2, -1, -1)
    margin_specs_sample_dendr <- 1.7 * c(-2, -1, -2, -1)
    
    # Heatmap plot
    plt_hmap <- ggplot(heatmap_data, 
                       aes(x = x_center, y = y_center, fill = expr, 
                           height = height, width = width)) + 
        geom_tile() +
        scale_fill_gradient2("expr", high = "darkred", low = "darkblue") +
        scale_x_continuous(breaks = sample_pos_table$x_center, 
                           labels = sample_pos_table$sample, 
                           expand = c(0.01, 0.01)) + 
        scale_y_continuous(breaks = gene_pos_table$y_center, 
                           labels = gene_pos_table$gene, 
                           limits = gene_axis_limits, 
                           expand = c(0.01, 0.01), 
                           position = "right") + 
        labs(x = "Sample", y = "Gene") +
        theme_bw() +
        theme(axis.text.x = element_text(size = rel(1), hjust = 1, angle = 45), 
              axis.text.y = element_text(size = rel(0.7)), 
              legend.position = "none", 
              plot.margin = unit(margin_specs_hmap, "cm"), 
              panel.grid.minor = element_blank())
    
    # Dendrogram plots
    plt_gene_dendr <- ggplot(gene_dend_data$segment_data) + 
        geom_segment(aes(x = y, y = x, xend = yend, yend = xend)) + # inverted coordinates
        scale_x_reverse(expand = c(0, 0.5)) + 
        scale_y_continuous(breaks = gene_dend_data$cluster_pos_table$position, 
                           labels = gene_dend_data$cluster_pos_table$cluster, 
                           limits = gene_axis_limits, 
                           expand = c(0, 0)) + 
        labs(x = "Distance", y = "", colour = "", size = "") +
        theme_bw() + 
        theme(plot.margin = unit(margin_specs_gene_dendr, "cm"), 
              panel.grid.minor = element_blank())
    
    plt_sample_dendr <- ggplot(sample_dend_data$segment_data) + 
        geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
        scale_y_continuous(expand = c(0, 0.5), 
                           position = "right") + 
        scale_x_continuous(breaks = sample_dend_data$cluster_pos_table$position, 
                           labels = sample_dend_data$cluster_pos_table$cluster, 
                           limits = sample_axis_limits, 
                           position = "top", 
                           expand = c(0, 0)) + 
        labs(x = "", y = "Distance", colour = "", size = "") +
        theme_bw() + 
        theme(plot.margin = unit(margin_specs_sample_dendr, "cm"), 
              panel.grid.minor = element_blank(), 
              axis.text.x = element_text(size = rel(0.8), angle = 45, hjust = 0))
    
    library(cowplot)
    
    final_plot <- plot_grid(
        NULL,    NULL,           NULL,             NULL, 
        NULL,    NULL,           plt_sample_dendr, NULL, 
        NULL,    plt_gene_dendr, plt_hmap,         NULL, 
        NULL,    NULL,           NULL,             NULL, 
        nrow = 4, ncol = 4, align = "hv", 
        rel_heights = c(0.5, 1, 2, 0.5), 
        rel_widths = c(0.5, 1, 2, 0.5)
    )
    

提交回复
热议问题