Stacked density plots with pandas and seaborn

前端 未结 1 1020
日久生厌
日久生厌 2021-01-25 08:19

I am trying to obtain the following plot from a pandas data frame.

I am not sure how to combine seaborn with pandas for that task.

This is the dataframe

相关标签:
1条回答
  • 2021-01-25 08:40

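    Here is a function that draws a vertical stack of KDE plots (a "joyplot", also known as a ridgeline plot), with one subplot per DataFrame column. NaN values in a column are dropped before its density is estimated.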

    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.stats import gaussian_kde
    
    
    def joyplot_from_dataframe(data, cmap=None):
        """Draw a "joyplot": one filled Gaussian-KDE curve per column of *data*.

        The subplots are stacked vertically and share a common x axis that
        spans the overall (NaN-ignoring) data range, padded by 20 % on each
        side. NaN entries of a column are dropped before its density is
        estimated.

        Parameters
        ----------
        data : pandas.DataFrame
            Numeric frame; every column becomes one row of the plot.
        cmap : matplotlib colormap or str, optional
            Colormap (or registered colormap name) the per-column colors are
            sampled from. Any falsy value selects ``"Blues"``.

        Returns
        -------
        fig : matplotlib.figure.Figure
        axes : numpy.ndarray
            1-D array holding one Axes per column, in column order.

        Raises
        ------
        ValueError
            If *data* has no columns.
        """
        n = len(data.columns)
        if n == 0:
            raise ValueError("data must have at least one column")

        values = data.values
        mi, ma = np.nanmin(values), np.nanmax(values)
        pad = (ma - mi) / 5
        x = np.linspace(mi - pad, ma + pad, 1000)

        if not cmap:
            cmap = "Blues"
        if isinstance(cmap, str):
            # plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap is
            # available in both old and new releases.
            cmap = plt.get_cmap(cmap)
        # Start at 0.2 so the first curve is not nearly white.
        colors = cmap(np.linspace(.2, 1, n))

        # squeeze=False always yields a 2-D array, so a single-column frame
        # no longer returns a bare Axes that cannot be zipped over.
        fig, axes = plt.subplots(nrows=n, sharex=True, squeeze=False)
        axes = axes[:, 0]

        for c, ax, color in zip(data.columns, axes, colors):
            y = data[c].values
            y = y[~np.isnan(y)]
            # gaussian_kde needs at least two points and non-zero variance;
            # otherwise leave this row empty instead of raising.
            if y.size >= 2 and y.max() != y.min():
                kde = gaussian_kde(y)
                ax.fill_between(x, kde(x), color=color)
            ax.yaxis.set_visible(False)
            for spine in ["left", "right", "top"]:
                ax.spines[spine].set_visible(False)
            ax.spines["bottom"].set_linewidth(2)
            ax.spines["bottom"].set_color(color)
            ax.margins(y=0)
            ax.tick_params(bottom=False)

        return fig, axes
    

    Use it as follows:

    import pandas as pd
    
    # Demo frame: three columns with different locations and shapes.
    data = pd.DataFrame({'a': np.random.randn(1000) + 1,
                         'b': np.random.randn(1000),
                         'c': np.random.rand(1000) + 10},
                        columns=['a', 'b', 'c'])

    # Blank out random entries to show that NaNs are tolerated.
    # .loc assigns on the frame itself; the chained form data.a[idx] = ...
    # may write to a temporary copy. np.nan replaces the np.NaN alias that
    # was removed in NumPy 2.0.
    data.loc[data.a.sample(100).index, 'a'] = np.nan
    data.loc[data.b.sample(800).index, 'b'] = np.nan


    joyplot_from_dataframe(data)
    plt.show()
    

    0 讨论(0)
提交回复
热议问题