I am trying to obtain the following plot from a pandas data frame.
I am not sure how to combine seaborn with pandas for that task.
This is the dataframe
Here is a function that creates a grid of KDE plots (a "joyplot"), with one plot per dataframe column.
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
def joyplot_from_dataframe(data, cmap=None):
    """Draw a "joyplot": one filled KDE curve per column, stacked vertically.

    All rows share one x axis that spans the overall (NaN-ignoring) data
    range of ``data``, padded by 20% of that range on each side. NaN values
    are dropped per column before the density is estimated.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are plotted; read through ``data.columns``,
        ``data.values`` and ``data[column].values``.
    cmap : matplotlib colormap or colormap name, optional
        Colormap the per-row colours are sampled from (between 0.2 and 1).
        Defaults to ``"Blues"``.

    Returns
    -------
    fig : matplotlib.figure.Figure
    axes : numpy.ndarray
        1-D array holding one Axes per column, also for a single column.

    Raises
    ------
    ValueError
        If ``data`` has no columns.
    """
    columns = list(data.columns)
    n = len(columns)
    if n == 0:
        raise ValueError("joyplot_from_dataframe needs at least one column")

    # Common evaluation grid: overall data range padded by 20% on each side.
    mi, ma = np.nanmin(data.values), np.nanmax(data.values)
    pad = (ma - mi) / 5
    x = np.linspace(mi - pad, ma + pad, 1000)

    # plt.get_cmap accepts a name or a Colormap instance; plt.cm.get_cmap
    # is no longer available in recent matplotlib releases.
    cmap = plt.get_cmap("Blues" if cmap is None else cmap)
    colors = cmap(np.linspace(0.2, 1, n))

    # squeeze=False keeps `axes` an array even for n == 1, where subplots
    # would otherwise hand back a bare Axes that cannot be zipped over.
    fig, axes = plt.subplots(nrows=n, sharex=True, squeeze=False)
    axes = axes.ravel()

    for c, ax, color in zip(columns, axes, colors):
        y = data[c].values
        y = y[~np.isnan(y)]
        # gaussian_kde cannot be fitted without at least two distinct
        # samples; leave such a row empty instead of aborting the figure.
        if np.unique(y).size >= 2:
            kde = gaussian_kde(y)
            ax.fill_between(x, kde(x), color=color)
        # Strip everything but a coloured baseline from each row.
        ax.yaxis.set_visible(False)
        for spine in ["left", "right", "top"]:
            ax.spines[spine].set_visible(False)
        ax.spines["bottom"].set_linewidth(2)
        ax.spines["bottom"].set_color(color)
        ax.margins(y=0)
        ax.tick_params(bottom=False)
    return fig, axes
Use it as follows:
import pandas as pd

# Three sample columns with different locations and spreads.
data = pd.DataFrame(
    {
        'a': np.random.randn(1000) + 1,
        'b': np.random.randn(1000),
        'c': np.random.rand(1000) + 10,
    },
    columns=['a', 'b', 'c'],
)
# Blank out random rows per column to exercise the NaN handling. A single
# .loc assignment is used because chained assignment (data.a[idx] = ...)
# does not reliably write back to the frame, and np.nan replaces the
# np.NaN alias that NumPy 2.0 removed.
data.loc[data.a.sample(100).index, 'a'] = np.nan
data.loc[data.b.sample(800).index, 'b'] = np.nan
joyplot_from_dataframe(data)
plt.show()