I am trying to create a 3-line time series plot based on the following data , in a Week x Overload graph, where each Cluster is a different line.
I have multiple obs
I finally used the good old plot
with a design (subplots) that seems (to me) more readable.
df = pd.read_csv('TSplot.csv', sep='\t', index_col=0)
# Compute the min, mean and max (could also be other values)
grouped = df.groupby(["Cluster", "Week"]).agg({'Overload': ['min', 'mean', 'max']}).unstack("Cluster")
# Plot with sublot since it is more readable
axes = grouped.loc[:,('Overload', 'mean')].plot(subplots=True)
# Getting the color palette used
palette = sns.color_palette()
# Initializing an index to get each cluster and each color
index = 0
for ax in axes:
ax.fill_between(grouped.index, grouped.loc[:,('Overload', 'mean', index + 1)],
grouped.loc[:,('Overload', 'max', index + 1 )], alpha=.2, color=palette[index])
ax.fill_between(grouped.index,
grouped.loc[:,('Overload', 'min', index + 1)] , grouped.loc[:,('Overload', 'mean', index + 1)], alpha=.2, color=palette[index])
index +=1
Based off this incredible answer, I was able to create a monkey patch to beautifully do what you are looking for.
import pandas as pd
import seaborn as sns
import seaborn.timeseries
def _plot_range_band(*args, central_data=None, ci=None, data=None, **kwargs):
upper = data.max(axis=0)
lower = data.min(axis=0)
#import pdb; pdb.set_trace()
ci = np.asarray((lower, upper))
kwargs.update({"central_data": central_data, "ci": ci, "data": data})
seaborn.timeseries._plot_ci_band(*args, **kwargs)
seaborn.timeseries._plot_range_band = _plot_range_band
cluster_overload = pd.read_csv("TSplot.csv", delim_whitespace=True)
cluster_overload['Unit'] = cluster_overload.groupby(['Cluster','Week']).cumcount()
ax = sns.tsplot(time='Week',value="Overload", condition="Cluster", unit="Unit", data=cluster_overload,
err_style="range_band", n_boot=0)
Output Graph:
Notice that the shaded regions line up with the true maximum and minimums in the line graph!
If you figure out why the unit
variable is required, please let me know.
If you do not want them all on the same graph then:
import pandas as pd
import seaborn as sns
import seaborn.timeseries
def _plot_range_band(*args, central_data=None, ci=None, data=None, **kwargs):
upper = data.max(axis=0)
lower = data.min(axis=0)
#import pdb; pdb.set_trace()
ci = np.asarray((lower, upper))
kwargs.update({"central_data": central_data, "ci": ci, "data": data})
seaborn.timeseries._plot_ci_band(*args, **kwargs)
seaborn.timeseries._plot_range_band = _plot_range_band
cluster_overload = pd.read_csv("TSplot.csv", delim_whitespace=True)
cluster_overload['subindex'] = cluster_overload.groupby(['Cluster','Week']).cumcount()
def customPlot(*args,**kwargs):
df = kwargs.pop('data')
pivoted = df.pivot(index='subindex', columns='Week', values='Overload')
ax = sns.tsplot(pivoted.values, err_style="range_band", n_boot=0, color=kwargs['color'])
g = sns.FacetGrid(cluster_overload, row="Cluster", sharey=False, hue='Cluster', aspect=3)
g = g.map_dataframe(customPlot, 'Week', 'Overload','subindex')
Which produces the following, (you can obviously play with the aspect ratio if you think the proportions are off)
I really thought I would be able to do it with seaborn.tsplot
. But it does not quite look right. Here is the result I get with seaborn:
cluster_overload = pd.read_csv("TSplot.csv", delim_whitespace=True)
cluster_overload['Unit'] = cluster_overload.groupby(['Cluster','Week']).cumcount()
ax = sns.tsplot(time='Week',value="Overload", condition="Cluster", ci=100, unit="Unit", data=cluster_overload)
Outputs:
I am really confused as to why the unit
parameter is necessary since my understanding is that all the data is aggregated based on (time, condition)
The Seaborn Documentation defines unit
as
Field in the data DataFrame identifying the sampling unit (e.g. subject, neuron, etc.). The error representation will collapse over units at each time/condition observation. This has no role when data is an array.
I am not certain of the meaning of 'collapsed over'- especially since my definition wouldn't make it a required variable.
Anyways, here's the output if you want exactly what you discussed, not nearly as pretty. I am not sure how to manually shade in those regions, but please share if you figure it out.
cluster_overload = pd.read_csv("TSplot.csv", delim_whitespace=True)
grouped = cluster_overload.groupby(['Cluster','Week'],as_index=False)
stats = grouped.agg(['min','mean','max']).unstack().T
stats.index = stats.index.droplevel(0)
colors = ['b','g','r']
ax = stats.loc['mean'].plot(color=colors, alpha=0.8, linewidth=3)
stats.loc['max'].plot(ax=ax,color=colors,legend=False, alpha=0.3)
stats.loc['min'].plot(ax=ax,color=colors,legend=False, alpha=0.3)
Outputs: