I have data that looks Gaussian when plotted as a histogram. I want to plot a Gaussian curve on top of the histogram to see how well the data fits. I am using pyplot f
An old post, I know, but I wanted to contribute my code for doing this, which simply does the 'fix by area' trick:
from scipy.stats import norm
from numpy import linspace
from pylab import plot,show,hist
def PlotHistNorm(data, log=False):
    """Plot a histogram of *data* with a fitted normal curve drawn on top.

    A normal distribution is fitted to ``data`` with ``norm.fit`` and its
    PDF is rescaled by the histogram's area (number of samples times bin
    width), so the curve is directly comparable to the raw bin counts.

    Parameters
    ----------
    data : sequence of numbers
        Samples to histogram and fit.
    log : bool, optional
        Forwarded to ``hist`` as its ``log`` argument. Defaults to False.

    Returns
    -------
    None
        The histogram and curve are drawn through the module-level ``hist``
        and ``plot`` functions; ``show`` is not called here.
    """
    # Distribution fitting: norm.fit yields the location (mean) first and
    # the scale (standard deviation) second.
    param = norm.fit(data)
    mean = param[0]
    sd = param[1]

    # Evaluate the curve over a generous +/- 6 sigma window around the mean.
    x = linspace(mean - 6 * sd, mean + 6 * sd, 500)

    # Plot the histogram; element 1 of the result holds the bin edges.
    histdata = hist(data, bins=12, alpha=.3, log=log)
    edges = histdata[1]

    # Bin width is the full edge-to-edge span divided by the number of bins
    # (one fewer than the number of edges). Deriving it from bin centres
    # divided by the edge count underestimates the width and therefore the
    # height of the scaled curve.
    binwidth = (edges[-1] - edges[0]) / (len(edges) - 1)

    # A unit-area PDF scaled by N * binwidth has the same area as a
    # histogram of raw counts.
    pdf_fitted = norm.pdf(x, loc=mean, scale=sd) * (len(data) * binwidth)

    # Plot the scaled PDF as a red line over the histogram.
    plot(x, pdf_fitted, 'r-')