ValueError: scale < 0 during normalization by using gaussian distribution function

问题

I'm trying to read my text file and extract 3 main parameters and put them in separate list and apply normalizing on lists of parameters which are (Temperature, Speed, Acceleration) after assigning Gaussian distribution function. For getting good result I split up positive and negative numbers of each parameters' list and apply gaussian distribution function and pick mean value of negative numbers as the real Minimum and pick mean value of positive numbers as the real Maximum instead of directly find Min and Max values in main list of these parameters which could repeat few times due to they're not in desired confidence interval. The problem is I faced RunTimeWarning error which I avoided already but still I have below error(s) which I don't have any clue how I can solve them includes ValueError: scale <0 , hope that someone has good idea about solution for errors ot better way to apply normalization by using gaussian distribution function Thanks for your attention:

File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd_launcher.py", line 45, in <module>
main(ptvsdArgs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\__main__.py", line 265, in main
wait=args.wait)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\__main__.py", line 258, in handle_args
debug_main(addr, name, kind, *extra, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 45, in debug_main
run_file(address, name, *extra, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 79, in run_file
run(argv, addr, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 140, in _run
_pydevd.main()
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1925, in main
debugger.connect(host, port)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1283, in run
return self._exec(is_module, entry_point_fn, module_name, file, globals, locals)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1290, in _exec
pydev_imports.execfile(file, globals, locals)  # execute the script
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\_pydev_imps\_pydev_execfile.py", line 25, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "p:\Desktop\correctt\news.py", line 142, in <module>
plotgaussianfunction(t_p_mean, t_sigma_Positive)
File "p:\Desktop\correctt\news.py", line 58, in plotgaussianfunction
s = np.random.normal(mu, sigma,1000)
File "mtrand.pyx", line 1656, in mtrand.RandomState.normal
ValueError: scale < 0

So my code is:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import warnings
warnings.filterwarnings("ignore",category =RuntimeWarning)

df = pd.read_csv('D:/me.txt', header=None)
id_set = df[df.index % 4 == 0].astype('int').values
speed = df[df.index % 4 == 1].values
acceleration = df[df.index % 4 == 2].values
temperature = df[df.index % 4 == 3].values

m_data={'p_Speed': s_p_results[:,0],'n_Speed': s_n_results[:,0], 'p_Acceleration': a_p_results[:,0],'n_Acceleration': a_n_results[:,0], 'p_Temperature': t_p_results[:,0],'n_Temperature': t_n_results[:,0]}
m_main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index = id_set[:,0])

data = {'Speed': speed[:,0], 'Acceleration': acceleration[:,0], 'Temperature': temperature[:,0]}
main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index = id_set[:,0])
main_data = main_data.replace([np.inf, -np.inf], np.nan)

def normalize(value, min_value, max_value, min_norm, max_norm):
new_value = ((max_norm - min_norm)*((value - min_value)/(max_value - min_value))) + min_norm
return new_value

def createpositiveandnegativelist(listtocreate):
l_negative = []
l_positive = []
for value in listtocreate:
    if (value < 0):
        l_negative.append(value)
    elif (value > 0):
        l_positive.append(value)
#print(t_negative)
#print(t_positive)
return l_negative,l_positive

def calculatemean(listtocalculate):
return sum(listtocalculate)/len(listtocalculate)

def plotgaussianfunction(mu,sigma):
s = np.random.normal(mu, sigma,1000)
abs(mu - np.mean(s))<0.01
abs(sigma - np.std(s,ddof=1))<0.01
#count, bins, ignored = plt.hist(s,30,density=True)
#plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins-mu)**2/(2*sigma**2)),linewidth=2, color= 'r')
#plt.show()
return


def plotboundedCI(s, mu, sigma, lists):
plt.figure()

count, bins, ignored = plt.hist(s,30,density=True)
plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins-mu)**2/(2*sigma**2)),linewidth=2, color= 'r')
#confidential interval calculation
ci = scipy.stats.norm.interval(0.68, loc = mu, scale = sigma)
#confidence interval for left line
one_x12, one_y12 = [ci[0],ci[0]], [0,3]
#confidence interval for right line
two_x12, two_y12 = [ci[1],ci[1]], [0,3]

plt.title("Gaussian 68% Confidence Interval", fontsize=12, color='black', loc='left', style='italic')
plt.plot(one_x12, one_y12, two_x12, two_y12, marker = 'o')
plt.show()


results = []
for value in lists:
    if(ci[0]< value <ci[1]):
        results.append(value)
    else:
        #print("NOT WANTED: ",value)
        pass

return results


t_negative, t_positive = createpositiveandnegativelist(temperature)
a_negative, a_positive = createpositiveandnegativelist(acceleration)
s_negative, s_positive = createpositiveandnegativelist(speed)

#calculating the mean value
t_p_mean = calculatemean(t_positive)
a_p_mean = calculatemean(a_positive)
s_p_mean = calculatemean(s_positive)
t_n_mean = calculatemean(t_negative)
a_n_mean = calculatemean(a_negative)
s_n_mean = calculatemean(s_negative)

#calculating the sigma value
t_sigma_Negative = np.std(t_negative)
t_sigma_Positive = np.std(t_positive)
a_sigma_Negative = np.std(t_negative)
a_sigma_Positive = np.std(t_positive)
s_sigma_Negative = np.std(t_negative)
s_sigma_Positive = np.std(t_positive)

#plot the gaussian function with histograms
plotgaussianfunction(t_p_mean, t_sigma_Positive)
plotgaussianfunction(t_n_mean, t_sigma_Negative)
plotgaussianfunction(a_p_mean, a_sigma_Positive)
plotgaussianfunction(a_n_mean, a_sigma_Negative)
plotgaussianfunction(s_p_mean, s_sigma_Positive)
plotgaussianfunction(s_n_mean, s_sigma_Negative)

#normalization
t_p_s = np.random.normal(t_p_mean, t_sigma_Positive,1000)
t_n_s = np.random.normal(t_n_mean, t_sigma_Negative,1000)
a_p_s = np.random.normal(a_p_mean, a_sigma_Positive,1000)
a_n_s = np.random.normal(a_n_mean, a_sigma_Negative,1000)
s_p_s = np.random.normal(s_p_mean, s_sigma_Positive,1000)
s_n_s = np.random.normal(s_n_mean, s_sigma_Negative,1000)

#histograms minus the outliers
t_p_results = plotboundedCI(t_p_s, t_p_mean, t_sigma_Positive, t_positive)
t_n_results = plotboundedCI(t_n_s, t_n_mean, t_sigma_Negative, t_negative)
a_p_results = plotboundedCI(a_p_s, a_p_mean, a_sigma_Positive, a_positive)
a_n_results = plotboundedCI(a_n_s, a_n_mean, a_sigma_Negative, a_negative)
s_p_results = plotboundedCI(s_p_s, s_p_mean, s_sigma_Positive, s_positive)
s_n_results = plotboundedCI(s_n_s, s_n_mean, s_sigma_Negative, s_negative)

Note: I have some missing data(nan or inf) in my list of values which are already replaced by zero! but considering that when I have no missing values in my list of parameters , the code works!

回答1:

from documentation of numpy.random.normal:

Parameters:

loc : float or array_like of floats

Mean (“centre”) of the distribution.

scale : float or array_like of floats Standard deviation (spread or “width”) of the distribution.

size : int or tuple of ints, optional Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn. If size is None (default), a single value is returned if loc and scale are both scalars. Otherwise, np.broadcast(loc, scale).size samples are drawn.

the scale is the Standard deviation of the distribution hence it can not be negative. Hence the error you get: ValueError: scale < 0

you may want to check the sign of this parameter. give it a try with:

s = np.random.normal(mu, np.abs(sigma),1000)

来源：https://stackoverflow.com/questions/53818422/valueerror-scale-0-during-normalization-by-using-gaussian-distribution-functi

标签

python

normalization

gaussian

valueerror