My problem:
I have a dataset which is a large JSON file. I read it and store it in the trainList
variable.
Next, I pre-process
Here's a fix for invoketheshell's buggy code (which currently appears as the accepted answer):
def performance_measure(y_actual, y_hat):
    # count the four outcomes for binary labels (1 = positive, 0 = negative)
    TP = 0
    FP = 0
    TN = 0
    FN = 0
    for i in range(len(y_hat)):
        if y_actual[i] == y_hat[i] == 1:
            TP += 1
        if y_hat[i] == 1 and y_actual[i] == 0:
            FP += 1
        if y_hat[i] == y_actual[i] == 0:
            TN += 1
        if y_hat[i] == 0 and y_actual[i] == 1:
            FN += 1
    return (TP, FP, TN, FN)
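For a quick sanity check, here is a call with two short hypothetical label lists (the values are made up purely for illustration):

y_actual = [1, 1, 0, 0, 1, 0]
y_hat    = [1, 0, 0, 1, 1, 0]

TP, FP, TN, FN = performance_measure(y_actual, y_hat)
print(TP, FP, TN, FN)  # 2 1 2 1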
In scikit-learn's metrics module there is a confusion_matrix function which gives you the desired output.
You can use any classifier you want; here I use KNeighborsClassifier as an example.
from sklearn import metrics, neighbors

clf = neighbors.KNeighborsClassifier()
# note: the classifier has to be fitted on training data before calling predict,
# otherwise scikit-learn raises a NotFittedError
X_test = ...
y_test = ...

expected = y_test
predicted = clf.predict(X_test)

conf_matrix = metrics.confusion_matrix(expected, predicted)
>>> print(conf_matrix)
[[1403   87]
 [  56 3159]]
The docs: http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html#sklearn.metrics.confusion_matrix
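If what you want are the four counts rather than the matrix itself, for a binary problem with labels 0 and 1 you can unpack them from the flattened confusion matrix (the order is tn, fp, fn, tp); the labels below are hypothetical:

from sklearn.metrics import confusion_matrix

y_true = [1, 1, 0, 0, 1, 0]
y_pred = [1, 0, 0, 1, 1, 0]

tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print(tn, fp, fn, tp)  # 2 1 1 2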
I wrote a version that uses only NumPy. I hope it helps you.
import numpy as np

def perf_metrics_2X2(yobs, yhat):
    """
    Returns the sensitivity, specificity, positive predictive value, and
    negative predictive value of a 2x2 table, where:
        0 = negative case
        1 = positive case

    Parameters
    ----------
    yobs : array of positive and negative ``observed`` cases
    yhat : array of positive and negative ``predicted`` cases

    Returns
    -------
    sensitivity  = TP / (TP + FN)
    specificity  = TN / (TN + FP)
    pos_pred_val = TP / (TP + FP)
    neg_pred_val = TN / (TN + FN)

    Author: Julio Cardenas-Rodriguez
    """
    yobs = np.asarray(yobs)
    yhat = np.asarray(yhat)

    # count the four cells of the 2x2 table
    TP = np.sum((yobs == 1) & (yhat == 1))
    TN = np.sum((yobs == 0) & (yhat == 0))
    FP = np.sum((yobs == 0) & (yhat == 1))
    FN = np.sum((yobs == 1) & (yhat == 0))

    sensitivity  = TP / (TP + FN)
    specificity  = TN / (TN + FP)
    pos_pred_val = TP / (TP + FP)
    neg_pred_val = TN / (TN + FN)

    return sensitivity, specificity, pos_pred_val, neg_pred_val
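A quick check with small hypothetical arrays (values chosen only for illustration):

yobs = np.array([1, 1, 1, 0, 0, 0, 1, 0])
yhat = np.array([1, 1, 0, 0, 0, 1, 1, 0])

sens, spec, ppv, npv = perf_metrics_2X2(yobs, yhat)
print(sens, spec, ppv, npv)  # 0.75 0.75 0.75 0.75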
# False negatives: rows observed as "Genuine" but predicted as "Forged"
import numpy as np
import pandas as pd

test = pd.merge(Variables_test, Banknote_test, left_index=True, right_index=True)
Banknote_test_pred = pd.DataFrame(banknote_test_pred)
Banknote_test_pred.rename(columns={0: 'Predicted'}, inplace=True)
test = test.reset_index(drop=True).merge(Banknote_test_pred.reset_index(drop=True), left_index=True, right_index=True)
test['FN'] = np.where((test['Banknote'] == "Genuine") & (test['Predicted'] == "Forged"), 1, 0)
test[test.FN != 0]
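The same pattern works on any DataFrame that holds the observed and predicted labels side by side; below is a minimal, self-contained sketch with made-up data (the column names and label strings are just illustrative):

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'Banknote':  ["Genuine", "Genuine", "Forged", "Forged"],   # observed labels
    'Predicted': ["Genuine", "Forged",  "Forged", "Genuine"],  # predicted labels
})

# 1 marks a false negative: observed Genuine, predicted Forged
df['FN'] = np.where((df['Banknote'] == "Genuine") & (df['Predicted'] == "Forged"), 1, 0)
print(df[df.FN != 0])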