I'm doing multiclass text classification in Scikit-Learn. A Multinomial Naive Bayes classifier is being trained on a dataset that has hundreds of labels. Here's an extract…
def to_table(report):
    """Convert a plain-text classification report into a 2-D array of strings.

    The expected layout is: a header line with the metric names, a blank
    line, one line per class, a blank line, and a final summary line (for
    example ``avg / total``).

    Parameters
    ----------
    report : str
        The text of the report.

    Returns
    -------
    numpy.ndarray
        One row per report line. The first column holds the row label (an
        empty string for the header row); the remaining columns hold the
        metric values as strings.
    """
    lines = report.splitlines()
    header = lines[0].split()
    n_metrics = len(header)

    def split_row(line):
        # Values are taken from the right so that a label containing
        # whitespace ("class 0", "avg / total") stays in a single cell and
        # every row ends up with the same number of columns.
        tokens = line.split()
        return [' '.join(tokens[:-n_metrics])] + tokens[-n_metrics:]

    table = [[''] + header]
    # lines[1] and lines[-2] are the blank separators around the class rows.
    table.extend(split_row(line) for line in lines[2:-2] if line.strip())
    table.append(split_row(lines[-1]))
    return np.array(table)
This returns a NumPy array, which can be turned into a pandas DataFrame or simply saved as a CSV file.
I don't know whether you still need a solution, but this is the best I have come up with to keep the report in a clean format and still save it:
def classifcation_report_processing(model_to_report):
    """Parse a text classification report, save it to Excel and return a preview.

    The first non-blank line is read as the list of measure names; every
    following non-blank line is read as a row label followed by numeric
    values. The table is written to ``Classification_report.xlsx`` in the
    current working directory (pandas needs an Excel writer engine installed
    for that step).

    Parameters
    ----------
    model_to_report : str
        The text of the report.

    Returns
    -------
    pandas.DataFrame
        The first five rows of the saved table: one row per label, one
        column per measure.

    Raises
    ------
    IndexError
        If the text contains no non-blank line.
    ValueError
        If a token in a value position is not a number.
    """
    rows = [line.split() for line in model_to_report.splitlines()]
    rows = [tokens for tokens in rows if tokens]

    # Store in dictionary
    measures = rows[0]
    D_class_data = defaultdict(dict)
    for tokens in rows[1:]:
        # Read the values from the right-hand side: labels may span several
        # tokens ("avg / total", "macro avg"), and a row may carry fewer
        # values than there are measures (an "accuracy" row is aligned under
        # the last columns only).
        n_values = min(len(measures), len(tokens) - 1)
        if n_values < 1:
            continue  # a lone token has no values to record
        class_label = ' '.join(tokens[:-n_values])
        for measure, value in zip(measures[-n_values:], tokens[-n_values:]):
            D_class_data[class_label][measure] = float(value)

    save_report = pd.DataFrame.from_dict(D_class_data).T
    path_to_save = os.path.join(os.getcwd(), 'Classification_report.xlsx')
    save_report.to_excel(path_to_save, index=True)
    return save_report.head(5)
# Usage: render the text report for y_val vs. prediction (both defined
# elsewhere), pass it to classifcation_report_processing, and keep the value
# that function returns.
saving_CL_report_naive_bayes = classifcation_report_processing(classification_report(y_val, prediction))
If you want the individual scores, this should do the job just fine.
import pandas as pd
def classification_report_csv(report):
    """Write the per-class rows of a text classification report to a CSV file.

    The first line of ``report`` is treated as the header. The per-class
    rows are the first contiguous run of non-blank lines after it; reading
    stops at the blank line that follows them, so any summary rows further
    down are not exported. The result is written to
    ``classification_report.csv`` in the current working directory.

    Parameters
    ----------
    report : str
        The text of the report.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a class row has fewer than four trailing fields or a field is
        not a number.
    """
    columns = ['class', 'precision', 'recall', 'f1_score', 'support']
    report_data = []
    for line in report.split('\n')[1:]:
        if not line.strip():
            if report_data:
                break  # blank line after the class rows: summaries follow
            continue  # blank separator between header and class rows
        # Split on runs of whitespace (columns are padded with several
        # spaces) and read the four numbers from the right so that class
        # names containing spaces stay intact.
        tokens = line.split()
        precision, recall, f1_score, support = tokens[-4:]
        report_data.append({
            'class': ' '.join(tokens[:-4]),
            'precision': float(precision),
            'recall': float(recall),
            'f1_score': float(f1_score),
            'support': float(support),
        })
    # Explicit columns keep the order stable and give an empty report a header.
    dataframe = pd.DataFrame(report_data, columns=columns)
    dataframe.to_csv('classification_report.csv', index=False)
# Usage: render the text report for y_true vs. y_pred (both defined
# elsewhere), then pass it to classification_report_csv, which writes
# classification_report.csv.
report = classification_report(y_true, y_pred)
classification_report_csv(report)
The simplest and best way I found is:
# Save the plain-text report exactly as scikit-learn renders it.
classes = ['class 1','class 2','class 3']
report = classification_report(Y[test], Y_pred, target_names=classes)
report_path = "report.txt"
# The context manager closes the file even if the write raises.
with open(report_path, "w") as text_file:
    n = text_file.write(report)
I also found some of the answers a bit verbose. Here is my three-line solution, using `precision_recall_fscore_support`,
as others have suggested.
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support
# One row per class, one column per metric: the four arrays returned by
# precision_recall_fscore_support become the columns after the transpose.
report = pd.DataFrame(
    list(precision_recall_fscore_support(y_true, y_pred)),
    index=['Precision', 'Recall', 'F1-score', 'Support'],
).T
# Now add the 'Avg/Total' row. It must be computed from the same y_pred as
# the per-class rows above.
report.loc['Avg/Total', :] = precision_recall_fscore_support(
    y_true, y_pred, average='weighted')
# The averaged call does not supply a usable support value, so fill the cell
# with the total of the per-class supports.
report.loc['Avg/Total', 'Support'] = report['Support'].sum()
We can get the actual values from the `precision_recall_fscore_support` function and then put them into a DataFrame. The code below gives the same result, but now as a pandas DataFrame :).
# Per-class precision / recall / f1 / support as a DataFrame indexed by the
# classifier's class labels.
clf_rep = metrics.precision_recall_fscore_support(true, pred)
out_dict = {
    "precision": clf_rep[0].round(2),
    "recall": clf_rep[1].round(2),
    "f1-score": clf_rep[2].round(2),
    "support": clf_rep[3],
}
out_df = pd.DataFrame(out_dict, index=nb.classes_)
# Summary row: the plain mean of each (already rounded) metric column and the
# sum of the support column.
avg_tot = (
    out_df.apply(
        lambda x: round(x.mean(), 2) if x.name != "support" else round(x.sum(), 2)
    )
    .to_frame()
    .T
)
avg_tot.index = ["avg/total"]
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the
# same way.
out_df = pd.concat([out_df, avg_tot])
print(out_df)