Step1:
from sklearn import datasets

# Step 1: build a synthetic binary classification problem with 1000 samples
# and 10 features, of which 2 are informative and 2 are redundant.
dataset = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
)
# Dump the generated data so it can be inspected.
print(dataset)
Step2:
from sklearn.model_selection import KFold

# Step 2: split the data into 10 folds for cross-validation.
kf = KFold(n_splits=10)

# dataset[0] holds the feature matrix and dataset[1] the labels.
features, labels = dataset[0], dataset[1]

# Each iteration overwrites the previous split, so after the loop the four
# variables hold the train/test partition of the last fold only.
for train_index, test_index in kf.split(features):
    X_train = features[train_index]
    X_test = features[test_index]
    y_train = labels[train_index]
    y_test = labels[test_index]
Step3和Step4:
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC


def _positive_class_scores(clf, X_test):
    """Return a continuous score per test sample for the positive class.

    ROC AUC is a ranking metric, so it needs graded scores rather than the
    thresholded 0/1 labels produced by ``predict``. Estimators that expose
    ``decision_function`` (e.g. SVC) are scored with it; the others
    (GaussianNB, RandomForestClassifier) fall back to the probability of
    the second class.
    """
    if hasattr(clf, "decision_function"):
        return clf.decision_function(X_test)
    return clf.predict_proba(X_test)[:, 1]


def _fit_and_report(clf, title, X_train, y_train, X_test, y_test):
    """Fit ``clf`` on the training fold and print acc / f1 / auc on the test fold."""
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    print(title)
    acc = metrics.accuracy_score(y_test, pred)
    print('acc: ', acc)
    f1 = metrics.f1_score(y_test, pred)
    print('f1: ', f1)
    # Feeding hard predictions to roc_auc_score collapses the ROC curve to a
    # single operating point (balanced accuracy); use continuous scores.
    auc = metrics.roc_auc_score(y_test, _positive_class_scores(clf, X_test))
    print('auc: ', auc)


def test_GaussianNB(X_train, y_train, X_test, y_test):
    """Train a Gaussian naive Bayes classifier and print its test metrics."""
    _fit_and_report(GaussianNB(), "GaussianNB: ",
                    X_train, y_train, X_test, y_test)


def test_SVC(X_train, y_train, X_test, y_test):
    """Train an RBF-kernel SVC (C=0.1, gamma=0.1) and print its test metrics."""
    _fit_and_report(SVC(C=1e-01, kernel='rbf', gamma=0.1), "SVC: ",
                    X_train, y_train, X_test, y_test)


def test_nRandomForest(X_train, y_train, X_test, y_test):
    """Train a 6-tree random forest and print its test metrics."""
    _fit_and_report(RandomForestClassifier(n_estimators=6), "RandomForest: ",
                    X_train, y_train, X_test, y_test)
测试:
# Run all three classifiers on every fold of the cross-validation split and
# print their metrics, numbering the folds from 1.
features, labels = dataset[0], dataset[1]
for k, (train_index, test_index) in enumerate(kf.split(features), start=1):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels[train_index], labels[test_index]
    print('test ', k, ': ')
    test_GaussianNB(X_train, y_train, X_test, y_test)
    test_SVC(X_train, y_train, X_test, y_test)
    test_nRandomForest(X_train, y_train, X_test, y_test)
    # Blank separator between folds.
    print('\n')
结果:
test 1 : GaussianNB: acc: 0.93 f1: 0.9391304347826087 auc: 0.9242424242424243 SVC: acc: 0.93 f1: 0.9380530973451328 auc: 0.9262626262626262 RandomForest: acc: 0.93 f1: 0.9369369369369368 auc: 0.9282828282828283 test 2 : GaussianNB: acc: 0.91 f1: 0.9158878504672897 auc: 0.9089635854341738 SVC: acc: 0.94 f1: 0.9423076923076923 auc: 0.9395758303321329 RandomForest: acc: 0.95 f1: 0.9514563106796117 auc: 0.9497799119647861 test 3 : GaussianNB: acc: 0.97 f1: 0.9696969696969697 auc: 0.9701880752300921 SVC: acc: 0.91 f1: 0.9052631578947369 auc: 0.9093637454981992 RandomForest: acc: 0.93 f1: 0.9263157894736843 auc: 0.9293717486994799 test 4 : GaussianNB: acc: 0.89 f1: 0.8865979381443299 auc: 0.8914090726615816 SVC: acc: 0.91 f1: 0.9072164948453608 auc: 0.9114813327980731 RandomForest: acc: 0.93 f1: 0.9263157894736843 auc: 0.930349257326375 test 5 : GaussianNB: acc: 0.91 f1: 0.9203539823008849 auc: 0.9053945249597423 SVC: acc: 0.97 f1: 0.9724770642201834 auc: 0.9690016103059581 RandomForest: acc: 0.95 f1: 0.9514563106796117 auc: 0.9537037037037037 test 6 : GaussianNB: acc: 0.9 f1: 0.8936170212765957 auc: 0.9009661835748792 SVC: acc: 0.93 f1: 0.9230769230769231 auc: 0.928743961352657 RandomForest: acc: 0.95 f1: 0.945054945054945 auc: 0.9488727858293077 test 7 : GaussianNB: acc: 0.96 f1: 0.9545454545454546 auc: 0.9594155844155845 SVC: acc: 0.96 f1: 0.9545454545454546 auc: 0.9594155844155845 RandomForest: acc: 0.98 f1: 0.9767441860465117 auc: 0.9772727272727273 test 8 : GaussianNB: acc: 0.93 f1: 0.9278350515463919 auc: 0.9335748792270531 SVC: acc: 0.93 f1: 0.9278350515463919 auc: 0.9335748792270531 RandomForest: acc: 0.96 f1: 0.9574468085106383 auc: 0.9613526570048309 test 9 : GaussianNB: acc: 0.94 f1: 0.9400000000000001 auc: 0.9403761504601841 SVC: acc: 0.95 f1: 0.9494949494949495 auc: 0.9501800720288115 RandomForest: acc: 0.95 f1: 0.9494949494949495 auc: 0.9501800720288115 test 10 : GaussianNB: acc: 0.95 f1: 0.9557522123893805 auc: 0.9480519480519481 SVC: acc: 0.94 f1: 
0.9464285714285714 auc: 0.9391233766233766 RandomForest: acc: 0.96 f1: 0.9636363636363636 auc: 0.9618506493506493
Step5:
通过观察这十次测试的结果，可以看出 RandomForest 在三个算法中整体表现最好（平均 acc 约 0.949），SVC 次之（约 0.937），朴素贝叶斯（GaussianNB）整体最差（约 0.929）。不过在个别折上排名会有变化，例如 test 3 中朴素贝叶斯的表现反而最好。
文章来源: sklearn习题