使用 sklearn 自带的红酒数据集训练并测试一棵决策树,然后把这棵树画出来。
画出的树保存为桌面上的 pdf 文件;目前还没有处理图中中文的显示问题。
from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import pandas as pd
# Train a decision tree on scikit-learn's bundled wine dataset, print its
# accuracy on a held-out split, and render the fitted tree with graphviz.
wine = load_wine()

# Exploratory look at the data. As bare expressions these lines at most display
# a value in an interactive session (REPL / notebook); in a script they do nothing.
wine.data.shape
wine.target
pd.concat([pd.DataFrame(wine.data), pd.DataFrame(wine.target)], axis=1)
wine.feature_names
wine.target_names

# Hold out 30% of the samples for testing. No random_state is given, so the
# split (and therefore the printed score) differs from run to run.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3)

clf = tree.DecisionTreeClassifier(criterion="entropy")
clf = clf.fit(Xtrain, Ytrain)
score = clf.score(Xtest, Ytest)  # prediction accuracy on the test split
print(score)

# Chinese display names for the 13 feature columns, in column order.
# Fixed: '颜色强度' previously contained a stray space ('颜 色强度').
feature_name = [
    '酒精', '苹果酸', '灰', '灰的碱性', '镁', '总酚', '类黄酮',
    '非黄烷类酚类', '花青素', '颜色强度', '色调',
    'od280/od315稀释葡萄酒', '脯氨酸',
]

import graphviz
from pathlib import Path

# NOTE(review): the labels below are Chinese; the rendered output may need a
# CJK-capable font to display them correctly -- not handled here.
dot_data = tree.export_graphviz(
    clf,
    feature_names=feature_name,
    class_names=["琴酒", "雪莉", "贝尔摩德"],
    filled=True,
    rounded=True,
)
graph = graphviz.Source(dot_data)

# Write to the current user's Desktop instead of a path hard-coded to one
# Windows account, so the script also runs on other machines.
output_path = Path.home() / "Desktop" / "tree"
graph.render(str(output_path))
sklearn决策树与随机森林的差异
随机森林利用了装袋法(bagging):在数据的不同自助采样上训练许多棵决策树,再把它们的预测结果通过投票集成起来。
它的精度通常比单一的决策树更高、也更稳定,但这并不是必然的,具体要看数据集和参数设置。
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt
# Compare a single decision tree with a random forest on the wine dataset by
# repeating 10-fold cross-validation ten times and plotting the mean scores.
wine = load_wine()

# This split is only consumed by the disabled single-split experiment below.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3)

# Disabled experiment: score both models once on the split above.
# clf = DecisionTreeClassifier(random_state=0)
# rfc = RandomForestClassifier(random_state=0)
# clf = clf.fit(Xtrain, Ytrain)
# rfc = rfc.fit(Xtrain, Ytrain)
# score_c = clf.score(Xtest, Ytest)
# score_r = rfc.score(Xtest, Ytest)
#
# print("Single Tree:{}".format(score_c))
# print("Random Forest:{}.".format(score_r))


def _mean_cv_score(model):
    """Return the mean 10-fold cross-validation score of *model* on the wine data."""
    fold_scores = cross_val_score(model, wine.data, wine.target, cv=10)
    return fold_scores.mean()


forest_scores = []
tree_scores = []
for _ in range(10):
    # Forest first, then tree -- each mean is printed as soon as it is known.
    forest_mean = _mean_cv_score(RandomForestClassifier(n_estimators=25))
    print(forest_mean)
    forest_scores.append(forest_mean)

    tree_mean = _mean_cv_score(DecisionTreeClassifier())
    print(tree_mean)
    tree_scores.append(tree_mean)

# One point per round (1..10) for each model.
rounds = range(1, 11)
plt.plot(rounds, forest_scores, label="Random Forest")
plt.plot(rounds, tree_scores, label="Decision Tree")
plt.legend()
plt.show()
画出的图像
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt
# Sweep n_estimators from 1 to 200 for a random forest on the wine dataset,
# recording the mean 10-fold cross-validation score for each setting.
wine = load_wine()
superpa = []
for n_trees in range(1, 201):
    rfc = RandomForestClassifier(n_estimators=n_trees, n_jobs=-1)
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_s)

# Report the best mean score and the n_estimators value that produced it.
# superpa[k] holds the score for n_estimators == k + 1, so the list index is
# shifted by one (the original printed the raw 0-based index).
best_score = max(superpa)
print(best_score, superpa.index(best_score) + 1)

# x-axis is the actual n_estimators value (1..200).
plt.figure(figsize=[20, 5])
plt.plot(range(1, 201), superpa)
plt.show()
n_estimators 从 1 取到 200 时,随机森林 10 折交叉验证平均准确率的变化曲线图
来源:CSDN
作者:学习不易
链接:https://blog.csdn.net/qq_43656233/article/details/103413031