Sklearn 决策树与随机森林

荒凉一梦 提交于 2020-01-14 19:56:30

使用 sklearn 自带的红酒数据集训练并测试决策树,然后把这棵树画出来

保存到了桌面的pdf文件中,目前还没有处理中文显示问题。

from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

import pandas as pd

# Load the wine dataset. The bare expressions below have no visible effect in
# a script; they only display a value when run in a notebook/REPL cell.
wine = load_wine()
wine.data.shape
wine.target
pd.concat(
    [pd.DataFrame(wine.data), pd.DataFrame(wine.target)],
    axis="columns",
)

wine.feature_names
wine.target_names

# Hold out 30% of the samples as the test split.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    wine.data, wine.target, test_size=0.3
)

# fit() returns the estimator itself, so construction and training can chain.
clf = tree.DecisionTreeClassifier(criterion="entropy").fit(Xtrain, Ytrain)
score = clf.score(Xtest, Ytest)  # prediction accuracy on the test split
print(score)

from pathlib import Path

import graphviz

# Chinese display labels for the 13 feature columns, passed to
# export_graphviz as feature_names.
feature_name = [
    '酒精', '苹果酸', '灰', '灰的碱性', '镁', '总酚', '类黄酮',
    '非黄烷类酚类', '花青素', '颜色强度', '色调',
    'od280/od315稀释葡萄酒', '脯氨酸',
]

# Build the DOT description of the fitted tree with filled, rounded nodes.
dot_data = tree.export_graphviz(
    clf,
    feature_names=feature_name,
    class_names=["琴酒", "雪莉", "贝尔摩德"],
    filled=True,
    rounded=True,
)
graph = graphviz.Source(dot_data)

# Write to the current user's Desktop instead of one hard-coded Windows
# account, so the script also works on other machines.
graph.render(str(Path.home() / "Desktop" / "tree"))

sklearn决策树与随机森林的差异

随机森林利用装袋法(bagging),把许多棵决策树集成在一起,通过多数投票得到最终的预测结果。
其精度通常比单一的决策树更高,但这并不是必然的,具体差距取决于数据和参数。

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt

wine = load_wine()

# Number of repeated 10-fold cross-validation rounds. The same constant
# drives the x axis of the plot so the two cannot drift apart.
N_ROUNDS = 10

rfc_l = []  # mean CV score of the random forest, one entry per round
clf_l = []  # mean CV score of the single decision tree, one entry per round

for _ in range(N_ROUNDS):
    rfc = RandomForestClassifier(n_estimators=25)
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    print(rfc_s)
    rfc_l.append(rfc_s)

    clf = DecisionTreeClassifier()
    clf_s = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    print(clf_s)
    clf_l.append(clf_s)

# Plot both score series against the 1-based round number.
rounds = range(1, N_ROUNDS + 1)
plt.plot(rounds, rfc_l, label="Random Forest")
plt.plot(rounds, clf_l, label="Decision Tree")
plt.legend()
plt.show()

画出的图像
在这里插入图片描述

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt

wine = load_wine()

# Largest forest size to try; the sweep covers n_estimators = 1..MAX_ESTIMATORS.
MAX_ESTIMATORS = 200

superpa = []  # superpa[k] is the mean 10-fold CV score for n_estimators == k + 1
for n_trees in range(1, MAX_ESTIMATORS + 1):
    rfc = RandomForestClassifier(n_estimators=n_trees, n_jobs=-1)
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_s)

best_score = max(superpa)
# The list index is 0-based while n_estimators starts at 1, so shift by one
# to report the forest size that actually achieved the best score.
best_n_estimators = superpa.index(best_score) + 1
print(best_score, best_n_estimators)

plt.figure(figsize=[20, 5])
plt.plot(range(1, MAX_ESTIMATORS + 1), superpa)
plt.show()

n_estimators 从 1 取到 200 时,交叉验证平均准确率的变化曲线
在这里插入图片描述

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!