sklearn模型的保存和加载API
import joblib  # 旧版写法为 from sklearn.externals import joblib(该路径已在 scikit-learn 0.23 中移除)
。保存:joblib.dump(estimator,'test.pkl')
。加载:estimator=joblib.load('test.pkl')
将训练好的模型保存下来,下次有新的需要预测的数据传进来时,直接加载模型进行预测即可,不用每次都重新训练。
案例:癌症(逻辑回归)分类预测–良/恶性
import pandas as pd
import numpy as np
# 1. Load the data
# The column names are supplied explicitly through `names`, in file order:
# an id column first, nine feature columns, and the "Class" label last.
path = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
column_name = ['Sample code number', 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape',
               'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin',
               'Normal Nucleoli', 'Mitoses', 'Class']
data = pd.read_csv(path, names=column_name)

# 2. Data preprocessing: missing values
# 1) Missing entries are marked with "?" -- turn them into NaN.
data = data.replace(to_replace="?", value=np.nan)
# 2) Drop every sample (row) that still has a missing value.
data.dropna(inplace=True)
# 3) Any column that contained "?" was parsed as text, so its remaining
#    values are still strings. Convert everything to real numeric dtypes
#    instead of relying on implicit coercion further down the pipeline.
#    A genuinely non-numeric value raises ValueError here, close to the cause.
data = data.apply(pd.to_numeric)
# 3. Split the data set
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Feature matrix: every column except the first (sample id) and the last (label).
x = data.iloc[:, 1:-1]
# Target vector: the "Class" column.
y = data.loc[:, "Class"]
# Library-default split; no random_state is given, so every run splits differently.
x_train, x_test, y_train, y_test = train_test_split(x, y)

# 4. Feature engineering: standardization
transfer = StandardScaler()
# Learn the scaling statistics from the training split only ...
transfer.fit(x_train)
# ... and apply that same scaling to both splits.
x_train = transfer.transform(x_train)
x_test = transfer.transform(x_test)
# 5. Estimator workflow: train a logistic-regression classifier
from sklearn.linear_model import LogisticRegression
estimator = LogisticRegression()
estimator.fit(x_train, y_train)

# Persist the trained model.
# `sklearn.externals.joblib` was removed in scikit-learn 0.23; joblib is now
# imported as a standalone package. The fallback keeps very old
# installations (which only shipped the vendored copy) working.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# Written to LogisticRegression.pkl in the current working directory.
joblib.dump(estimator, 'LogisticRegression.pkl')

# Show the fitted model parameters: coefficients and intercept.
print('回归系数:',estimator.coef_)
print("误差(偏置):",estimator.intercept_)
# 6. Model evaluation
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score

# Method 1: compare predictions with the true labels directly.
y_predict = estimator.predict(x_test)  # predicted class labels
print("y_predict(预测值):\n", y_predict)
# print("直接比对真实值和预测值:\n", y_test == y_predict)

# Method 2: accuracy on the test split.
score = estimator.score(x_test, y_test)
print("准确率为:", score)

# Precision, recall and F1-score per class (label 2 = benign, 4 = malignant).
report = classification_report(y_test, y_predict, labels=[2, 4], target_names=["良性", "恶性"])
print("查看精确率、召回率、F1-score:\n",report)

# ROC curve / AUC
# y_true must be binary: 0 (negative) / 1 (positive). Class 4 -> 1, class 2 -> 0.
y_true = np.where(y_test > 3, 1, 0)
# AUC needs a continuous score per sample. Feeding it the hard 2/4
# predictions collapses the ROC curve to a single threshold, so use the
# predicted probability of the positive (label 4) class instead.
positive_column = list(estimator.classes_).index(4)
y_score = estimator.predict_proba(x_test)[:, positive_column]
AUC = roc_auc_score(y_true, y_score)  # the closer to 1, the better
print("AUC:",AUC)
回归系数: [[1.47945227 0.07579265 0.59505721 0.69195463 0.33274168 1.16446335
1.16645995 0.92205206 0.72380317]]
误差(偏置): [-0.93015988]
y_predict(预测值):
[4 4 2 4 2 2 4 4 4 2 2 4 2 2 4 2 4 4 2 2 2 2 4 2 2 4 2 2 4 2 4 4 2 4 2 4 2
4 2 2 2 4 4 2 4 2 2 4 2 2 2 4 4 2 2 2 4 2 2 2 4 2 2 2 4 2 4 2 4 4 4 2 4 2
2 4 2 2 2 4 2 4 2 2 2 4 4 4 2 4 2 2 4 2 4 4 2 2 2 2 2 2 2 4 2 2 2 4 2 2 2
2 2 2 2 2 2 2 2 4 2 2 2 2 2 2 4 4 4 4 2 2 2 2 2 2 2 2 2 2 2 2 4 2 2 2 2 4
2 2 2 2 2 2 2 4 2 4 2 2 2 4 4 4 2 2 2 2 4 2 2]
准确率为: 0.9766081871345029
查看精确率、召回率、F1-score:
precision recall f1-score support
良性 0.98 0.98 0.98 114
恶性 0.96 0.96 0.96 57
avg / total 0.98 0.98 0.98 171
AUC: 0.9736842105263157
# Load the saved model and use it for prediction

# Features to predict on. For demonstration, 50 random rows of the original
# data are reused (same feature columns as in training: all but first and last).
x = (data.iloc[:, 1:-1]).sample(50)

# Standardize with the scaler that was fitted on the training split
# (`transfer`, created earlier in this script). Fitting a fresh
# StandardScaler on these 50 rows would scale them with different
# mean/variance than the model was trained on and distort the predictions.
# NOTE: to predict from a separate process, the fitted scaler has to be
# persisted and loaded the same way as the model.
x_s = transfer.transform(x)

# `sklearn.externals.joblib` was removed in scikit-learn 0.23; fall back to
# it only on very old installations without the standalone package.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
# joblib files are pickle-based: only load files from a trusted source.
estimator = joblib.load('LogisticRegression.pkl')  # restore the trained model
y_predict = estimator.predict(x_s)  # predicted class labels
print("y_predict(预测值):\n", y_predict)

# Present the result as a table: sampled features plus the predicted label.
# Note that this rebinds `data`, replacing the full data set loaded earlier.
data = pd.DataFrame(x)
data["y_predict"]= y_predict
# `display` is not defined in this file; it is available implicitly in
# IPython/Jupyter sessions.
display(data.head())
y_predict(预测值):
[2 2 4 2 2 4 4 4 2 2 2 2 2 4 4 4 2 2 2 4 4 2 2 2 2 2 2 2 2 2 2 2 2 4 2 2 2
2 2 4 2 4 4 4 2 4 2 2 2 2]
| | Clump Thickness | Uniformity of Cell Size | Uniformity of Cell Shape | Marginal Adhesion | Single Epithelial Cell Size | Bare Nuclei | Bland Chromatin | Normal Nucleoli | Mitoses | y_predict |
|---|---|---|---|---|---|---|---|---|---|---|
| 429 | 2 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
| 219 | 6 | 1 | 3 | 1 | 2 | 1 | 3 | 1 | 1 | 2 |
| 320 | 7 | 6 | 3 | 2 | 5 | 10 | 7 | 4 | 6 | 4 |
| 423 | 5 | 1 | 3 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
| 678 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
来源:CSDN
作者:Jalen data analysis
链接:https://blog.csdn.net/weixin_41685388/article/details/104515820