R credit

江枫思渺然 提交于 2019-12-01 07:46:40
# End-to-end credit-scoring walkthrough built on the creditR package:
# data preparation -> WOE transformation -> variable selection ->
# logistic regression -> calibration -> validation statistics.
# Results of bare expressions are shown via auto-printing, so run this
# interactively or with Rscript (a plain source() will not echo them).

# ---- Setup ----
library(devtools)
# Install creditR from GitHub only when it is missing, so that re-running
# the script does not hit the network every time.
if (!requireNamespace("creditR", quietly = TRUE)) {
  devtools::install_github("ayhandis/creditR")
}
library(creditR)
ls("package:creditR")

# ---- Data preparation ----
data("germancredit")
str(germancredit)
head(germancredit)

# Keep four predictors plus the target.
sample_data <- germancredit[, c(
  "duration.in.month",
  "credit.amount",
  "installment.rate.in.percentage.of.disposable.income",
  "age.in.years",
  "creditability"
)]

# Recode the target: "bad" becomes 1, every other value becomes 0.
sample_data$creditability <- ifelse(sample_data$creditability == "bad", 1, 0)
missing_ratio(sample_data)

# NOTE(review): 123 is presumably the random seed and 0.70 the share of
# rows assigned to the training set -- confirm against the creditR docs.
traintest <- train_test_split(sample_data, 123, 0.70)
train <- traintest$train
test <- traintest$test

# ---- WOE transformation ----
# The binning rules are derived from the training set only and then
# deployed to both the training and the test set.
woerules <- woe.binning(
  df = train,
  target.var = "creditability",
  pred.var = train,
  event.class = 1
)
train_woe <- woe.binning.deploy(train, woerules, add.woe.or.dum.var = "woe")
train_woe <- woe.get.clear.data(
  train_woe,
  default_flag = "creditability",
  prefix = "woe"
)
test_woe <- woe.binning.deploy(test, woerules, add.woe.or.dum.var = "woe")
test_woe <- woe.get.clear.data(
  test_woe,
  default_flag = "creditability",
  prefix = "woe"
)

# ---- Variable selection ----
IV.calc.data(train_woe, "creditability")
Gini.univariate.data(train_woe, "creditability")

# NOTE(review): 0.10 is presumably the univariate Gini cut-off below which
# variables are dropped -- confirm.
eliminated_data <- Gini_elimination(train_woe, "creditability", 0.10)
str(eliminated_data)

# NOTE(review): the trailing 2 is presumably the number of clusters.
clustering_data <- variable.clustering(eliminated_data, "creditability", 2)
clustering_data
# selected_data is not used again in this script; it is kept so it can be
# inspected in the session.
selected_data <- variable.clustering.gini(eliminated_data, "creditability", 2)
correlation.cluster(
  eliminated_data,
  clustering_data,
  variables = "variable",
  clusters = "Group"
)

# ---- Model ----
# Logistic regression of the target on all remaining columns.
model <- glm(
  formula = creditability ~ .,
  family = binomial(link = "logit"),
  data = eliminated_data
)
summary(model)
woe.glm.feature.importance(eliminated_data, model, "creditability")

# ---- Modelling data with predicted probabilities ----
# Only the appended column is named ("PD"); the existing columns keep
# their own names. Overwriting every name with a fixed vector would fail
# (or silently mislabel columns) whenever the elimination step retains a
# different set or order of variables.
ms_train_data <- cbind(eliminated_data, PD = model$fitted.values)
ms_test_data <- cbind(
  # drop = FALSE keeps a data frame even if a single column is selected.
  test_woe[, colnames(eliminated_data), drop = FALSE],
  PD = predict(model, type = "response", newdata = test_woe)
)

# ---- Calibration ----
regression_calibration <- regression.calibration(
  model,
  test_woe,
  "creditability"
)
regression_calibration$calibration_data
regression_calibration$calibration_model
regression_calibration$calibration_formula

# The master scale is built from the training data before the Score
# column is added below; statement order is kept as in the original.
master_scale <- master.scale(ms_train_data, "creditability", "PD")
master_scale

# Score is the log-odds (logit) of the predicted probability.
ms_train_data$Score <- log(ms_train_data$PD / (1 - ms_train_data$PD))
ms_test_data$Score <- log(ms_test_data$PD / (1 - ms_test_data$PD))

bayesian_method <- bayesian.calibration(
  data = master_scale,
  average_score = "Score",
  total_observations = "Total.Observations",
  PD = "PD",
  central_tendency = 0.05,
  calibration_data = ms_train_data,
  calibration_data_score = "Score"
)
bayesian_method$Calibration.model
bayesian_method$Calibration.formula

# NOTE(review): 3000 and 15 are scaling constants whose meaning is not
# visible here -- check the scaled.score() documentation.
scaled.score(bayesian_method$calibration_data, "calibrated_pd", 3000, 15)

# ---- Validation ----
vif.calc(model)
Gini(model$fitted.values, ms_train_data$creditability)
# NOTE(review): 5 is presumably the number of folds and 1 a seed -- confirm.
k.fold.cross.validation.glm(ms_train_data, "creditability", 5, 1)
Kolmogorov.Smirnov(ms_train_data, "creditability", "PD")
Kolmogorov.Smirnov(ms_test_data, "creditability", "PD")
SSI.calc.data(train_woe, test_woe, "creditability")
Herfindahl.Hirschman.Index(master_scale, "Total.Observations")
Anchor.point(master_scale, "PD", "Total.Observations", 0.30)
chisquare.test(master_scale, "PD", "Bad.Count", "Total.Observations", 0.90)

# Observed default rate per master-scale row: bad count / total count.
master_scale$DR <- master_scale$Bad.Count / master_scale$Total.Observations
Binomial.test(master_scale, "Total.Observations", "PD", "DR", 0.90, "one")
 

  

 

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!