欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页

R语言实现随机森林算法

程序员文章站 2024-02-27 23:47:39
...
library(xlsx)
# Load the raw data set from the first sheet of the workbook.
data <- read.xlsx("E:\\数据分析\\data.xlsx", 1)

# Split the rows into a training set and a test set at a 7:3 ratio.
# The seed is fixed so that the split, and every model and metric derived
# from it further down the script, is reproducible from run to run.
set.seed(1234)
# Round down explicitly instead of relying on sample() truncating a
# fractional size.
train_size <- floor(7 / 10 * nrow(data))
train_sub <- sample(nrow(data), train_size)
train_data <- data[train_sub, ]
test_data <- data[-train_sub, ]
library(pROC) #绘制ROC曲线
library(randomForest)
# Data preprocessing ----
# The response has to be a factor so that randomForest() fits a
# classification forest rather than a regression forest.
train_data$buy <- as.factor(train_data$buy)
# Reuse the training levels so observed and predicted classes share the same
# level set (and order) in the confusion matrix and in the ROC analysis.
test_data$buy <- factor(test_data$buy, levels = levels(train_data$buy))

# Fit the forest with 500 trees.
# NOTE: the formula has three predictors and mtry = 3, so every predictor is
# a split candidate at every node (this is bagging rather than a random
# subspace forest).
wine_randomforest <- randomForest(
  buy ~ behavior + reason + money,
  data = train_data,
  ntree = 500,
  mtry = 3,
  importance = TRUE,
  proximity = TRUE
)

# Inspect variable importance.
wine_randomforest$importance
varImpPlot(wine_randomforest, main = "variable importance")

# Predict class labels for the test set.
pre_ran <- predict(wine_randomforest, newdata = test_data)
# Predicted probability of the second factor level, which pROC treats as the
# "case" class. A ROC curve needs a continuous score; hard class labels
# collapse it to a single operating point.
positive_class <- levels(train_data$buy)[2]
prob_ran <- predict(
  wine_randomforest,
  newdata = test_data,
  type = "prob"
)[, positive_class]

# Combine observed and predicted values.
# The `prob` column holds predicted class labels; the name is kept as-is for
# backward compatibility.
obs_p_ran <- data.frame(prob = pre_ran, obs = test_data$buy)

# Print the confusion matrix (rows: observed, columns: predicted).
table(test_data$buy, pre_ran, dnn = c("真实值", "预测值"))

# Draw the ROC curve from the predicted probabilities.
ran_roc <- roc(test_data$buy, prob_ran)
plot(
  ran_roc,
  print.auc = TRUE,
  auc.polygon = TRUE,
  grid = c(0.1, 0.2),
  grid.col = c("green", "red"),
  max.auc.polygon = TRUE,
  auc.polygon.col = "skyblue",
  print.thres = TRUE,
  main = '随机森林模型ROC曲线,mtry=3,ntree=500'
)

# MDS plot of the proximity matrix. The proximities only cover the training
# rows, so the colouring factor must be the training response (a factor, in
# the same row order), not the full unsplit column.
MDSplot(wine_randomforest, train_data$buy, k = 2, palette = NULL, pch = 20)

# Error rate as a function of the number of trees.
plot(wine_randomforest)
# Number of terminal nodes in each tree of the forest.
treesize(wine_randomforest, terminal = TRUE)

# Refit with a different number of trees (200 instead of 500) to compare.
# NOTE: the formula has three predictors and mtry = 3, so every predictor is
# a split candidate at every node.
wine_randomforest2 <- randomForest(
  buy ~ behavior + reason + money,
  data = train_data,
  ntree = 200,
  mtry = 3,
  importance = TRUE,
  proximity = TRUE
)

# Inspect variable importance.
wine_randomforest2$importance
varImpPlot(wine_randomforest2, main = "variable importance")

# Predict class labels for the test set.
pre_ran <- predict(wine_randomforest2, newdata = test_data)
# Predicted probability of the second factor level, which pROC treats as the
# "case" class. A ROC curve needs a continuous score; hard class labels
# collapse it to a single operating point.
positive_class <- levels(train_data$buy)[2]
prob_ran <- predict(
  wine_randomforest2,
  newdata = test_data,
  type = "prob"
)[, positive_class]

# Combine observed and predicted values.
# The `prob` column holds predicted class labels; the name is kept as-is for
# backward compatibility.
obs_p_ran <- data.frame(prob = pre_ran, obs = test_data$buy)

# Print the confusion matrix (rows: observed, columns: predicted).
table(test_data$buy, pre_ran, dnn = c("真实值", "预测值"))

# Draw the ROC curve from the predicted probabilities.
ran_roc <- roc(test_data$buy, prob_ran)
plot(
  ran_roc,
  print.auc = TRUE,
  auc.polygon = TRUE,
  grid = c(0.1, 0.2),
  grid.col = c("green", "red"),
  max.auc.polygon = TRUE,
  auc.polygon.col = "skyblue",
  print.thres = TRUE,
  main = '随机森林模型ROC曲线,mtry=3,ntree=200'
)

# MDS plot of the proximity matrix. The proximities only cover the training
# rows, so the colouring factor must be the training response (a factor, in
# the same row order), not the full unsplit column.
MDSplot(wine_randomforest2, train_data$buy, k = 2, palette = NULL, pch = 20)

# Error rate as a function of the number of trees.
plot(wine_randomforest2)
# Number of terminal nodes in each tree of the forest.
treesize(wine_randomforest2, terminal = TRUE)

数据集获取地址

链接:https://pan.baidu.com/s/1jDE37PoiDh8L5wfyekFFkw 
提取码:dr25

相关标签: R语言