# 主要利用R语言进行随机森林回归,此外还包括回归树和装袋算法两种回归。
library(lattice)
library(grid)
library(DMwR)
library(rpart)
library(ipred)
library(randomForest)
# Three regression approaches are covered: regression tree, bagging, and
# random forest.
# The first two can cope with missing values, but random forest cannot, so the
# data is cleaned up front.
data(algae)
# Drop the rows in which more than 20% of the columns are NA.
# Positive "keep" indices are used instead of algae[-manyNAs(...), ]: when
# manyNAs() finds no such rows it returns an empty index (with a warning), and
# negating an empty index selects zero rows, silently discarding every
# observation.
algae <- algae[setdiff(seq_len(nrow(algae)), manyNAs(algae, 0.2)), ]
# Fill the remaining NAs from each row's 10 nearest neighbours. With its
# default method knnImputation() uses a distance-weighted average of the
# neighbours' values rather than a plain mean.
clean.algae <- knnImputation(algae, k = 10)
# Regression tree model.
# Only the first 12 columns of clean.algae are passed in, so a1 is modelled
# from the other columns in that range.
model.tree <- rpart(a1 ~ ., data = clean.algae[, 1:12])
summary(model.tree)

# Predictions are made on the same rows the tree was fitted on, so the scatter
# plot and the error below describe in-sample fit.
pre.tree <- predict(model.tree, newdata = clean.algae)
plot(pre.tree ~ clean.algae$a1)

# Normalised mean squared error: the model's MSE divided by the MSE obtained
# when always predicting the mean of a1 (values below 1 beat that baseline).
nmse1 <- local({
  observed <- clean.algae[["a1"]]
  model_mse <- mean((pre.tree - observed)^2)
  baseline_mse <- mean((mean(observed) - observed)^2)
  model_mse / baseline_mse
})
nmse1
1