
[Original] Building Common Data Mining Models in R: Worked Examples with Code and Data

Apriori
library(arules) # load the arules package
data(Groceries) # load the Groceries transaction data
frequentsets=eclat(Groceries,parameter=list(support=0.05,maxlen=10)) # mine frequent itemsets
inspect(frequentsets[1:10]) # inspect the frequent itemsets found
inspect(sort(frequentsets,by="support")[1:10]) # sort the frequent itemsets by support and inspect (equivalent to inspect(sort(frequentsets)[1:10]))
rules=apriori(Groceries,parameter=list(support=0.01,confidence=0.01)) # mine association rules
summary(rules) # summarize the rules found
x=subset(rules,subset=rhs%in%"whole milk"&lift>=1.2) # extract the subset of rules with "whole milk" on the right-hand side and lift of at least 1.2
inspect(sort(x,by="support")[1:5]) # sort the rule subset by support and inspect the top five
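The rule subset can also be ranked by lift instead of support, and coerced to an ordinary data frame for further processing; a brief sketch (the object name rules.df is just illustrative):
inspect(sort(x,by="lift")[1:5]) # view the five rules with the highest lift
rules.df = as(x,"data.frame") # coerce the rule set to a data frame of rule text plus quality measures
head(rules.df)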
Linear Regression
library(MASS)
lm_fit = lm(medv~poly(rm,2)+crim,data = Boston) # fit a linear model with a quadratic term in rm plus crim
summary(lm_fit) # examine the fitted linear model
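As a quick usage sketch, the fitted model can predict medv for new observations; the predictor values below are arbitrary:
predict(lm_fit,newdata = data.frame(rm = 6.5,crim = 0.1),interval = "confidence") # predicted medv with a confidence interval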
Ridge Regression and Lasso
# Unlike the other models, ridge regression and the lasso cannot take a data frame through a formula interface and do not support a subset argument, so the data preparation has to be done by hand
library(glmnet)
library(ISLR)
Hitters = na.omit(Hitters) # drop rows with missing Salary
x = model.matrix(Salary~., Hitters)[,-1] # build the regression design matrix
y = Hitters$Salary
ridge.mod = glmnet(x,y,alpha = 0,lambda = 0.1) # fit a ridge regression model
lasso.mod = glmnet(x,y,alpha = 1,lambda = 0.1) # fit a lasso model
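The fixed lambda of 0.1 is arbitrary; a common follow-up, sketched here with cv.glmnet's default settings, is to pick lambda by cross-validation:
cv.out = cv.glmnet(x,y,alpha = 0) # 10-fold cross-validation over a grid of lambda values
bestlam = cv.out$lambda.min # lambda with the smallest cross-validated error
predict(cv.out,newx = x[1:5,],s = "lambda.min") # ridge predictions at the selected lambda for the first five players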
Principal Component Analysis
library(ISLR)
pr.out = prcomp(USArrests,scale. = T)
pr.out$rotation
biplot(pr.out,scale = 0)
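To judge how many components to keep, a short sketch of the proportion of variance explained, computed from the standard deviations stored in pr.out:
pve = pr.out$sdev^2/sum(pr.out$sdev^2) # proportion of variance explained by each component
pve
plot(cumsum(pve),type = "b",xlab = "Principal Component",ylab = "Cumulative PVE")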
Decision Tree
library(tree)
library(ISLR)
attach(Carseats)
High = factor(ifelse(Sales <= 8,"No","Yes")) # recode Sales into a binary factor
Carseats = data.frame(Carseats,High)
train = sample(1:nrow(Carseats),200)
Carseats.test = Carseats[-train,]
High.test = High[-train]
tree.carseats = tree(High~.-Sales,Carseats,subset=train) # fit the classification tree
summary(tree.carseats)
# visualize the tree
plot(tree.carseats)
text(tree.carseats,pretty = 0)
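A natural next step, sketched here, is to predict the held-out observations and measure the classification error:
tree.pred = predict(tree.carseats,Carseats.test,type = "class") # predicted class labels on the test set
table(tree.pred,High.test) # confusion matrix
mean(tree.pred != High.test) # test error rate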
Random Forest
library(randomForest)
library(MASS)
train = sample(1:nrow(Boston),nrow(Boston)/2)
boston.test = Boston[-train,]
rf.boston = randomForest(medv~.,data = Boston,subset = train,mtry=6,importance=T) # fit a random forest with 6 candidate variables at each split
rf.boston
summary(rf.boston)
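To check accuracy and see which predictors matter most, a brief sketch of test-set evaluation and the built-in importance measures:
yhat.rf = predict(rf.boston,newdata = Boston[-train,]) # predictions on the held-out half
mean((yhat.rf - boston.test$medv)^2) # test-set mean squared error
importance(rf.boston) # variable importance measures
varImpPlot(rf.boston) # plot variable importance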
Naive Bayes
library(e1071)
classifier<-naiveBayes(iris[,c(1:4)],iris[,5]) # fit a naive Bayes model
table(predict(classifier,iris[,-5]),iris[,5]) # predict with the naive Bayes model and tabulate against the true species
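Predicting on the same data used for fitting is optimistic; a minimal sketch of a held-out evaluation (the 100-row training split and the names idx and nb.fit are just illustrative):
idx = sample(1:nrow(iris),100) # arbitrary 100-observation training set
nb.fit = naiveBayes(iris[idx,1:4],iris[idx,5]) # fit on the training rows only
table(predict(nb.fit,iris[-idx,1:4]),iris[-idx,5]) # confusion matrix on the held-out rows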
Boosting
library(gbm)
library(MASS)
train = sample(1:nrow(Boston),nrow(Boston)/2)
boston.test = Boston[-train,]
boost.boston = gbm(medv~.,data = Boston[train,],distribution = "gaussian",n.trees=5000,interaction.depth=4) # fit a boosted regression tree model
boost.boston
summary(boost.boston)
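A short sketch of test-set evaluation for the boosted model; note that predict.gbm needs the number of trees to use:
yhat.boost = predict(boost.boston,newdata = Boston[-train,],n.trees = 5000) # predictions using all 5000 trees
mean((yhat.boost - boston.test$medv)^2) # test-set mean squared error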
Logistic Regression
library(ISLR)
train = Smarket$Year<2005
logistic.fit = glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume,data=Smarket,family=binomial, subset=train) # fit the logistic regression model on the pre-2005 data
glm.probs = predict(logistic.fit,newdata=Smarket[!train,],type="response") # predicted probabilities of an "Up" market on the 2005 test data
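To turn the predicted probabilities into class labels and check accuracy on the 2005 data, a minimal sketch using a 0.5 cutoff (Direction.2005 is just an illustrative name):
glm.pred = ifelse(glm.probs > 0.5,"Up","Down") # classify with a 0.5 probability cutoff
Direction.2005 = Smarket$Direction[!train]
table(glm.pred,Direction.2005) # confusion matrix on the test year
mean(glm.pred == Direction.2005) # test-set accuracy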
K-Nearest Neighbor
library(class)
library(ISLR)
standardized.X=scale(Caravan[,-86]) # standardize the predictors first (column 86 is the Purchase response)
test <- 1:1000
train.X <- standardized.X[-test,]
train.Y <- Caravan$Purchase[-test]
test.X <- standardized.X[test,]
test.Y <- Caravan$Purchase[test]
knn.pred <- knn(train.X,test.X,train.Y,k=3) # predictions on the test set directly, using k = 3 neighbors
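To assess the classifier, a brief sketch comparing the predictions with the held-out labels (calling set.seed beforehand makes the random tie-breaking in knn reproducible):
table(knn.pred,test.Y) # confusion matrix for the 1000 held-out customers
mean(knn.pred != test.Y) # test error rate
mean(test.Y != "No") # baseline: fraction of held-out customers who actually purchase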