实验报告人工神经网络实验原理:利用线性回归和神经网络建模技术分析预测。
实验题目:利用给出的葡萄酒数据集,解释获得的分析结论。
library(plspm); data(wines); wines实验要求:1、探索认识意大利葡萄酒数据集,对葡萄酒数据预处理,将其随机划分为训练集和测试集,然后创建一个线性回归模型;2、利用neuralnet包拟合神经网络模型;3、评估两个模型的优劣,如果都不理想,提出你的改进思路。
分析报告:1、线性回归模型> rm(list=ls())> gc()used (Mb) gc trigger (Mb) max used (Mb)Ncells 250340 13.4 608394 32.5 408712 21.9Vcells 498334 3.9 8388608 64.0 1606736 12.3>library(plspm)>data(wines)>wines[c(1:5),]class alcohol malic.acid ash alcalinity magnesium phenols flavanoids1 1 14.23 1.71 2.43 15.6 127 2.80 3.062 1 13.20 1.78 2.14 11.2 100 2.65 2.763 1 13.16 2.36 2.67 18.6 101 2.80 3.244 1 14.37 1.95 2.50 16.8 113 3.85 3.495 1 13.24 2.59 2.87 21.0 118 2.80 2.69nofla.phen proantho col.intens hue diluted proline1 0.28 2.29 5.64 1.04 3.92 10652 0.26 1.28 4.38 1.05 3.40 10503 0.30 2.81 5.68 1.03 3.17 11854 0.24 2.18 7.80 0.86 3.45 14805 0.39 1.82 4.32 1.04 2.93 735> data <- wines> summary(wines)class alcohol malic.acid ashMin. :1.000 Min. :11.03 Min. :0.740 Min. :1.3601st Qu.:1.000 1st Qu.:12.36 1st Qu.:1.603 1st Qu.:2.210Median :2.000 Median :13.05 Median :1.865 Median :2.360Mean :1.938 Mean :13.00 Mean :2.336 Mean :2.3673rd Qu.:3.000 3rd Qu.:13.68 3rd Qu.:3.083 3rd Qu.:2.558Max. :3.000 Max. :14.83 Max. :5.800 Max. :3.230alcalinity magnesium phenols flavanoids Min. :10.60 Min. : 70.00 Min. :0.980 Min. :0.340 1st Qu.:17.20 1st Qu.: 88.00 1st Qu.:1.742 1st Qu.:1.205 Median :19.50 Median : 98.00 Median :2.355 Median :2.135 Mean :19.49 Mean : 99.74 Mean :2.295 Mean :2.029 3rd Qu.:21.50 3rd Qu.:107.00 3rd Qu.:2.800 3rd Qu.:2.875 Max. :30.00 Max. :162.00 Max. :3.880 Max. :5.080 nofla.phen proantho col.intens hue Min. :0.1300 Min. :0.410 Min. : 1.280 Min. :0.4800 1st Qu.:0.2700 1st Qu.:1.250 1st Qu.: 3.220 1st Qu.:0.7825 Median :0.3400 Median :1.555 Median : 4.690 Median :0.9650 Mean :0.3619 Mean :1.591 Mean : 5.058 Mean :0.9574 3rd Qu.:0.4375 3rd Qu.:1.950 3rd Qu.: 6.200 3rd Qu.:1.1200 Max. :0.6600 Max. :3.580 Max. :13.000 Max. :1.7100 diluted prolineMin. :1.270 Min. : 278.01st Qu.:1.938 1st Qu.: 500.5Median :2.780 Median : 673.5Mean :2.612 Mean : 746.93rd Qu.:3.170 3rd Qu.: 985.0Max. :4.000 Max. :1680.0Num Variable Description 解释1 class Type of wine 葡萄酒的种类2 alcohol Alcohol 醇3 malic.acid Malic acid 苹果酸4 ash Ash 灰5 alcalinity Alcalinity 碱度6 magnesium Magnesium 镁7 phenols Total phenols 酚类8 flavanoids Flavanoids 黄酮9 nofla.phen Nonflavanoid phenols 非黄烷类酚类10 proantho Proanthocyanins 花青素11 col.intens Color intensity 颜色强度12 hue Hue 色调13 diluted OD280/OD315 of diluted wines 稀释的葡萄酒14 proline Proline 脯氨酸> apply(data,2,function(x) sum(is.na(x)))class alcohol malic.acid ash alcalinity magnesium phenols 0 0 0 0 0 0 0 flavanoids nofla.phen proantho col.intens hue diluted proline 0 0 0 0 0 0 0> dim(wines)[1] 178 14> set.seed(2)> test=sample(1:nrow(wines),100)> wines.train<-wines[-test,]> wines.test<-wines[test,]> dim(wines.train);dim(wines.test)[1] 78 14[1] 100 14> lm.fit <- glm(alcohol~., data=wines.train)> summary(lm.fit)Call:glm(formula = alcohol ~ ., data = wines.train)Deviance Residuals:Min 1Q Median 3Q Max-0.98017 -0.31067 -0.00405 0.36184 1.23885Coefficients:Estimate Std. Error t value Pr(>|t|)(Intercept) 13.0661361 1.2664910 10.317 3.04e-15 ***class -0.4043994 0.2389115 -1.693 0.09538 .malic.acid 0.1612962 0.0730559 2.208 0.03085 *ash 0.2621448 0.3669235 0.714 0.47755alcalinity -0.0591380 0.0328684 -1.799 0.07670 .magnesium 0.0003567 0.0052733 0.068 0.94628phenols 0.1719659 0.2078450 0.827 0.41110flavanoids -0.1780915 0.1815817 -0.981 0.33039nofla.phen -0.4623220 0.7409499 -0.624 0.53487proantho -0.2402948 0.1449535 -1.658 0.10226col.intens 0.1580059 0.0447835 3.528 0.00078 ***hue 0.1226260 0.4205420 0.292 0.77154diluted -0.0889085 0.1967579 -0.452 0.65289proline 0.0008112 0.0003943 2.058 0.04371 *---Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1(Dispersion parameter for gaussian family taken to be 0.2968956)Null deviance: 57.473 on 77 degrees of freedomResidual deviance: 19.001 on 64 degrees of freedomAIC: 141.2Number of Fisher Scoring iterations: 2> pr.lm <- predict(lm.fit,wines.test)> MSE.lm <- sum((pr.lm - wines.test$alcohol)^2)/nrow(wines.test)> print(MSE.lm)[1] 0.30436252、神经网络模型> maxs <- apply(wines, 2, max)> mins <- apply(wines, 2, min)> scaled <- as.data.frame(scale(wines, center = mins, scale = maxs - mins))> index <- sample(1:nrow(wines),round(0.75*nrow(wines)))> train_ <- scaled[index,]> test_ <- scaled[index,]> library(neuralnet)> n <- names(train_)> f <- as.formula(paste("alcohol~", paste(n[!n %in% "alcohol"], collapse = " + ")))> nn <- neuralnet(f,data=train_,hidden=c(5,3),linear.output=T)> plot(nn)>pr.nn <- compute(nn,test_[,1:13])>pr.nn__<-pr.nn$net.result*(max(test_$alcohol)-min(test_$alcohol))+mi n(test_$alcohol)>test.r1<-(test_$alcohol)*(max(test_$alcohol)-min(test_$alcohol))+min (test_$alcohol)> MSE.nn1 <- sum((test.r1 - pr.nn__)^2)/nrow(test_)> print(paste(MSE.lm,MSE.nn1))[1] "0.304362456679839 0.14726865189892"3、模型修正>par(mfrow=c(1,2))>plot(test_$alcohol,pr.nn__,col='red',main='Real vs predicted NN',pch=18,cex=0.7)>abline(0,1,lwd=2)>legend('bottomright',legend='NN',pch=18,col='red', bty='n')>plot(wines.test$alcohol,pr.lm,col='blue',main='Real vs predictedlm',pch=18, cex=0.7) >abline(0,1,lwd=2)>legend('bottomright',legend='LM',pch=18,col='blue', bty='n', cex=0.7)0.00.40.80.00.20.40.60.81.01.2Real vs predicted NNtest_$alcohol p r .n n __11.512.513.511.512.012.513.013.514.014.5Real vs predicted lmwines.test$alcoholp r .lm> par(mfrow=c(1,1))> plot(test_$alcohol,pr.nn__,col='red',main='Real vs predicted NN',pch=18,cex=0.7)> points(wines.test$alcohol,pr.lm,col='blue',pch=18,cex=0.7) > abline(0,1,lwd=2)>legend('bottomright',legend=c('NN','LM'),pch=18,col=c('red','blue'))> library(boot)> set.seed(200)> lm.fit <- glm(alcohol~.,data=data)> cv.glm(data,lm.fit,K=10)$delta[1][1] 0.3058061679>set.seed(450)>cv.error <- NULL>k <- 10>library(plyr)>pbar <- create_progress_bar('text')>pbar$init(k)>for(i in 1:k){index <- sample(1:nrow(data),round(0.9*nrow(data)))train.cv <- scaled[index,]test.cv <- scaled[-index,]nn <- neuralnet(f,data=train.cv,hidden=c(5,2),linear.output=T)pr.nn <- compute(nn,test.cv[,1:13])pr.nn__<-pr.nn$net.result*(max(test_$alcohol)-min(test_$alcohol))+min (test_$alcohol)test.cv.r <- (test.cv$alcohol)*(max(test.cv$alcohol)-min(test.cv$alcohol))+min(tes t.cv$alcohol)cv.error[i] <- sum((test.cv.r - pr.nn__)^2)/nrow(test.cv)pbar$step()}> mean(cv.error)[1] 0.06900470043> cv.error[1] 0.0791******* 0.10556665990 0.05904083258 0.0714******* 0.0992******* [6] 0.03239406600 0.04807466437 0.0999******* 0.0355******* 0.0596*******> par(mfrow=c(1,1))> par(mfrow=c(1,1))> boxplot(cv.error,xlab='MSE CV',col='cyan',+ border='blue',names='CV error (MSE)',+ main='CV error (MSE) for NN',horizontal=TRUE)0.040.060.080.10MSE CV> cv.error[i] [1] 0.0596547757。