当前位置:文档之家› 多元统计正态性检验作业

多元统计正态性检验作业

多元统计正态性检验作业 3.13 (1)对每个分量检验是否是一元正态分布 1.一维边缘分布的正态性检验 Q-Q 图检验法 > data1=data.frame(x1=c(260,200,240,170,270,205,190,200,250,200,225,210,170,270,190,280,310,270,250,260),x2=c(75,72,87,65,110,130,69,46,117,107,130,125,64,76,60,81,119,57,67,135),x3=c(40,34,45,39,39,34,27,45,21,28,36,26,31,33,34,20,25,31,31,39), x4=c(18,17,18,17,24,23,15,15,20,20,11,17,14,13,16,18,15,8,14,29)) > data2=data.frame(x1=c(310,310,190,225,170,210,280,210,280,200,200,280,190,295,270,280,240,280,370,280),x2=c(122,60,40,65,65,82,67,38,65,76,76,94,60,55,125,120,62,69,70,40),x3=c(30,35,27,34,37,31,37,36,30,40,39,26,33,30,24,32,32,29,30,37), + x4=c(21,18,15,16,16,17,18,17,23,17,20,11,17,16,21,18,20,20,20,17)) > data3=data.frame(x1=c(320,260,360,295,270,380,240,260,260,295,240,310,330,345,250,260,225,345,360,250),x2=c(64,59,88,100,65,114,55,55,110,73,114,103,112,127,62,59,100,120,107,117),x3=c(39,37,28,36,32,36,42,34,29,33,38,32,21,24,22,21,34,36,25,36),x4=c(17,11,26,12,21,21,10,20,20,21,18,18,11,20,16,19,30,18,23,16)) > data=rbind(data1,data2,data3) > qqnorm(data[,1]);qqline(data1[,1]) > qqnorm(data[,2]);qqline(data1[,2]) [图:Normal Q-Q Plot;横轴 Theoretical Quantiles(刻度 -2~2),纵轴 Sample Quantiles(刻度 200~350)] > qqnorm(data[,3]);qqline(data1[,3]) > qqnorm(data[,4]);qqline(data1[,4]) [图:Normal Q-Q Plot;横轴 Theoretical Quantiles(刻度 -2~2),纵轴 Sample Quantiles(刻度 40~120)] [图:Normal Q-Q Plot;横轴 Theoretical Quantiles(刻度 -2~2),纵轴 Sample Quantiles(刻度 20~45)] [图:Normal Q-Q Plot;横轴 Theoretical Quantiles(刻度 -2~2),纵轴 Sample Quantiles(刻度 10~30)] 2.二元数据的正态性检验 1.等椭圆检验法 以检验 (X1,X2) 是否服从二元正态分布为例 > datax1x2=as.matrix(cbind(data[,1],data[,2])) > mean1=apply(datax1x2,2,mean);mean1 [1] 259.08333 84.11667 > s1=cov(datax1x2);s1 [,1] [,2] [1,] 2787.7048 433.6681 [2,] 433.6681 753.9692 > D2=c() > for(i in 1:n){ + D2[i]=(datax1x2[i,]-mean1)%*%solve(s1)%*%t(t(datax1x2[i,]-mean1)) + cat(D2[i])} > D2 [1] 0.1251861 1.2646933 0.1805204 2.8870075 0.8947723 5.3436674 1.7397987 2.4709765 1.7429712 2.7508022 4.2330751 4.2933030 2.9037836 [14] 0.1830727 1.9723611 0.2160584 
1.9889309 1.2522066 0.3888125 3.7506293 2.2376907 2.4250881 3.3340246 0.6947016 2.8870075 0.9085821 [27] 0.7613934 3.0212101 0.8859113 1.2539095 1.2539095 0.2209607 1.9723611 2.2170959 2.2793965 1.7078104 0.6647098 0.6485288 5.8467138 [40] 3.4256245 2.6068141 0.9296899 3.8566119 0.6171641 0.6738596 5.4233607 1.1265728 1.2473480 0.9654576 0.8691904 1.7027351 1.1050343 [53] 2.2176911 3.9171163 0.6539759 0.9296899 1.0704952 3.3865331 3.7284302 1.7429712 > p0=0.5 > p=dim(data)[2];p [1] 4 > d0=qchisq(p0,p);d0 [1] 3.356694 > a=sum(D2<d0);a [1] 49 > pi=a/n;pi [1] 0.8166667 #p0取0.5时,马氏距离小于d0的个数为49,占总样品比例约为0.82,拒绝来自二元正态分布的假设 (审阅注:此处 p=dim(data)[2]=4 是全部变量的个数;检验 (X1,X2) 的二元正态性时,$\chi^2$ 分位数的自由度应取 2,即 qchisq(p0,2)。本节的 d0、比例以及后面的 qchisq(pt[t],p) 均按原文 p=4 算得,结论需据此重新核对。) > p0=0.25 > d0=qchisq(p0,p);d0 [1] 1.922558 > a=sum(D2<d0);a [1] 33 > pi=a/n > pi [1] 0.55 #p0取0.25时,马氏距离小于d0的个数为33,占总样品比例约为0.55,拒绝来自二元正态分布的假设 2.二元数据的 $\chi^2$ 图检验法 > pt=c() > for(t in 1:n){ + pt[t]=(t-0.5)/n + cat(pt[t])} > pt # $p_t$ [1] 0.008333333 0.025000000 0.041666667 0.058333333 0.075000000 0.091666667 0.108333333 0.125000000 0.141666667 0.158333333 0.175000000 [12] 0.191666667 0.208333333 0.225000000 0.241666667 0.258333333 0.275000000 0.291666667 0.308333333 0.325000000 0.341666667 0.358333333 [23] 0.375000000 0.391666667 0.408333333 0.425000000 0.441666667 0.458333333 0.475000000 0.491666667 0.508333333 0.525000000 0.541666667 [34] 0.558333333 0.575000000 0.591666667 0.608333333 0.625000000 0.641666667 0.658333333 0.675000000 0.691666667 0.708333333 0.725000000 [45] 0.741666667 0.758333333 0.775000000 0.791666667 0.808333333 0.825000000 0.841666667 0.858333333 0.875000000 0.891666667 0.908333333 [56] 0.925000000 0.941666667 0.958333333 0.975000000 0.991666667 > D2t=sort(D2) # $D^2_{(t)}$ > xt2=c() > for(t in 1:n){ + xt2[t]=qchisq(pt[t],p) + cat(xt2[t]) + } > xt2 # $\chi^2_t$ [1] 0.2700151 0.4844186 0.6415772 0.7757695 0.8969359 1.0096230 1.1163677 1.2187621 1.3178880 1.4145247 1.5092595 1.6025523 [13] 1.6947743 1.7862337 1.8771930 1.9678806 2.0584996 2.1492342 2.2402545 2.3317204 2.4237845 2.5165951 2.6102978 2.7050379 [25] 2.8009620 2.8982198 2.9969656 3.0973602 3.1995725 3.3037815 3.4101784 3.5189686 
3.6303748 3.7446397 3.8620297 3.9828389 [37] 4.1073944 4.2360619 4.3692534 4.5074361 4.6511434 4.8009895 4.9576873 5.1220712 5.2951282 5.4780385 5.6722300 5.8794549 [49] 6.1018972 6.3423292 6.6043460 6.8927308 7.2140471 7.5776562 7.9975859 8.4962822 9.1131220 9.9275079 11.1432868 13.6954281 > plot(D2t,pt) (2) $\chi^2$ 图检验:对三组观测数据分别检验是否来自4元正态分布 对(1)组: > s1=cov(data1) > n1=dim(data1)[1] > mean1=apply(data1,2,mean) > data10=as.matrix(data1) > D2=c() > for(i in 1:n1){ + D2[i]=(data10[i,]-mean1)%*%solve(s1)%*%t(t(data10[i,]-mean1)) + cat(D2[i])} 2.3566150.87569193.3047952.8114523.7483283.4172392.5699034.3461183.5919072.10211511.080623.6419633.0200982.0029151.7946376.334355.2382985.1832451.2706137.3091 > D2t=sort(D2) # $D^2_{(t)}$ > pt=c() > for(t in 1:n1){ + pt[t]=(t-0.5)/n1 + cat(pt[t])} # $p_t$ 0.0250.0750.1250.1750.2250.2750.3250.3750.4250.4750.5250.5750.6250.6750.7250.7750.8250.8750.9250.975 > plot(D2t,pt) [图:plot(D2t,pt);横轴 D2t(刻度 1~6),纵轴 pt(刻度 0.0~1.0)] 对(2)组: > s2=cov(data2) > n2=dim(data2)[1] > mean2=apply(data2,2,mean) > data20=as.matrix(data2) > D2=c() > for(i in 1:n1){ + D2[i]=(data20[i,]-mean2)%*%solve(s2)%*%t(t(data20[i,]-mean2)) + cat(D2[i])} 4.509632.6431977.4379630.67967053.0388361.3601842.0558582.1154885.1951254.1883834.02838810.814861.6963652.5080357.2389894.2872361.4593961.713325.6069173.422164 > D2t=sort(D2) > pt=c() > for(t in 1:n1){ + pt[t]=(t-0.5)/n2 + cat(pt[t])} 0.0250.0750.1250.1750.2250.2750.3250.3750.4250.4750.5250.5750.6250.6750.7250.7750.8250.8750.9250.975 > plot(D2t,pt) [图:plot(D2t,pt);横轴 D2t(刻度 2~10),纵轴 pt(刻度 0.0~1.0)] [图:plot(D2t,pt);横轴 D2t(刻度 2~10),纵轴 pt(刻度 0.0~1.0)] 对(3)组: > s3=cov(data3) > n3=dim(data3)[1] > mean3=apply(data3,2,mean) > data30=as.matrix(data3) > D2=c() > for(i in 1:n1){ + D2[i]=(data30[i,]-mean3)%*%solve(s3)%*%t(t(data30[i,]-mean3)) + cat(D2[i])} 3.9726173.3454264.6117212.2829991.4998854.7761745.5899942.3459172.2592941.0760154.2478350.3237727.2743363.3488044.9465015.1954879.4199912.7490492.926543.807644 > D2t=sort(D2) > pt=c() > for(t in 1:n1){ + pt[t]=(t-0.5)/n3 + 
cat(pt[t])} 0.0250.0750.1250.1750.2250.2750.3250.3750.4250.4750.5250.5750.6250.6750.7250.7750.8250.8750.9250.975 > plot(D2t,pt) [图:plot(D2t,pt);横轴 D2t(刻度 2~8),纵轴 pt(刻度 0.0~1.0)]。

相关主题