4.Variance

#sum of squares
#########Variance: A Worked Example##########
ozone<- span=""> read.table('gardens.txt',header=T)
attach(ozone)
ozone
##    gardenA gardenB gardenC
## 1        3       5       3
## 2        4       5       3
## 3        4       6       2
## 4        3       7       1
## 5        2       4      10
## 6        3       4       4
## 7        1       3       3
## 8        3       5      11
## 9        5       6       3
## 10       2       5      10
mean(gardenA)
## [1] 3
mean(ozone$gardenA)
## [1] 3
mean(ozone[,1])
## [1] 3
gardenA-mean(gardenA)
##  [1]  0  1  1  0 -1  0 -2  0  2 -1
(gardenA-mean(gardenA))^2
##  [1] 0 1 1 0 1 0 4 0 4 1
sum((gardenA-mean(gardenA))^2) #sum of squares
## [1] 12
sum((gardenA-mean(gardenA))^2)/(length(gardenA-1)) #the sum of squares #degree of 9
## [1] 1.2
mean(gardenB)
## [1] 5
gardenB-mean(gardenB)
##  [1]  0  0  1  2 -1 -1 -2  0  1  0
(gardenB-mean(gardenB))^2
##  [1] 0 0 1 4 1 1 4 0 1 0
sum((gardenB-mean(gardenB))^2)
## [1] 12
sum((gardenB-mean(gardenB))^2)/(length(gardenB)-1)
## [1] 1.333333
mean(gardenC)
## [1] 5
gardenC-mean(gardenC)
##  [1] -2 -2 -3 -4  5 -1 -2  6 -2  5
(gardenC-mean(gardenC))^2
##  [1]  4  4  9 16 25  1  4 36  4 25
sum((gardenC-mean(gardenC))^2)
## [1] 128
sum((gardenC-mean(gardenC))^2)/(length(gardenC)-1)
## [1] 14.22222
#F test 
var(gardenC)/var(gardenB)
## [1] 10.66667
2*(1-pf(10.667,9,9)) #The F Distribution
## [1] 0.001624002
help(pf)
## starting httpd help server ... done
var.test(gardenB,gardenB) #built-in F test:
## 
##  F test to compare two variances
## 
## data:  gardenB and gardenB
## F = 1, num df = 9, denom df = 9, p-value = 1
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2483859 4.0259942
## sample estimates:
## ratio of variances 
##                  1
gardenC
##  [1]  3  3  2  1 10  4  3 11  3 10
#########Variance and Sample Size
plot(c(0,32),c(0,15),type='n',xlab='Simple size',ylab='Variance')
for(n in seq(3,31,2)){
  for(i in 1:30){
    x<- span=""> rnorm(n,mean=10,sd=2)
    points(n,var(x))
  }
}
#########A Measure of Unreliability
sqrt(var(gardenA)/10)
## [1] 0.3651484
sqrt(var(gardenB)/10)
## [1] 0.3651484
sqrt(var(gardenC)/10)
## [1] 1.19257
########Confidence Intervals
#The first argument in qt() is the probability and the second is the #degrees of freedom
#left 95% confidence interval  value of Student’s t associated with α  
qt(.025,9)#Sample size 10,d.f 10-1
## [1] -2.262157
qt(.025,df=9)
## [1] -2.262157
##right 95% confidence interval  value of Student’s t associated with α 
qt(.975,9)
## [1] 2.262157
#values of t for 99% are bigger than these (0.005 in each tail):
qt(.995,9)
## [1] 3.249836
##99.5% confidence are bigger still (0.0025 in each tail):
qt(.9975,9)
## [1] 3.689662
#Bootstrap

data<- span=""> read.csv('skewdata.txt')
attach(data)
names(data)
## [1] "values"
#This draws the plain graph
plot(c(0,30),c(0,60),type='n',xlab='Sample size',ylab='Confidence interval')
for(k in seq(5,30,3)){
  a<- span=""> numeric(10000)
  for(i in 1:10000){
    a[i]<- span=""> mean(sample(values,k,replace=T))
  }
points(c(k,k),quantile(a,c(.025,.975)),type='b',pch=21,bg='red')
}
#At n= 30, the bootstrapped CI based on 10 00 simulations
quantile(a,c(.025,.975))
##     2.5%    97.5% 
## 24.90652 37.90855
#bootstrapped intervals compared with the intervals calculated from the normal (blue solid line
xv <- span=""> seq(5,30,0.1)
yv <- span=""> mean(values)+1.96*sqrt(var(values)/xv)
plot(c(0,30),c(0,60),type='n',xlab='Sample size',ylab='Confidence interval')
for(k in seq(5,30,3)){
  a<- span=""> numeric(10000)
  for(i in 1:10000){
    a[i]<- span=""> mean(sample(values,k,replace=T))
  }
  points(c(k,k),quantile(a,c(.025,.975)),type='b',pch=21,bg='red')
}
lines(xv,yv,col="blue")
yv <- span=""> mean(values)-1.96*sqrt(var(values)/xv)
lines(xv,yv,col="blue")
#Student’s t distribution (green dotted line):
yv <- span=""> mean(values)-qt(.975,xv-1)*sqrt(var(values)/xv)
lines(xv,yv,lty=2,col="green")
yv <- span=""> mean(values)+qt(.975,xv-1)*sqrt(var(values)/xv)
lines(xv,yv,lty=2,col="green")
#########################
points<- span=""> c(28,26,10,27,20,38,23,28,25,2)
x.m<- span=""> function(x) {
  #sprintf('x-mean=%f ',x-mean(x));
  x-mean(x)
}
x.m.t<- span=""> function(x) {
  #x.m() 
  #sprintf('x-mean(x)/length(x)=%f ',x-mean(x)/length(x))  
  ((x-mean(x))^2)
}
x_mean<- span=""> x.m(points)
x_mean_sqrt<- span=""> x.m.t(points)
x_mean
##  [1]   5.3   3.3 -12.7   4.3  -2.7  15.3   0.3   5.3   2.3 -20.7
x_mean_sqrt
##  [1]  28.09  10.89 161.29  18.49   7.29 234.09   0.09  28.09   5.29 428.49
sum(x_mean)/length(points)
## [1] 7.105427e-16
sum(x_mean_sqrt)/length(points)
## [1] 92.21
sqrt(sum(x_mean_sqrt)/length(points))
## [1] 9.602604
mean(points)
## [1] 22.7
(mean(points))^2
## [1] 515.29
var(points)
## [1] 102.4556

Nikkei225

28000-28550 up in the early session, down lately.