lois.R

##############################################################################
## lois.R
## illustrations pour la séance 2 de l'atelier AS du Cogmaster (2007-2008)
##
## chl, Dim oct 21 22:24:39 CEST 2007
## R version 2.6.0 (Mac OS X)
##
## (copyleft) 2007 -- www.aliquote.org
##############################################################################
n <- 10
x1 <- matrix(c(rbinom(n,1,.5),rbinom(n,1,.5),rbinom(n,1,.5),
               rbinom(n,1,.5),rbinom(n,1,.5)),nr=5,byrow=TRUE)
x2 <- sample(1:6,100,replace=TRUE,prob=rep(1/6,6))
x3 <- runif(100)
x4 <- rnorm(100)

par(mfrow=c(2,2),mar=rep(4,4))
#
plot(rep(1:n,5),rep(seq(1,5),each=10),pch=19,
     col=ifelse(as.logical(t(x1)),"green","red"),
     ylab="Séquence",xlab="Essai",main="Lancer d'une pièce",
     axes=FALSE)
axis(1,at=1:10,cex.axis=.8)
axis(2,at=1:5,las=1,tick=FALSE)
axis(4,at=1:5,labels=paste(apply(x1,1,sum),"/",n,sep=""),
     las=1,tick=FALSE)
mtext("Succès",side=3,line=0,col="green",adj=0,cex=.6)
mtext("Échec",side=3,line=.6,col="red",adj=0,cex=.6)
#
plot(table(x2),xlab="Résultat",ylab="N",main="Lancer d'un dé")
points(table(x2),pch=19)
text(1,max(table(x2))-2,"N=100",cex=.8,pos=4)
abline(h=100/6,lty=2,col="gray75")
#
hh <- hist(x3,breaks=5,xlab="x",ylab="N",
           main=expression(paste("Tirage uniforme ",
               bold(U)[group("[",list(0,1),"]")])))
points(hh$mids,hh$counts,pch=19,col="red")
lines(hh$mids,hh$counts,col="red")
#
hist(x4,prob=TRUE,ylab="Densité",xlab="x",
     main="Aléas gaussiens",cex.axis=.8)
xx <- seq(-3,3,by=.1)
lines(xx,dnorm(xx,0,1),col="red",lwd=2)
legend("topright","N(0,1)",text.col="red",bty="n")

##############################################################################
# Méthode angulaire de Marsaglia
n <- 500
v <- w <- z <- numeric(n)
# aléas uniformes sur le disque unité
for (i in 1:n) {
  t <- runif(1); u <- runif(1)
        v[i] <- 2*t-1
        w[i] <- 2*u-1
  a <- v[i]^2+w[i]^2
  z[i] <- ifelse(a<=1,a,NA)
}
summary(z)

##############################################################################
# Simulation de V.A. gaussiennes
sim.norm <- function(n,mu,sigma) {
  x <- numeric(n)
  for (i in 1:m) 
          x[i] <- mu + sigma*sqrt(12/n)*(sum(runif(n))-n/2)
        return(x)
}
hist(sim.norm(100,10,1),prob=TRUE,ylim=c(0,.45))
xx <- seq(10-3,10+3,by=.1)
lines(xx,dnorm(xx,10,1),col="red")

##############################################################################
# Jeu pile/face
rbinom(10,1,.5)
x <- sample(c(0,1),10,replace=TRUE)
table(x)
choose(10,3)

##############################################################################
# Jeu pile/face, LGN
n <- 20; m <- 10
x <- matrix(rbinom(n*m,1,.5),nc=m)
table(x[,1])    # séquence 1
sum(x[,1])/n
apply(x,2,sum)/n
?replicate

##############################################################################
# simulation d'un lancer de pièces
n <- seq(10,1000,by=10)
f <- NULL

my.prop <- function(x)
  return(table(x)/sum(table(x)))

for (i in n)
  f <- append(f,as.numeric(my.prop(rbinom(i,1,.5))[2]))

plot(n,f,ylim=c(0,1))
abline(h=.5,col="red")

plot(n,f-.5,type="s",ylim=c(-0.2,0.2),ylab="f-F")
abline(h=0,col="red")

##############################################################################
# Distribution d'échantillonnage
k <- 25
n <- 25
f <- numeric(n)
j <- 1
while (j < (k+1)) {
  f[j] <- as.numeric(my.prop(rbinom(n,1,.5))[2])
  j <- j+1
}
hist(f)

# même chose sous forme de fonction
sim <- function(k,n=25) {
        f <- numeric(k)
        j <- 1
        while (j < (k+1)) {
          f[j] <- as.numeric(my.prop(rbinom(n,1,.5))[2])
          j <- j+1
        }
        return(f)
}

nn <- c(25,50,100,200)
xx <- seq(0,1,by=.1)
par(mfrow=c(2,2))
for (i in 1:4)
        hist(sim(nn[i]),xlab=expression(f[obs]),ylab="N",
             main=paste("n=",nn[i],sep=""))

# version optimisée et compacte
f2 <- replicate(25,my.prop(rbinom(25,1,.5))[2])
hist(as.numeric(f2))

##############################################################################
# jeu de dés
lance.des <- function(n,k=1) {
  matrix(replicate(n*k,sample(1:6,size=1,replace=TRUE)),nr=n,nc=k)
}
lance.des(10)
lance.des(10,2)
plot(table(apply(lance.des(40,2),1,sum)),xlim=c(2,12))

##############################################################################
# Association entre deux variables binaires (1)
odds.ratio <- function(x,alpha=.05) {
  if (!is.table(x)) x <- as.table(x)
  pow <- function(x,a=-1) x^a
  or <- (x[1,1]*x[2,2]) / (x[1,2]*x[2,1])
  or.se <- sqrt(sum(pow(x)))
  or.ci <- exp(log(or) + c(-1,1)*qnorm(1-alpha/2)*or.se)
  return(list(OR=or,SE=or.se,CI=or.ci))
}
# exemple tiré de Agresti (p. 72)
x <- matrix(c(28,18,656,658),nr=2) 
dimnames(x) <- list(c("Placebo","Aspirin"),c("yes","no"))
odds.ratio(x)

library(vcd) 
x.or <- oddsratio(x,log=FALSE)
confint(x.or)
plot(x.or)
fourfold(x)

##############################################################################
# Association entre dexu variables binaires (2)
x <- cbind(c(50,10),c(20,40))
dimnames(x) <- list(c("+","-"),
                    c("+","-"))
mcnemar.test(x)
mcnemar.test(x,correct=FALSE)
binom.test(apply(x,1,sum),.5)
mosaicplot(x,col=2:3,cex.axis=2)

##############################################################################
library(exactLoglinTest)
data(pathologist.dat)    # Agresti, p. 263
help(pathologist.dat)

(aa <- xtabs(y~A+B,data=pathologist.dat))
sum(diag(aa))/sum(aa)

attach(pathologist.dat)
s <- a <- b <- NULL
for (i in 1:nrow(pathologist.dat)) {
  k <- y[i]
  if (k != 0) {
    a <- append(a,rep(A[i],k))
    b <- append(b,rep(B[i],k))  
  }
}
aa2 <- data.frame(A=a,B=b)  
nrow(aa2) == sum(aa)      # vérification
library(irr)
kappa2(aa2)
kappa2(aa2,"weighted")