##############################################
## BEN MEULEMAN
## WORKSHOP ON LOGISTIC REGRESSION
## PART I
## 17.11.2022
##############################################
library(DescTools)
library(vcd)
library(plyr)
library(dplyr)
library(car)


## PROPORTIONS
# Test and confidence interval for a single proportion (25 successes out of
# 25 trials), with the continuity correction switched off.
prop.test(x = 25, n = 25, correct = FALSE)

# Exact binomial tests for 13/32 and for the all-successes case 25/25.
binom.test(x = 13, n = 32)
binom.test(x = 25, n = 25)

# Jeffreys interval (DescTools) for the same all-successes case.
BinomCI(x = 25, n = 25, method = "jeffreys")


## ODDS-RATIOS
# 2 x 2 frequency table cross-classifying Fight (rows) by Death (columns).
freqtable <- array(
  c(4, 9, 13, 6),
  dim = c(2, 2),
  dimnames = list(
    Fight = c("One duck", "Hundred horses"),
    Death = c("When", "How")
  )
)

# Chi-square test of independence on that table, without continuity
# correction. `freqtable` is the only table defined at this point.
chisq.test(freqtable, correct = FALSE)

# Odds ratio with a 95% interval (DescTools), log odds ratio (vcd) and
# Fisher's exact test for the same table.
OddsRatio(freqtable, conf.level = 0.95)
loddsratio(freqtable)
fisher.test(freqtable)

# Grid of proportions 0, 0.001, ..., 1 with the matching odds and log odds.
# The endpoints give odds of 0 and Inf, hence log odds of -Inf and Inf.
propvec <- (0:1000) / 1000
oddvec <- propvec / (1 - propvec)
loddvec <- log(oddvec)

# Three panels side by side with enlarged axis text.
par(mfrow = c(1, 3), mar = c(5, 5, 1, 0.5), cex.lab = 2, cex.axis = 2)

# Panel 1: proportion vs. odds; dashed guides at odds = 1 and proportion = 0.5.
plot(
  propvec, oddvec,
  type = "l", col = "limegreen", lwd = 3, ylim = c(0, 10),
  xlab = "Proportion", ylab = "Odds"
)
abline(h = 1, v = 0.5, lty = 2, col = "steelblue4", lwd = 1)

# Panel 2: proportion vs. log odds; dashed guides at log odds = 0 and
# proportion = 0.5.
plot(
  propvec, loddvec,
  type = "l", col = "limegreen", lwd = 3,
  xlab = "Proportion", ylab = "Log odds"
)
abline(h = 0, v = 0.5, lty = 2, col = "steelblue4", lwd = 1)

# Panel 3: odds vs. log odds; dashed guides at log odds = 0 and odds = 1.
plot(
  oddvec, loddvec,
  type = "l", col = "limegreen", lwd = 3, xlim = c(1, 10),
  xlab = "Odds", ylab = "Log odds"
)
abline(h = 0, v = 1, lty = 2, col = "steelblue4", lwd = 1)


## CHI-SQUARE ANALYSIS
# 2 x 3 frequency table cross-classifying Cat_type (rows) by Food_type
# (columns).
catfood <- array(
  c(6, 4, 13, 10, 2, 3),
  dim = c(2, 3),
  dimnames = list(
    Cat_type = c("Pet cat", "Shelter cat"),
    Food_type = c("Chicken", "Tuna", "Soft meat")
  )
)

# Test of independence on the full table, then the row and column totals,
# and a goodness-of-fit test on the Food_type totals alone.
chisq.test(catfood)
margin.table(catfood, 1)
margin.table(catfood, 2)
chisq.test(margin.table(catfood, 2))
Assocs(catfood)

# Shaded mosaic plot of the table analysed above (`catfood` is the only cat
# table defined in this section).
mosaic(catfood, shade = TRUE)

# Download the fear data set and the word-to-family lookup table.
# NOTE(review): "/download" was appended to the second share link so that it
# matches the first one; without it the URL presumably serves the share's web
# page instead of the CSV file -- confirm the link still resolves.
fear <- read.csv(
  "https://drive.switch.ch/index.php/s/bLYb349zJZyz7wn/download",
  header = TRUE, as.is = FALSE
)
families <- read.csv(
  "https://drive.switch.ch/index.php/s/kCvB0drFcLCVmqQ/download",
  header = FALSE, as.is = FALSE
)

# Recode each word (fear$word2) from the values in families$V2 to those in
# families$V4, with a fixed level order for tables and plots.
Family <- factor(
  mapvalues(
    fear$word2,
    from = as.character(families$V2),
    to = as.character(families$V4)
  ),
  levels = c("Neutral", "Joy", "Fascination", "Fear")
)
Depth <- fear$depth
Word <- fear$word2
head(fear)

# Depth x Family contingency table and its chi-square test; the test is
# computed once, stored in `feartest`, and printed.
feartable <- table(Depth, Family)
feartable
feartest <- chisq.test(feartable)
feartest

# Shaded mosaic plot of the Depth x Family table with rotated, bold labels
# and the variable names suppressed.
mosaic(
  feartable,
  shade = TRUE,
  rot_labels = c(90, 90, 0, 0),
  just_labels = c("left", "right"),
  varnames = c(FALSE, FALSE),
  gp_labels = gpar(fontsize = 12, fontface = 2)
)

# Goodness-of-fit test on each column of the table in turn. The calls are
# kept explicit so that each printout is labelled with its own column.
chisq.test(feartable[, 1])
chisq.test(feartable[, 2])
chisq.test(feartable[, 3])
chisq.test(feartable[, 4])

# Odds ratio for the 2 x 2 sub-table formed by rows 1 and 4 and columns
# 1 and 4, followed by association measures for the full table.
OddsRatio(feartable[c(1, 4), c(1, 4)], conf.level = 0.95)
Assocs(feartable)


## LOG-LINEAR ANALYSIS
# 2 x 2 x 2 frequency array: Race_defendant x Death_penalty x Race_victim.
deathpenalty <- array(
  c(53, 11, 414, 37, 0, 4, 16, 139),
  dim = c(2, 2, 2),
  dimnames = list(
    Race_defendant = c("White", "Black"),
    Death_penalty = c("No", "Yes"),
    Race_victim = c("White", "Black")
  )
)

# chisq.test() accepts only a vector or a matrix, so this call on a
# three-way array is expected to stop with an error. try() keeps that
# demonstration visible without aborting the script when it is source()'d.
try(chisq.test(deathpenalty))

# Long format: one row per cell, with the count in column Freq.
dpdf <- as.data.frame.table(deathpenalty)
dpdf

# Rebuild the three-way table from the long format.
xtabs(Freq ~ Race_defendant + Death_penalty + Race_victim, data = dpdf)

# Poisson log-linear model with all main effects and interactions, tested
# with type-II tests from car::Anova().
model <- glm(
  Freq ~ Race_defendant * Death_penalty * Race_victim,
  data = dpdf, family = poisson
)
Anova(model, type = 2)

# 2 x 3 frequency table of Cat_type by Social_activity, plus its long-format
# version (one row per cell, count in column Freq).
cathuman <- array(
  c(5, 0, 6, 3, 10, 14),
  dim = c(2, 3),
  dimnames = list(
    Cat_type = c("Pet cat", "Shelter cat"),
    Social_activity = c("Talking", "Petting", "Playing")
  )
)
cathdf <- as.data.frame.table(cathuman)
chisq.test(cathuman)

# Sequence of nested Poisson log-linear models. exp(predict(.)) maps the
# linear predictor back to the count scale, i.e. the fitted cell counts.

# Intercept only.
model0 <- glm(Freq ~ 1, data = cathdf, family = poisson)
exp(predict(model0))

# Add the Cat_type main effect.
model1 <- glm(Freq ~ Cat_type, data = cathdf, family = poisson)
exp(predict(model1))
anova(model0, model1, test = "Chisq")

# Add the Social_activity main effect.
model2 <- glm(Freq ~ Cat_type + Social_activity, data = cathdf, family = poisson)
exp(predict(model2))
anova(model1, model2, test = "Chisq")
# NOTE(review): residuals() on a glm returns deviance residuals by default;
# exponentiating them is unusual -- confirm this is the intended quantity.
matrix(exp(residuals(model2)), 2, 3)

# Add the Cat_type x Social_activity interaction.
model3 <- glm(Freq ~ Cat_type * Social_activity, data = cathdf, family = poisson)
exp(predict(model3))
anova(model2, model3, test = "Chisq")
# Summarise the cat model just fitted. `model` still holds the death-penalty
# fit from earlier in this section, so it is not the object to summarise here.
summary(model3)
Anova(model3, type = 2)


## SIMPSON'S PARADOX
# Rebuild the 2 x 2 x 2 death-penalty array and its long-format data frame
# so that this section can be run on its own.
deathpenalty <- array(
  c(53, 11, 414, 37, 0, 4, 16, 139),
  dim = c(2, 2, 2),
  dimnames = list(
    Race_defendant = c("White", "Black"),
    Death_penalty = c("No", "Yes"),
    Race_victim = c("White", "Black")
  )
)
dpdf <- as.data.frame.table(deathpenalty)

# Race_defendant x Death_penalty table with the counts summed over
# Race_victim: chi-square and Fisher's exact tests.
chisq.test(xtabs(Freq ~ Race_defendant + Death_penalty, data = dpdf))
fisher.test(xtabs(Freq ~ Race_defendant + Death_penalty, data = dpdf))

# Log-linear model on all rows of dpdf, with Race_victim left out of the
# formula.
model <- glm(
  Freq ~ Race_defendant * Death_penalty,
  data = dpdf, family = poisson
)
Anova(model, type = 2)

# Same analysis restricted to the rows with a White victim.
model <- glm(
  Freq ~ Race_defendant * Death_penalty,
  data = dpdf, family = poisson, subset = Race_victim == "White"
)
Anova(model, type = 2)
fisher.test(
  xtabs(
    Freq ~ Race_defendant + Death_penalty,
    data = dpdf, subset = Race_victim == "White"
  )
)

# Same analysis restricted to the rows with a Black victim.
model <- glm(
  Freq ~ Race_defendant * Death_penalty,
  data = dpdf, family = poisson, subset = Race_victim == "Black"
)
Anova(model, type = 2)
fisher.test(
  xtabs(
    Freq ~ Race_defendant + Death_penalty,
    data = dpdf, subset = Race_victim == "Black"
  )
)

# All main effects plus every two-way interaction.
model <- glm(
  Freq ~ (Race_defendant + Death_penalty + Race_victim)^2,
  data = dpdf, family = poisson
)
Anova(model, type = 2)

# All main effects, two-way interactions and the three-way interaction.
model <- glm(
  Freq ~ Race_defendant * Death_penalty * Race_victim,
  data = dpdf, family = poisson
)
Anova(model, type = 2)


# model1: each of Race_defendant and Death_penalty interacts with
# Race_victim, but not with the other.
# model2: adds the remaining two-way interaction, Race_defendant x
# Death_penalty.
model1 <- glm(
  Freq ~ (Race_defendant + Death_penalty) * Race_victim,
  data = dpdf, family = poisson
)
model2 <- glm(
  Freq ~ (Race_defendant + Death_penalty + Race_victim)^2,
  data = dpdf, family = poisson
)

# Likelihood-ratio comparison of the nested models. test = "Chisq" is given
# explicitly, as in the other anova() calls in this file, so that a p-value
# is printed regardless of the R version's default.
anova(model1, model2, test = "Chisq")

# Sum of (fitted count - observed count) over the cells, divided by 16.
# NOTE(review): dpdf has 8 rows and the differences are signed, so the
# choice of divisor and the intended summary should be confirmed.
sum(exp(predict(model2)) - dpdf$Freq) / 16
