##############################################
## BEN MEULEMAN
## WORKSHOP ON LOGISTIC REGRESSION
## PART II
## 17.11.2022
##############################################
library(visreg)
library(car)
library(AMR)
library(emmeans)
library(faraway)
library(parameters)
library(BFpack)
library(caTools)
library(DescTools)
library(vcd)
index <- function(v) { data.frame(V=colnames(v)) }
weighting <- function(v) {
  distr <- table(v)
  nms <- names(distr)
  target <- sum(distr)/2
  ifelse(as.character(v)==nms[1],target/distr[1],target/distr[2])
}

## CHI-SQUARE ANALYSIS
GOT <- read.csv("https://drive.switch.ch/index.php/s/WHfgSnHYRwtSTSl/download",header=TRUE,as.is=FALSE)
xtabs(~Wedding+Death, data=GOT)
proportions(xtabs(~Wedding+Death, data=GOT),1)
chisq.test(xtabs(~Wedding+Death, data=GOT))
fisher.test(xtabs(~Wedding+Death, data=GOT))


## NAIVE REGRESSION
GOT$DeathBin <- ifelse(GOT$Death=="No",0,1)
GOT$WeddingBin <- ifelse(GOT$Wedding=="No",0,1)
model <- lm(DeathBin~Wedding, data=GOT)
summary(model)
visreg(model,points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=3,col="royalblue4"),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),ylab="Proportion of deaths")


## LOGIT
model <- glm(Death~1, data=GOT, family=binomial(link="logit"))
summary(model)

model <- glm(Death~Wedding, data=GOT, family=binomial(link="logit"))
summary(model)
visreg(model,scale="response",points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=2,col=c("royalblue4")),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),
 ylab="Probability of death versus survival",ylim=c(0,1))


## LOG-LINEAR
GOTDF <- as.data.frame.table(xtabs(~Wedding+Death, data=GOT))
model <- glm(Freq~Wedding*Death, data=GOTDF, family=poisson)
summary(model)
Anova(model)


## G-TEST
g.test(xtabs(~Wedding+Death, data=GOT))


## CONTINUOUS PREDICTOR
Boxplot(GOT$Age,id=list(labels=GOT$Character))
model <- glm(Death~Age, data=GOT, family=binomial, subset=Age<400)
summary(model)

par(mfrow=c(1,2))
visreg(model,points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=3,col="royalblue4"),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),
 ylab="Log-odds of death versus survival",ylim=c(-3,3),jitter=TRUE)
abline(h=c(0,0.7885),lty=2,col=c("darkred","darkmagenta"))
visreg(model,points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=3,col="royalblue4"),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),
 ylab="Probability of death versus survival",ylim=c(0,1),scale="response",jitter=TRUE)
abline(h=c(0.5,0.6875),lty=2,col=c("darkred","darkmagenta"))


## MULTIPLE PREDICTORS
model <- glm(Death~Age+Gender+Profession+Class+Travelled+Battles+Killed, data=GOT, family=binomial, subset=Age<400)
summary(model)
Anova(model)

visreg(model,xvar="Profession",ylim=c(0,1),scale="response",points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=3,col="royalblue4"),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),
 ylab="Probability of death versus survival",jitter=TRUE)
abline(h=c(0.5,0.6875),lty=2,col=c("darkred","goldenrod"))

reduced <- glm(Death~Age+Gender+Class+Travelled+Battles+Killed, data=GOT, family=binomial, subset=Age<400)
full <- glm(Death~Age+Gender+Profession+Class+Travelled+Battles+Killed, data=GOT, family=binomial, subset=Age<400)
anova(reduced,full, test="LRT")

Anova(model)

em <- emmeans(model,specs="Profession")
pairs(em, adjust="none", type="prob")

par(mfrow=c(2,3),cex.lab=1.5,cex.axis=1.5,mar=c(5,5,1,0.5))
visreg(reduced,ylim=c(0,1),scale="response",points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=3,col="royalblue4"),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),
 ylab="Probability of death versus survival",jitter=TRUE)


## INTERACTIONS
model <- glm(Death~Age*Battles, dat=GOT, subset=Age<400, family=binomial)
summary(model)
Anova(model)

visreg(model,xvar="Age",by="Battles",breaks=c(0,3,8), overlay=TRUE, scale="response",
  line.par=list(lwd=3,col=c("goldenrod","darkcyan","royalblue4")),
  fill.par=list(col=adjustcolor(c("goldenrod","darkcyan","royalblue4"),alpha=0.1)),
  ylab="Probability of death versus survival",jitter=TRUE)
abline(h=0.5,lty=2,lwd=2)

colpal <- colorRampPalette(c("white","azure2","steelblue4"))
visreg2d(model,"Age","Battles",scale="response",plot.type="persp",col="steelblue4",nn=9,border="grey80",theta=310,phi=25,zlim=c(0,1),shade=0.5)
visreg2d(model,"Age","Battles",scale="response",color=colpal(100),nn=99,main="",zlim=c(0,1))


## EFFECT SIZE
model <- glm(Death~Age+Gender+Profession+Class+Travelled+Battles+Killed, data=GOT[GOT$Age<400,], family=binomial)
exp(confint(model))
data.frame(OR=exp(coef(model)),exp(confint(model)))

em <- emmeans(model,"Profession")
pairs(em, adjust="none", type="response")

standardize_parameters(model, method="refit", two_sd=TRUE, exponentiate=TRUE)

1/BF(model)$BFtu_exploratory


## DIAGNOSTICS
model <- glm(Death~Age+Gender+Profession+Class+Travelled+Battles+Killed, data=GOT, family=binomial, subset=Age<400)
residuals(model)
halfnorm(residuals(model))
vif(model)

xtabs(~Religion+Death,data=GOT)
model <- glm(Death~Religion, data=GOT, family=binomial)
summary(model)

model <- glm(Death~Age, data=GOT, family=binomial)
influence.measures(model)
round(influence.measures(model)$infmat[120,],2)

plot(DeathBin~Age, data=GOT, subset=Age<400, pch="|", col="burlywood4",ylab="Death")
lines(runmean(DeathBin,11)~Age, data=GOT[order(GOT$Age),],col="darkcyan",lwd=2)

model <- glm(Death~poly(Age,3), data=GOT, family=binomial, subset=Age<400)
summary(model)
visreg(model,points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=3,col="royalblue4"),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),
 ylab="Probability of death versus survival",ylim=c(0,1),scale="response",jitter=TRUE)
abline(h=c(0.5,0.6875),lty=2,col=c("darkred","darkmagenta"))

## GOODNESS-OF-FIT
set.seed(1985)
GOT$Random <- rnorm(208)
model <- glm(Death~Age+Gender+Profession+Class+Travelled+Battles+Killed, data=GOT, family=binomial)
null <- glm(Death~1, data=GOT, family=binomial)
random <- glm(Death~0+Random, data=GOT, family=binomial)

GOT$Predicted <- ifelse(predict(model,type="response")>0.5,"Yes","No")
GOT$Predicted_null <- factor(ifelse(predict(null,type="response")>0.5,"Yes","No"),levels=c("No","Yes"))
GOT$Predicted_random <- ifelse(predict(random,type="response")>0.5,"Yes","No")

conf1 <- xtabs(~Predicted+Death,data=GOT) ; xtabs(~Predicted+Death,data=GOT)
conf2 <- xtabs(~Predicted_null+Death,data=GOT) ; xtabs(~Predicted_null+Death,data=GOT)
conf3 <- xtabs(~Predicted_random+Death,data=GOT) ; xtabs(~Predicted_random+Death,data=GOT) 

MCC(conf1) ; MCC(conf2) ; MCC(conf3)
CohenKappa(conf1) ; CohenKappa(conf2) ; CohenKappa(conf3)
AIC(model) ; AIC(null) ; AIC(random)
BIC(model) ; BIC(null) ; BIC(random)

ROC <- roc(GOT$Death,predict(model,type="response"),plot=TRUE)
ROC
ROC$thresholds
plot((1-ROC$specificities),ROC$sensitivities,pch=15,col=rep(c("bisque3","darkblue","bisque3"),times=c(106,1,96)),type="l",lwd=2,xlab="1 - Specificity",ylab="Sensitivity")
points((1-ROC$specificities)[107],ROC$sensitivities[106],col="darkblue",cex=1.5,pch=4,lwd=2)
abline(a=0,b=1,lwd=2)

max(ROC$sensitivities-(1-ROC$specificities))
which.max(ROC$sensitivities-(1-ROC$specificities))
ROC$threshold[107]

GOT$Predicted_ROC <- ifelse(predict(model,type="response")>0.718,"Yes","No")
conf3 <- xtabs(~Predicted_ROC+Death,data=GOT) ; xtabs(~Predicted_ROC+Death,data=GOT)
MCC(conf3)
Assocs(conf3)



## CASE WEIGHTS
model <- glm(Death~Wedding, data=GOT, family=binomial, weights=rep(20,208))
summary(model)
par(mfrow=c(1,2))
visreg(model,points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=3,col="royalblue4"),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),
 ylab="Probability of death versus survival",ylim=c(0,1),scale="response",jitter=TRUE)
abline(h=c(0.5,0.6875),lty=2,col=c("darkred","darkmagenta"))

model <- glm(Death~Age+Gender+Profession+Class+Travelled+Battles+Killed, data=GOT, family=binomial, subset=Age<400, weights=weighting(Death))
summary(model)
Anova(model)

par(mfrow=c(2,4),cex.lab=1.5,cex.axis=1.5,mar=c(5,5,1,0.5))
visreg(model,ylim=c(0,1),scale="response",points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=3,col="royalblue4"),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),
 ylab="Probability of death versus survival",jitter=TRUE)


## STEPWISE PREDICTOR SELECTION
lower <- glm(Death~1,data=GOT[,-c(1,4)],family=binomial)
upper <- glm(Death~.,data=GOT[,-c(1,4)],family=binomial)

SPS <- step(upper, scope=list(lower=lower,upper=upper),direction="both", k=2)
summary(SPS)
predstep <- factor(ifelse(predict(SPS,type="response")>0.5,"Yes","No"))
conf <- table(Predicted=predstep,Observed=GOT$Death)
MCC(conf)
roc(GOT$Death,predict(SPS,type="response"),plot=TRUE)

model <- glm(Death~Travelled+Children+Killed+Crowned, data=GOT, family=binomial)
par(mfrow=c(2,2),cex.lab=1.2,cex.axis=1.2,mar=c(5,5,1,0.5))
visreg(model,ylim=c(0,1),scale="response",points.par=list(pch=4,col="bisque3",cex=1),line.par=list(lwd=3,col="royalblue4"),fill.par=list(col=adjustcolor("steelblue",alpha=0.2)),
 ylab="Probability of death versus survival",jitter=TRUE)

