library(car)
library(signal)
library(foreach)
library(visreg)
library(lmtest)
library(onewaytests)
library(sandwich)
library(MASS)
library(flexmix)
library(DescTools)
library(gtools)
library(permuco)
library(paran)
library(corrplot)
setwd("C:/Myfolder")

repeated <- read.csv("https://drive.switch.ch/index.php/s/8t1ZGqEYvZc0hUg/download")
thrones <- read.csv("https://drive.switch.ch/index.php/s/2L707SGrGIWvIty/download")

## HETEROSCEDASTICITY
set.seed(1606)
x <- scale(-150:150)
g1 <- sample(rep(0:1,times=c(150,151)))
g2 <- ifelse(g1==0,"A","B")
y <- 2*x+rnorm(301)
y2 <- 2*x+rnorm(301,0,seq(0,3,length.out=301))
y3 <- 2*g1+ifelse(g1==0,rnorm(150,0,0.8),rnorm(151,0,3))
data <- data.frame(y,y2,y3,x,g1,g2)

par(mar=c(5,5,4,0.5),mfrow=c(1,3),cex.lab=1.5,cex.axis=1.5,cex.main=1.5)
model1 <- lm(y~1+x,data=data)
visreg(model1,xvar="x",line.par=list(col="royalblue4",lwd=3),points.par=list(pch=4,col="darkgoldenrod3",cex=0.8),fill.par=list(col=adjustcolor("slategray3",0.5)),alpha=0.000000000000001,
xlab="X",ylab="Y",ylim=c(-6,10),main="Homoscedastic residuals")
model2 <- lm(y2~1+x,data=data)
visreg(model2,xvar="x",line.par=list(col="royalblue4",lwd=3),points.par=list(pch=4,col="darkgoldenrod3",cex=0.8),fill.par=list(col=adjustcolor("slategray3",0.5)),alpha=0.000000000000001,
xlab="X",ylab="Y",ylim=c(-6,10),main="Heteroscedastic residuals")
model3 <- lm(y3~g2,data=data)
visreg(model3,xvar="g2",line.par=list(col="royalblue4",lwd=3),points.par=list(pch=4,col="darkgoldenrod3",cex=0.8),fill.par=list(col=adjustcolor("slategray3",0.5)),alpha=0.00000000001,
xlab="Group",ylab="Y",ylim=c(-6,10),main="Heteroscedastic residuals")

summary(model1)
summary(model2)
summary(model3)

### WELCH CORRECTION AND HETEROSCEDASTICITY TESTS
t.test(y3~g2, data=data, var.equal=TRUE)
model <- lm(y3~g2, data=data)
summary(model)
leveneTest(model)

t.test(y3~g2, data=data, var.equal=FALSE)

bptest(y3~g2, data=data,studentize=FALSE)
hmctest(y3~g2, data=data)
gqtest(y3~g2, data=data)
bf.test(y3~g2, data=data)
ncvTest(model)

coeftest(model3, vcov=sandwich)

plot(abs(residuals(model2))~fitted(model2))
lines(lowess(fitted(model2),abs(residuals(model2)),f=0.3))


### MULTICOLLINEARITY
model <- lm(y~x+g2+x:g2,data=data)
vif(model)

ridge <- lm.ridge(y~x+g2+x:g2,data=data,lambda=0.1)
coef(model)


### INFLUENTIAL CASES (INFLUENCE MEASURES)
set.seed(1805)
randomdata <- data.frame(y=rnorm(200,0,10),g=rep(c("A","B"),each=100))
randomdata[2,1] <- 50

model <- lm(y ~ g, data=randomdata)
summary(model)
infIndexPlot(model,c("Cook", "Studentized", "Bonf"),col="royalblue4",pch=18) 	
influencePlot(model)
influence.measures(model)$infmat[,2]>0.05

model2 <- lm(y ~ g, data=randomdata[influence.measures(model)$infmat[,2]>0.05,])
summary(model2)

par(mar=c(5,5,4,0.5),mfrow=c(1,2),cex.lab=1.5,cex.axis=1.5,cex.main=1.5)
visreg(model,xvar="g",line.par=list(col="royalblue4",lwd=3),points.par=list(pch=4,col="darkgoldenrod3",cex=0.8),fill.par=list(col=adjustcolor("slategray3",0.5)),alpha=0.00000000001,
xlab="Group",ylab="Y",ylim=c(-50,50),main="Original data")
visreg(model2,xvar="g",line.par=list(col="royalblue4",lwd=3),points.par=list(pch=4,col="darkgoldenrod3",cex=0.8),fill.par=list(col=adjustcolor("slategray3",0.5)),alpha=0.00000000001,
xlab="Group",ylab="Y",ylim=c(-50,50),main="After DFBETA selection")


## SPHERICITY
repeated <- read.csv("repeated.csv")
mlm <- lm(cbind(D12,D13)~Gender,data=repeated)
round(residuals(mlm),2)
boxM(mlm)


## NORMALITY
sample(1:6,size=1)

rolls <- foreach(i=1:1000,.combine="c") %do% { sample(1:6,size=1) }
barplot(table(rolls)/1000,ylim=c(0,0.20),xlab="Outcome of one roll",ylab="Relative frequency")
abline(h=1/6,lty=2)

rolls <- foreach(i=1:1000,.combine="c") %do% { sample(1:6,size=1)+sample(1:6,size=1) }
barplot(table(rolls)/1000,ylim=c(0,0.20),xlab="Sum of two rolls",ylab="Relative frequency")
abline(h=1/6,lty=2)


## TYPES OF NON-PARAMETRIC DATA
x <- scale(1:500)
set.seed(667)
one <- 2*x + rnorm(500,0,2)
two <- x+2*x^2+2*x^3 + rnorm(500,0,3)
three <- 2*x + rlnorm(500,0,1)
four <- x+2*x^2+3*x^3 + rlnorm(500,0,1)

par(mfrow=c(2,3),mar=c(5,5,0.5,0.5),cex.lab=1.5,cex.axis=1.5)
plot(x,one,ylim=c(-10,10),pch=4,col="bisque3",xlab="X",ylab="Y")
lines(x,fitted(lm(one~x)),lwd=4,col="royalblue4")
plot(x,two,ylim=c(-20,25),pch=4,col="bisque3",xlab="X",ylab="Y")
lines(lowess(x,two,f=0.3),lwd=4,col="royalblue4")
plot(density(lowess(x,two,f=0.3)$y-two),lwd=2,col="darkgoldenrod4",main="")
plot(x,three,ylim=c(-5,15),pch=4,col="bisque3",xlab="X",ylab="Y")
lines(x,fitted(lm(three~x)),lwd=4,col="royalblue4")
plot(x,four,ylim=c(-10,30),pch=4,col="bisque3",xlab="X",ylab="Y")
lines(lowess(x,four,f=0.3),lwd=4,col="royalblue4")
plot(density(lowess(x,four,f=0.3)$y-four),lwd=2,col="darkgoldenrod4",main="")

plot(density(residuals(lm(three~x))))
qqnorm(residuals(lm(three~x))) ; qqline(residuals(lm(three~x)))
shapiro.test(rlnorm(500,0,1))

set.seed(1985)
skewed <- rlnorm(500,0,1)
ks.test(skewed,"pnorm")
shapiro.test(skewed)

ks.test(skewed,"plnorm")

set.seed(1252)
par(mar=c(5,5,0.5,0.5))
mvn <- mvrnorm(250,c(3,4),Sigma=matrix(c(1.5,0.9,0.9,1.5),2,2))
plot(mvn,pch=5,col="slategray4",ylim=c(-1,9),xlim=c(-1,7),xlab="X1",ylab="X2",cex=1.2)


## RANK TESTS
c(4,10,100,1,1,3,9)
rank(c(4,10,100,1,3,9))

cor.test(~Height+Volume,data=trees,method="spearman")

x <- scale(1:500)
set.seed(667)
y <- x+2*x^2+2*x^3 + rnorm(500,0,3)
monotone <- data.frame(x,y)

par(mar=c(5,5,4,0.5),mfrow=c(1,2))
plot(x,y,pch=4,col="slategray3",main="Before rank transform",xlab="X",ylab="Y")
lines(-200:200/100,(-200:200/100)+2*(-200:200/100)^2+2*(-200:200/100)^3,lwd=3,col="darkgoldenrod")
abline(coef(lm(y~x,data=monotone)),col="darkblue",lwd=4)
plot(rank(x),rank(y),pch=4,col="slategray3",main="After rank transform",xlab="rank(X)",ylab="rank(Y)")
abline(coef(lm(rank(y)~rank(x),data=monotone)),col="darkblue",lwd=4)

cor.test(x,y,method="spearman")
visreg(lm(mono~x,data=monotone),xvar="x")
visreg(lm(scale(rank(mono))~scale(rank(x)),data=monotone),xvar="x")


## PERMUTATION TESTS
### PARASITE EXAMPLE
library(gtools)
permutations(2,8,0:1,repeats=TRUE)
psychic <- permutations(2,8,0:1,repeats=TRUE)
barplot(table(rowSums(psychic)),ylim=c(0,100),xlab="Number correct",ylab="Frequency")
sum(rowSums(psychic)==8)

### EXACT PERMUTATION T-TEST
phd <- data.frame(student=c("Jon","Robb","Arya","Sansa","Bran"),mentoring=rep(c("No","Yes"),times=c(3,2)),drinks=c(5,7,4,10,5))
aggregate(drinks~mentoring,data=phd,FUN=mean)
t.test(drinks~mentoring,data=phd,var.equal=TRUE)

p1 <-  data.frame(student=c("Jon","Robb","Sansa","Arya","Bran"),mentoring=rep(c("No","Yes"),times=c(3,2)),drinks=c(5,7,10,4,5))
t.test(drinks~mentoring,data=p1,var.equal=TRUE)

permutations <- combn(c("Jon","Robb","Arya","Sansa","Bran"),3)
tvals <- foreach(i=1:ncol(permutations),.combine="c") %do% {
  select <- c(match(permutations[,i],phd$student),(1:5)[-match(permutations[,i],phd$student)])
  print(select)
  t.test(drinks[select]~mentoring,data=perm,var.equal=TRUE)$stat
}
plot(hist(tvals))
abs(tvals)
mean(abs(tvals)>=0.99)
mean(tvals<=-0.99)
mean(tvals>=-0.99)

par(mfrow=c(1,2),mar=c(5,5,3,0.5))
plot(density((tvals)),main="Raw t-values",col="royalblue4",lwd=3)
abline(v=c(-0.99,0.99),lty=2)
plot(density(abs(tvals)),main="Absolute t-values",lwd=3,col="royalblue4")
abline(v=c(0.99),lty=2)


### APPROXIMATE PERMUTATION T-TEST
set.seed(1606)
x <- scale(-150:150)
g1 <- sample(rep(0:1,times=c(150,151)))
g <- ifelse(g1==0,"A","B")
y <- 2*g1+ifelse(g1==0,rnorm(150,0,0.8),rnorm(151,0,3))
data <- data.frame(y,x,g)

set.seed(2337)
data$gp <- sample(data$g)

tvals <- foreach(P=1:999,.combine="c") %do% {
  set.seed(P)
  data$gp <- sample(data$g)
  t.test(y~gp,data=data,var.equal=FALSE)$stat
}
tvals <- c(tvals,t.test(y~g,data=data,var.equal=FALSE)$stat)
plot(density(tvals),main="",lwd=3,col="royalblue4")
abline(v=quantile(tvals,0.025),lty=2,col="firebrick")
abline(v=t.test(y~g,data=data,var.equal=FALSE)$stat,lty=2)

quantile(abs(tvals),0.95)
mean(abs(tvals)>=abs(tvals[1000]))

twotPermutation(y[g=="A"],y[g=="B"])


### PERMUTATION PAIRED T-TEST
thrones$Diff <- thrones$X2018-thrones$X2017

set.seed(1027)
flips <- sample(c(-1,1),size=nrow(thrones),replace=TRUE)
thrones$Diff*flips

onet.permutation(thrones$Diff)
t.test(thrones$X2017,thrones$X2018,paired=TRUE)


### PERMUTATION REGRESSION
thrones$Diff <- thrones$X2018-thrones$X2017

set.seed(2128)
lmperm(Diff~PI+Gender+Age,data=thrones)

set.seed(2128)
aovperm(Diff~PI+Gender+Age,data=thrones, method="freedman_lane")
plot(aovperm(Diff~PI+Gender+Age,data=thrones))


### PERMUTATION REPEATED MEASURES (M)ANOVA
thrones2 <- reshape(thrones,direction="long",varying=c("X2017","X2018"),idvar=c("Student","PI","Age","Gender"),times=c(2017,2018),v.names=c("Output"),timevar="Year")

set.seed(1856)
aovperm(Output~Year+PI + Error(Student/Year),data=thrones2,method="Rde_kheradPajouh_renaud")

repeated2 <- reshape(repeated,direction="long",varying=c("T1","T2","T3"),idvar="Subject",times=c("T1","T2","T3"),v.names="Score",timevar="Time")
repeated2
aovperm(Score~Time+Error(Subject/Time),data=repeated2)

source("C:/Myfolder/PRMM.r")
design <- data.frame(Time=1:3)
prmm(repeated[,4:6],design=design)


## PARALLEL ANALYSIS
set.seed(1203)
mdata <- matrix(runif(40*20,-5,5),40,20)
head(mdata)
colnames(mdata) <- paste("X",1:20,sep="")

egs <- eigen(cor(mdata))$values
par(mar=c(5,5,1,0.5),cex.lab=1.2,cex.axis=1.2)
plot(egs,pch=17,type="o",ylim=c(0,3),xlab="Principal component",ylab="Eigenvalue",cex=1.5,lwd=3)
abline(h=1,col="firebrick",lty=2,lwd=2)

PA <- foreach(i=1:1000,.combine="rbind") %do% {
  set.seed(i*i)
  perm <- matrix(runif(40*20,-5,5),40,20)
  egs <- eigen(cor(perm))$values
}
cutoff <- apply(PA,2,quantile,0.95)

par(mar=c(5,5,1,0.5),cex.lab=1.2,cex.axis=1.2)
plot(egs,pch=17,type="o",ylim=c(0,3),xlab="Principal component",ylab="Eigenvalue",cex=1.5,lwd=3)
abline(h=1,col="firebrick",lty=2,lwd=2)
points(apply(PA,2,quantile,0.50),pch=18,col="darkcyan")
arrows(1:20,apply(PA,2,quantile,0.025),1:20,apply(PA,2,quantile,0.975),col="darkcyan",code=3,angle=90,length=0.10,lwd=2)
lines(cutoff,type="o",pch=6,col="darkcyan",cex=1.5,lwd=3)

paran(mdata)


## BOOTSTRAP COMPARISON
set.seed(1606)
x <- scale(-150:150)
g1 <- sample(rep(0:1,times=c(150,151)))
g <- ifelse(g1==0,"A","B")
y <- 0.5*g1+ifelse(g1==0,rnorm(150,0,1),rnorm(151,0,1))
problem <- data.frame(y,x,g)
t.test(y~g,data=problem,var.equal=TRUE)

tfunc <- function(d,i) { t.test(y~g,data=d[i,],var.equal=TRUE)$statistic }
set.seed(105)
btvals <- boot(problem,tfunc,R=999,stype="i")
btvals$t

boot.ci(btvals)
