###############################################
## MACHINE LEARNING WORKSHOP
## SCRIPT 07 - FLEXIBLE REGRESSION WITH SPLINES
###############################################


## LOAD REQUIRED PACKAGES
library(earth)

### CLASSIFICATION
## Download the training and test sets. as.is=FALSE makes read.csv() convert
## character columns to factors.
train <- read.csv("https://drive.switch.ch/index.php/s/8nNrBeeIOxilaKP/download",
                  header = TRUE, as.is = FALSE)
head(train)
dim(train)
test <- read.csv("https://drive.switch.ch/index.php/s/wS6WmzMYCMTRgzH/download",
                 header = TRUE, as.is = FALSE)
head(test)
dim(test)

## Raw data, two panels side by side.
## Left: test points ("~") with the training points (diamonds) overlaid.
## Right: test points only.
## The third column is used directly as the plotting colour
## (presumably it is the Class label stored as colour names -- TODO confirm).
par(mar = c(5, 5, 1, 1), cex.axis = 1.2, cex.lab = 1.5, mfrow = c(1, 2))
plot(test[, 1:2], col = as.character(test[, 3]), pch = "~")
## No xlim/ylim here: points() only adds to the existing axes, whose limits
## were already fixed by the plot() call above.
points(train[, 1:2], col = as.character(train[, 3]), pch = 18, cex = 1.2)
plot(test[, 1:2], col = as.character(test[, 3]), pch = "~")

## MARS classifier; degree=2 allows interaction terms between X1 and X2.
mars <- earth(Class ~ X1 + X2, data = train, degree = 2)
mars.pred <- predict(mars, newdata = test, type = "class")
## Misclassification rate on the test set.
mean(mars.pred != test$Class)

## Left: true test classes with training points overlaid.
## Right: test points coloured by the predicted class.
par(mar = c(5, 5, 1, 1), cex.axis = 1.2, cex.lab = 1.5, mfrow = c(1, 2))
plot(test[, 1:2], col = as.character(test[, 3]), pch = "~", cex = 1)
points(train[, 1:2], col = as.character(train[, 3]), pch = 18, cex = 1.2)
plot(test[, 1:2], col = as.character(mars.pred), pch = "~")

## Selected terms and variable importance of the fitted model.
summary(mars)
evimp(mars)

## Same model, but with pmethod="cv" and 5 folds for the pruning pass. The
## seed is set because the fold assignment is random.
set.seed(1401)
mars <- earth(Class ~ X1 + X2, data = train, degree = 2,
              pmethod = "cv", nfold = 5, trace = 1)
mars.pred <- predict(mars, newdata = test, type = "class")
## Misclassification rate of the cross-validated model on the test set.
mean(mars.pred != test$Class)

## Same two-panel comparison for the cross-validated model.
par(mar = c(5, 5, 1, 1), cex.axis = 1.2, cex.lab = 1.5, mfrow = c(1, 2))
plot(test[, 1:2], col = as.character(test[, 3]), pch = "~", cex = 1)
points(train[, 1:2], col = as.character(train[, 3]), pch = 18, cex = 1.2)
plot(test[, 1:2], col = as.character(mars.pred), pch = "~")


### REGRESSION
## Simulate 500 noisy observations around a sine wave plus a linear trend.
set.seed(668)
x <- 1:500
true <- 100 * sin(0.02 * x) + 0.5 * x
y <- true + rnorm(500, 0, 25)
## Reset mfrow explicitly: any multi-panel layout set earlier in the session
## would otherwise squeeze this scatter plot into a single panel.
par(mfrow = c(1, 1), mar = c(5, 5, 1, 1), cex.lab = 1.2, cex.axis = 1.2)
plot(x, y, pch = "+", col = "grey70", xlab = "X", ylab = "Y", cex = 1.2)

## Fine prediction grid: 0.1, 0.2, ..., 500.
xval <- (1:5000) / 10
## Three competing fits: MARS (pmethod="cv" with 5 folds), a straight line,
## and a degree-4 polynomial.
## NOTE: the object `poly` shadows the name of stats::poly(), but the call
## poly(x, degree = 4) still resolves to the function because R skips
## non-function objects when looking up a function call.
mars <- earth(y ~ x, data = data.frame(x, y), pmethod = "cv", nfold = 5)
lin <- lm(y ~ x)
poly <- lm(y ~ poly(x, degree = 4))
predlin <- predict(lin, newdata = data.frame(x = xval))
predpoly <- predict(poly, newdata = data.frame(x = xval))
predmars <- predict(mars, newdata = data.frame(x = xval))

## Overlay the three fitted curves on the data.
par(mfrow = c(1, 1), mar = c(4, 5, 3, 0.5), cex.lab = 1.5, cex.axis = 1.5,
    cex.main = 1.5)
plot(x, y, pch = "+", col = "grey80", xlab = "X", ylab = "Y", cex = 1.2)
lines(xval, predlin, col = "darkcyan", lwd = 2)
lines(xval, predmars, col = "firebrick", lwd = 4)
lines(xval, predpoly, col = "royalblue3", lwd = 3)
## Legend line widths match the widths used for each curve above.
legend("topleft", legend = c("Linear", "Polynomial (4)", "MARS"),
       lwd = c(2, 3, 4), seg.len = 3,
       col = c("darkcyan", "royalblue3", "firebrick"), bty = "n")
