#' Draw several plots on one page, arranged in a grid.
#'
#' @param ... Plot objects to draw. Each one is rendered with
#'   `print(plot, vp = <viewport>)`.
#' @param plotlist Optional list of further plot objects; appended after the
#'   objects passed through `...`.
#' @param file Never referenced in the function body; kept only so existing
#'   calls that supply it keep working.
#' @param cols Number of columns of the grid. Only used when `layout` is NULL.
#' @param layout Optional matrix of plot indices. Plot `i` is drawn in the
#'   cell(s) where `layout == i`. When NULL, a matrix with `cols` columns and
#'   as many rows as needed is built and filled column by column.
#' @return Invisibly, NULL when zero or several plots were given; for a single
#'   plot, whatever `print()` returns for it.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  library(grid)

  # Every argument not matched by name ends up in `...` and is treated as a
  # plot, so callers must spell `plotlist`, `cols` and `layout` exactly.
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: leave the current device untouched instead of failing
  # later on plots[[1]].
  if (num_plots == 0L) {
    return(invisible(NULL))
  }

  # If layout is NULL, derive it from 'cols'.
  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  # A single plot needs no grid; print it on the current device as is.
  if (num_plots == 1L) {
    return(print(plots[[1]]))
  }

  # Set up the page with one grid cell per layout entry.
  grid.newpage()
  pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

  # Draw each plot in the cell(s) whose layout value equals its index.
  for (i in seq_len(num_plots)) {
    cells <- as.data.frame(which(layout == i, arr.ind = TRUE))

    print(plots[[i]], vp = viewport(layout.pos.row = cells$row,
                                    layout.pos.col = cells$col))
  }

  invisible(NULL)
}

# All paths below are relative to the working directory; set it to the
# project root before running this script.

library(ggplot2)
library(ggrepel)
library(plyr)  # not used in the code visible here; kept so the attached packages stay the same

# Build the ECDF scatter plot for one party.
#   scores:     one row per MP with columns `score`, `ecdf` and `LastName`
#   label_data: the rows of `scores` that get a name label and a ring marker
#   title:      plot title
ecdf_label_plot <- function(scores, label_data, title) {
  ggplot(scores, aes(x = score, y = ecdf)) +
    xlab("") + ylab("") +
    geom_point(alpha = 0.8) +
    # `label` is read from the layer's own data rather than via `$` on a
    # global data frame, so the labels always match the rows being drawn.
    geom_text_repel(data = label_data,
                    aes(label = LastName), fontface = "bold", color = "black",
                    box.padding = unit(0.35, "lines"),
                    point.padding = unit(0.5, "lines"),
                    segment.color = "grey", size = 3) +
    geom_point(data = label_data, shape = 21, colour = "black", size = 3) +
    # No layer maps a colour aesthetic, so this scale and the legend settings
    # below currently have no visible effect.
    scale_color_manual(values = c("dodgerblue2", "red4")) +
    # The complete theme goes first: added last, it would discard the legend
    # tweaks that follow.
    theme_classic() +
    theme(legend.position = "bottom", legend.title = element_blank()) +
    ggtitle(title)
}

# Load sessional accuracy estimates from the machine classifier.
session2015 <- read.csv("data/MLEstimatesLowerH/sgd_individual_estimates_2015.csv")

# Get party info, restricted to the rows with Language "DE".
persinfo <- read.csv("data/person_ids_parties.csv")
persinfo <- persinfo[persinfo$Language == "DE", ]
persinfo <- subset(persinfo, select = c(PersonNumber, PartyAbbreviation, LastName))

# Merge on PersonNumber.
session2015 <- merge(session2015, persinfo, by = "PersonNumber")

# ECDF for the SVP ----
# NOTE(review): the SVP score is the mean of column `V`, the FDP score the
# mean of column `RL` -- presumably party-specific classifier outputs; confirm.
session2015SVP <- session2015[session2015$PartyAbbreviation == "SVP", ]
session2015SVP <- aggregate(V ~ PersonNumber + PartyAbbreviation + LastName,
                            data = session2015SVP, FUN = mean)
names(session2015SVP)[names(session2015SVP) == "V"] <- "score"

Fn <- ecdf(session2015SVP$score)
session2015SVP$ecdf <- Fn(session2015SVP$score)
session2015SVP <- session2015SVP[order(session2015SVP$ecdf), ]
session2015SVP$mp_id <- seq_len(nrow(session2015SVP))
session2015SVP$LastName <- as.character(session2015SVP$LastName)

# Label only the MPs at the low and high ends of the score range.
labelDataSVP <- session2015SVP[session2015SVP$score < 0.4 | session2015SVP$score > 0.7, ]

session2015SVPplot <- ecdf_label_plot(session2015SVP, labelDataSVP,
                                      "ECDF for the SVP, 2015 Session")

session2015SVPplot

# ECDF for the FDP ----
session2015FDP <- session2015[session2015$PartyAbbreviation == "FDP-Liberale", ]
session2015FDP <- aggregate(RL ~ PersonNumber + PartyAbbreviation + LastName,
                            data = session2015FDP, FUN = mean)
names(session2015FDP)[names(session2015FDP) == "RL"] <- "score"

Fn <- ecdf(session2015FDP$score)
session2015FDP$ecdf <- Fn(session2015FDP$score)
session2015FDP <- session2015FDP[order(session2015FDP$ecdf), ]
session2015FDP$mp_id <- seq_len(nrow(session2015FDP))
session2015FDP$LastName <- as.character(session2015FDP$LastName)

# Label only the MPs at the low and high ends of the score range.
labelDataFDP <- session2015FDP[session2015FDP$score < 0.2 | session2015FDP$score > 0.6, ]

session2015FDPplot <- ecdf_label_plot(session2015FDP, labelDataFDP,
                                      "ECDF for the FDP, 2015 Session")

session2015FDPplot

# Write both plots stacked in a single column. multiplot() has no `rows`
# argument: anything it does not match by name is swallowed by `...` and
# treated as another plot, which would add an empty third panel to the page.
pdf(file = "figures/ecdfs_svp_fdp.pdf", height = .8 * 10, width = .8 * 6)
multiplot(session2015SVPplot, session2015FDPplot, cols = 1)
dev.off()






