## 
## Master script
## 
## Based on the assignment work from the ISL-course
## 
## 
## 

## ====================================================================
# Step 0: Primary outcome
## ====================================================================

# Difs
rel_dif <- 20 # 20 % difference
abs_dif <- 20 # 20 point diff

# pout <- "diff"
# 
# Note:: By increasing the relative decline, the sensitivity increases and specificity declines.
# This fact is an argument against over fitting. The reason being the nature of the clinical data and the fact, that predicting PA is difficult (!) 
# 
pout <- "drop" # Drop to first quartile

# decl_rel
# decl_abs
# drop


## ====================================================================
## Data
## ====================================================================


setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")

source("data_set.R")
# Loading data-set from USB, to not store on computer

source("data_format.R")

## ====================================================================
# Libraries
## ====================================================================


library(tidyverse)
library(glue)
library(patchwork)
library(ggdendro)
library(corrplot)
library(gt)
library(gtsummary)


## ====================================================================
## 
## Baseline
## 
## ====================================================================


## ====================================================================
# Step 0: labels
## ====================================================================


lbs<-var.labels[match(colnames(X_tbl), 
                      names(var.labels))]

ls<-lapply(1:ncol(X_tbl),function(x){
  as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
})

ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
                by = "group",
                missing = "no",
                # label = ls[-length(ls)], ## Removing the last, as this is output
                value = list(where(is.factor) ~ "2"),
                type = list(mrs_0 ~ "categorical"),
                statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
)%>%
  add_overall() %>%
  add_n()%>%
  as_gt()

ts

ts_rtf <- file("table1.RTF", "w")
writeLines(ts%>%as_rtf(), ts_rtf)
close(ts_rtf)


## ====================================================================
# Step 1: labels
## ====================================================================
lbs<-var.labels[match(colnames(X_tbl_f), names(var.labels))]

ls<-lapply(1:ncol(X_tbl_f),function(x){
  as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
})

## ====================================================================
# Step 2: table - edited
## ====================================================================

ts_e<-tbl_summary(X_tbl,
                  missing = "no",
                  # label = ls[-length(ls)], ## Removing the last, as this is output
                  value = list(where(is.factor) ~ "2"),
                  type = list(mrs_0 ~ "categorical",
                              mrs_1 ~ "categorical"),
                  statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
)%>%
  as_gt()

ts_e

## ====================================================================
# Step 3: table export
## ====================================================================

ts_rtf <- file("table1_overall.RTF", "w")
writeLines(ts%>%as_rtf(), ts_rtf)
close(ts_rtf)


## ====================================================================
## 
## Data variance
## 
## Illustrating principal components.
## 
## ====================================================================


# source("PCA.R")
# 
# 
# pca22
# ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm")


## ====================================================================
## 
## Models
## 
## ====================================================================


source("assign_full.R")
source("regularisation_steps.R")

## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)

## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
  gt()%>%
  fmt_number(
    columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
    rows = everything(),
    decimals = 3)%>%
  tab_spanner(
    label = "Full model",
    columns = 6:8
  )%>%
  tab_spanner(
    label = paste0("Regularised model, (a=",
                   best_alph,
                   ", l=",
                   round(best_lamb,3),
                   ")"),
    columns = 2:5
  )%>%
  tab_header(
    title = "Model coefficients",
    subtitle = "Combined table of both full and regularised model coefficients"
  )

com_coef_tbl

## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)


## ====================================================================
## 
## Model performance
## 
## Table with performance meassures for the two different models.
## 
## ====================================================================

# ROC curve of best model

p_roc
ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")


## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
                "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
                "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))

## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
  gt()%>%
  tab_header(
    title = "Performance meassures",
    subtitle = "Combined table of both full and regularised performance meassures"
  )

tbl_perf

## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)
# 

## ====================================================================
## 
## Secondary analysis
## 
## ====================================================================

Xy<-dta_s
X<-dta_s|>select(-group)
y<-dta_s$group


source("assign_full.R")
source("regularisation_steps.R")

## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)

## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
  gt()%>%
  fmt_number(
    columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
    rows = everything(),
    decimals = 3)%>%
  tab_spanner(
    label = "Full model",
    columns = 6:8
  )%>%
  tab_spanner(
    label = paste0("Regularised model, (a=",
                   best_alph,
                   ", l=",
                   round(best_lamb,3),
                   ")"),
    columns = 2:5
  )%>%
  tab_header(
    title = "Model coefficients",
    subtitle = "Combined table of both full and regularised model coefficients"
  )

com_coef_tbl

## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2_sec.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)


## ====================================================================
## 
## Model performance
## 
## Table with performance meassures for the two different models.
## 
## ====================================================================


## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
                "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
                "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))

## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
  gt()%>%
  tab_header(
    title = "Performance meassures",
    subtitle = "Combined table of both full and regularised performance meassures"
  )

tbl_perf

## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3_sec.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)