## 
## Master script
## 
## Based on the assignment work from the ISL-course
## 
## Generation 2 - 02.december.2022
## Code preparation for analysis on Denmarks Statistics server with enriched data set.
## 
## Analysis plan:
## Table 1
## Figure 1: Sankey plot (drop & hop colored)
## Table 2: Linear regression model of pase_6~.
## Table 3: Elastic net prediction models of drop and hop. Performance measures referenced in text.
## 
## A Rmarkdown file could be created to write the initial report with main results.
## This code is a bit of a mess, as it is the result of several iterations. It works however.
## 

## ====================================================================
# Step 0: Primary outcome
## ====================================================================

# Script to run as hop and drop

pout <- "drop" # Drop to first quartile

# decl_rel
# decl_abs
# drop
# hop


## ====================================================================
## Data
## ====================================================================


setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")

source("data_set.R")
# Loading data-set from USB, to not store on computer

source("data_format.R")

## ====================================================================
## 
## Baseline - by PASE group
## 
## ====================================================================

ts_q <- X_tbl |> 
  select(vars) |> 
  mutate(pase_0_cut = factor(quantile_cut(pase_0, groups = 4)[[1]],ordered = TRUE)) |> 
  select(-pase_6,-pase_0) |> 
  tbl_summary(missing = "no",
                  by="pase_0_cut",
                  value = list(where(is.factor) ~ "2"),
                  type = list(mrs_0 ~ "categorical",
                              all_continuous() ~ "continuous2"),
                  statistic = list(all_continuous() ~ c("{N_nonmiss}",
                                                        "{median} ({p25}, {p75})", 
                                                        "{min}, {max}",
                                                        "{mean} ({sd})"))
) |> 
  add_overall() |> 
  add_n ()

ts_q

## ====================================================================
# Drops and hops
## ====================================================================

# TRUEs are patients dropping
table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])

# TRUEs are percentage of patients inactive before stroke being more active after
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])

# TRUEs are percentage of patients being more active after that were inactive before stroke
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])

# Difference between hop/no-hop
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])

summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])

summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])

boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])

# Stationary low
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])

boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])

## ====================================================================
# Sankey plot
## ====================================================================

# source("sankey.R")
# p_delta

## ====================================================================
# Six months PASE: Bivariate and multivariate analyses
## ====================================================================

dta_lmreg <- X_tbl |> 
  select(vars) |> 
  mutate(mrs_0=factor(ifelse(mrs_0==1,1,2))) 

Hmisc::label(dta_lmreg$mrs_0) <- "Pre-stroke mRS >0"

uv_reg <- tbl_uvregression(data=dta_lmreg, 
                           method=lm, 
                           y="pase_6",
                           show_single_row = where(is.factor),
                           estimate_fun = ~style_sigfig(.x,digits = 3),
                           pvalue_fun = ~style_pvalue(.x, digits = 3)
)

mu_reg <- dta_lmreg |>
  lm(formula=pase_6~.,data=_) |> 
  tbl_regression(show_single_row = where(is.factor),
                 estimate_fun = ~style_sigfig(.x,digits = 3),
                 pvalue_fun = ~style_pvalue(.x, digits = 3)
  )|> 
  add_n()

tbl_merge(list(uv_reg,mu_reg))


## ====================================================================
## 
## Data variance
## 
## Illustrating principal components.
## 
## ====================================================================


# source("PCA.R")
# 
# 
# pca22
# ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm")


## ====================================================================
## 
## Models
## 
## ====================================================================


# source("assign_full.R")

ls <- list()
for (i in c("drop","hop")){
  pout <- i
  source("data_format.R")
  source("regularisation_steps.R")
}

# Loop to run regularised model on both drop and hop.
# Saved in list for printing and exporting the plot.

## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(ls$drop$RegularisedCoefs$'_data',ls$hop$RegularisedCoefs$'_data',by="name",all.x=T, sort=F)

## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
  gt()%>%
  fmt_number(
    columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
    rows = everything(),
    decimals = 3)%>%
  tab_spanner(
    label = "DROP",
    columns = 2:5
  )%>%
  tab_spanner(
    label = "HOP",
    columns = 6:9
  )%>%
  tab_header(
    title = "Model coefficients",
    subtitle = "Combined table of both full and regularised model coefficients"
  )


# paste0("Regularised model, (a=",
#        best_alph,
#        ", l=",
#        round(best_lamb,3),
#        ")")

com_coef_tbl

## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)


## ====================================================================
## 
## Model performance
## 
## Table with performance meassures for the two different models.
## 
## ====================================================================

## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
                "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
                "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))

## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
  gt()%>%
  tab_header(
    title = "Performance meassures",
    subtitle = "Combined table of both full and regularised performance meassures"
  )

tbl_perf

## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)
# 

## ====================================================================
## 
## Secondary analysis
## 
## ====================================================================

Xy<-dta_s
X<-dta_s|>select(-group)
y<-dta_s$group


source("assign_full.R")
source("regularisation_steps.R")

## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)

## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
  gt()%>%
  fmt_number(
    columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
    rows = everything(),
    decimals = 3)%>%
  tab_spanner(
    label = "Full model",
    columns = 6:8
  )%>%
  tab_spanner(
    label = paste0("Regularised model, (a=",
                   best_alph,
                   ", l=",
                   round(best_lamb,3),
                   ")"),
    columns = 2:5
  )%>%
  tab_header(
    title = "Model coefficients",
    subtitle = "Combined table of both full and regularised model coefficients"
  )

com_coef_tbl

## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2_sec.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)


## ====================================================================
## 
## Model performance
## 
## Table with performance meassures for the two different models.
## 
## ====================================================================


## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
                "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
                "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))

## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
  gt()%>%
  tab_header(
    title = "Performance meassures",
    subtitle = "Combined table of both full and regularised performance meassures"
  )

tbl_perf

## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3_sec.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)