PhysicalActivityandStrokeOu.../1 PA Decline/00master.R

##
## Master script
##
## Based on the assignment work from the ISL-course
##
##
##

## ====================================================================
# Step 0: Primary outcome
## ====================================================================

# Difs
rel_dif <- 20 # 20 % difference
abs_dif <- 20 # 20 point diff

# pout <- "diff"
#
# Note:: By increasing the relative decline, the sensitivity increases and specificity declines.
# This fact is an argument against over fitting. The reason being the nature of the clinical data and the fact, that predicting PA is difficult (!)
#
pout <- "drop" # Drop to first quartile

# decl_rel
# decl_abs
# drop


## ====================================================================
## Data
## ====================================================================


setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")

source("data_set.R")
# Loading data-set from USB, to not store on computer

source("data_format.R")

## ====================================================================
# Libraries
## ====================================================================


library(tidyverse)
library(glue)
library(patchwork)
# library(ggdendro)
library(corrplot)
library(gt)
library(gtsummary)


## ====================================================================
##
## Baseline
##
## ====================================================================


## ====================================================================
# Step 0: labels
## ====================================================================


lbs<-var.labels[match(colnames(X_tbl),
                      names(var.labels))]

ls<-lapply(1:ncol(X_tbl),function(x){
  as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
})

ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
                by = "group",
                missing = "no",
                # label = ls[-length(ls)], ## Removing the last, as this is output
                value = list(where(is.factor) ~ "2"),
                type = list(mrs_0 ~ "categorical"),
                statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
)%>%
  add_overall() %>%
  add_n()%>%
  as_gt()

ts

ts_rtf <- file("table1.RTF", "w")
writeLines(ts%>%as_rtf(), ts_rtf)
close(ts_rtf)


## ====================================================================
# Step 1: labels
## ====================================================================
lbs<-var.labels[match(colnames(X_tbl_f), names(var.labels))]

ls<-lapply(1:ncol(X_tbl_f),function(x){
  as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
})

## ====================================================================
# Step 2: table - edited
## ====================================================================

ts_e<-tbl_summary(X_tbl,
                  missing = "no",
                  value = list(where(is.factor) ~ "2"),
                  type = list(mrs_0 ~ "categorical",
                              mrs_1 ~ "categorical"),
                  statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
)%>%
  as_gt()

ts_e

## ====================================================================
# Step 3: table export
## ====================================================================

ts_rtf <- file("table1_overall.RTF", "w")
writeLines(ts%>%as_rtf(), ts_rtf)
close(ts_rtf)

## ====================================================================
# Baseline table - by PASE group
## ====================================================================

ts_q <- X_tbl |>
  select(vars) |>
  mutate(pase_0_cut = factor(quantile_cut(pase_0, groups = 4)[[1]],ordered = TRUE)) |>
  select(-pase_6,-pase_0) |>
  tbl_summary(missing = "no",
                  by="pase_0_cut",
                  value = list(where(is.factor) ~ "2"),
                  type = list(mrs_0 ~ "categorical"),
                  statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
) |>
  add_overall() |>
  add_n

ts_q


## ====================================================================
# Six months PASE: Bivariate and multivariate analyses
## ====================================================================

dta_lmreg <- X_tbl |>
  select(vars) |>
  mutate(mrs_0=factor(ifelse(mrs_0==1,1,2)))

Hmisc::label(dta_lmreg$mrs_0) <- "Pre-stroke mRS >0"

uv_reg <- tbl_uvregression(data=dta_lmreg,
                           method=lm,
                           y="pase_6",
                           show_single_row = where(is.factor),
                           estimate_fun = ~style_sigfig(.x,digits = 3),
                           pvalue_fun = ~style_pvalue(.x, digits = 3)
)

mu_reg <- dta_lmreg |>
  lm(formula=pase_6~.,data=_) |>
  tbl_regression(show_single_row = where(is.factor),
                 estimate_fun = ~style_sigfig(.x,digits = 3),
                 pvalue_fun = ~style_pvalue(.x, digits = 3)
  )|>
  add_n()

tbl_merge(list(uv_reg,mu_reg))


## ====================================================================
##
## Data variance
##
## Illustrating principal components.
##
## ====================================================================


# source("PCA.R")
#
#
# pca22
# ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm")


## ====================================================================
##
## Models
##
## ====================================================================


source("assign_full.R")
source("regularisation_steps.R")

## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)

## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
  gt()%>%
  fmt_number(
    columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
    rows = everything(),
    decimals = 3)%>%
  tab_spanner(
    label = "Full model",
    columns = 6:8
  )%>%
  tab_spanner(
    label = paste0("Regularised model, (a=",
                   best_alph,
                   ", l=",
                   round(best_lamb,3),
                   ")"),
    columns = 2:5
  )%>%
  tab_header(
    title = "Model coefficients",
    subtitle = "Combined table of both full and regularised model coefficients"
  )

com_coef_tbl

## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)


## ====================================================================
##
## Model performance
##
## Table with performance meassures for the two different models.
##
## ====================================================================

# ROC curve of best model

p_roc
ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")


## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
                "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
                "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))

## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
  gt()%>%
  tab_header(
    title = "Performance meassures",
    subtitle = "Combined table of both full and regularised performance meassures"
  )

tbl_perf

## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)
#

## ====================================================================
##
## Secondary analysis
##
## ====================================================================

Xy<-dta_s
X<-dta_s|>select(-group)
y<-dta_s$group


source("assign_full.R")
source("regularisation_steps.R")

## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)

## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
  gt()%>%
  fmt_number(
    columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
    rows = everything(),
    decimals = 3)%>%
  tab_spanner(
    label = "Full model",
    columns = 6:8
  )%>%
  tab_spanner(
    label = paste0("Regularised model, (a=",
                   best_alph,
                   ", l=",
                   round(best_lamb,3),
                   ")"),
    columns = 2:5
  )%>%
  tab_header(
    title = "Model coefficients",
    subtitle = "Combined table of both full and regularised model coefficients"
  )

com_coef_tbl

## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2_sec.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)


## ====================================================================
##
## Model performance
##
## Table with performance meassures for the two different models.
##
## ====================================================================


## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
                "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
                "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))

## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
  gt()%>%
  tab_header(
    title = "Performance meassures",
    subtitle = "Combined table of both full and regularised performance meassures"
  )

tbl_perf

## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3_sec.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)