All analyses of project one moved to the new generation (old saved as gen_1)

This commit is contained in:
AG Damsbo 2022-12-02 16:43:31 +01:00
parent 244add2bb4
commit d6fc414822
29 changed files with 1319 additions and 123 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@@ -3,27 +3,31 @@
 ##
 ## Based on the assignment work from the ISL-course
 ##
+## Generation 2 - 2 December 2022
+## Code preparation for analysis on the Statistics Denmark server with the enriched data set.
 ##
+## Analysis plan:
+## Table 1
+## Figure 1: Sankey plot (drop & hop coloured)
+## Table 2: Linear regression model of pase_6~.
+## Table 3: Elastic net prediction models of drop and hop. Performance measures referenced in text.
+##
+## An R Markdown file could be created to write the initial report with the main results.
+## This code is a bit of a mess, as it is the result of several iterations. It works, however.
 ##
 ## ====================================================================
 # Step 0: Primary outcome
 ## ====================================================================
-# Difs
+# Script to run as hop and drop
 rel_dif <- 20 # 20 % difference
 abs_dif <- 20 # 20 point diff
 # pout <- "diff"
 #
 # Note: by increasing the relative decline threshold, sensitivity increases and specificity declines.
 # This argues against overfitting, given the nature of the clinical data and the fact that predicting PA is difficult (!)
 #
 pout <- "drop" # Drop to first quartile
 # decl_rel
 # decl_abs
 # drop
 # hop
 ## ====================================================================
@@ -38,20 +42,6 @@ source("data_set.R")
 source("data_format.R")
-## ====================================================================
-# Libraries
-## ====================================================================
-library(tidyverse)
-library(glue)
-library(patchwork)
-# library(ggdendro)
-library(corrplot)
-library(gt)
-library(gtsummary)
 ## ====================================================================
 ##
 ## Baseline
@@ -64,30 +54,30 @@ library(gtsummary)
 ## ====================================================================
-lbs<-var.labels[match(colnames(X_tbl),
-                      names(var.labels))]
-ls<-lapply(1:ncol(X_tbl),function(x){
-  as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
-})
-ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
-                by = "group",
-                missing = "no",
-                # label = ls[-length(ls)], ## Removing the last, as this is output
-                value = list(where(is.factor) ~ "2"),
-                type = list(mrs_0 ~ "categorical"),
-                statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
-)%>%
-  add_overall() %>%
-  add_n()%>%
-  as_gt()
-ts
-ts_rtf <- file("table1.RTF", "w")
-writeLines(ts%>%as_rtf(), ts_rtf)
-close(ts_rtf)
+# lbs<-var.labels[match(colnames(X_tbl),
+#                       names(var.labels))]
+#
+# ls<-lapply(1:ncol(X_tbl),function(x){
+#   as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
+# })
+#
+# ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
+#                 by = "group",
+#                 missing = "no",
+#                 # label = ls[-length(ls)], ## Removing the last, as this is output
+#                 value = list(where(is.factor) ~ "2"),
+#                 type = list(mrs_0 ~ "categorical"),
+#                 statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
+# )%>%
+#   add_overall() %>%
+#   add_n()%>%
+#   as_gt()
+#
+# ts
+#
+# ts_rtf <- file("table1.RTF", "w")
+# writeLines(ts%>%as_rtf(), ts_rtf)
+# close(ts_rtf)
@@ -104,24 +94,24 @@ ls<-lapply(1:ncol(X_tbl_f),function(x){
 # Step 2: table - edited
 ## ====================================================================
-ts_e<-tbl_summary(X_tbl,
-                  missing = "no",
-                  value = list(where(is.factor) ~ "2"),
-                  type = list(mrs_0 ~ "categorical",
-                              mrs_1 ~ "categorical"),
-                  statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
-)%>%
-  as_gt()
-ts_e
+# ts_e<-tbl_summary(X_tbl,
+#                   missing = "no",
+#                   value = list(where(is.factor) ~ "2"),
+#                   type = list(mrs_0 ~ "categorical",
+#                               mrs_1 ~ "categorical"),
+#                   statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
+# )%>%
+#   as_gt()
+#
+# ts_e
 ## ====================================================================
 # Step 3: table export
 ## ====================================================================
-ts_rtf <- file("table1_overall.RTF", "w")
-writeLines(ts%>%as_rtf(), ts_rtf)
-close(ts_rtf)
+# ts_rtf <- file("table1_overall.RTF", "w")
+# writeLines(ts%>%as_rtf(), ts_rtf)
+# close(ts_rtf)
 ## ====================================================================
 # Baseline table - by PASE group
@@ -138,11 +128,43 @@ ts_q <- X_tbl |>
              statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
   ) |>
   add_overall() |>
-  add_n
+  add_n()
 ts_q
+## ====================================================================
+# Drops and hops
+## ====================================================================
+# TRUEs are patients dropping
+table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])
+# TRUEs: share of patients inactive before stroke who were more active after
+table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])
+# TRUEs: share of patients more active after who were inactive before stroke
+table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])
+# Difference between hop/no-hop
+t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])
+summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+# Stationary low
+t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
+boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
+## ====================================================================
+# Sankey plot
+## ====================================================================
+# source("sankey.R")
+# p_delta
 ## ====================================================================
 # Six months PASE: Bivariate and multivariate analyses
@@ -196,13 +218,22 @@ tbl_merge(list(uv_reg,mu_reg))
 ## ====================================================================
-source("assign_full.R")
-source("regularisation_steps.R")
+# source("assign_full.R")
+ls <- list()
+for (i in c("drop","hop")){
+  pout <- i
+  source("data_format.R")
+  source("regularisation_steps.R")
+}
+# Loop to run the regularised model on both drop and hop.
+# Results are saved in a list for printing and exporting the plot.
 ## ====================================================================
 # Step 1: data merge
 ## ====================================================================
-tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
+tbl<-merge(ls$drop$RegularisedCoefs$'_data',ls$hop$RegularisedCoefs$'_data',by="name",all.x=T, sort=F)
 ## ====================================================================
 # Step 2: table
@@ -214,22 +245,25 @@ com_coef_tbl<-tbl%>%
     rows = everything(),
     decimals = 3)%>%
   tab_spanner(
-    label = "Full model",
-    columns = 6:8
+    label = "DROP",
+    columns = 2:5
   )%>%
   tab_spanner(
-    label = paste0("Regularised model, (a=",
-                   best_alph,
-                   ", l=",
-                   round(best_lamb,3),
-                   ")"),
-    columns = 2:5
+    label = "HOP",
+    columns = 6:9
   )%>%
   tab_header(
     title = "Model coefficients",
     subtitle = "Combined table of both full and regularised model coefficients"
   )
+# paste0("Regularised model, (a=",
+#        best_alph,
+#        ", l=",
+#        round(best_lamb,3),
+#        ")")
 com_coef_tbl
 ## ====================================================================
@@ -249,12 +283,6 @@ close(com_coef_rtf)
 ##
 ## ====================================================================
-# ROC curve of best model
-p_roc
-ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
 ## ====================================================================
 # Step 1: data set
 ## ====================================================================

BIN
1 PA Decline/archive/.DS_Store vendored Normal file

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,409 @@
##
## Master script
##
## Based on the assignment work from the ISL-course
##
##
##
## ====================================================================
# Step 0: Primary outcome
## ====================================================================
# Difs
rel_dif <- 20 # 20 % difference
abs_dif <- 20 # 20 point diff
# pout <- "diff"
#
# Note: by increasing the relative decline threshold, sensitivity increases and specificity declines.
# This argues against overfitting, given the nature of the clinical data and the fact that predicting PA is difficult (!)
#
pout <- "drop" # Drop to first quartile
# decl_rel
# decl_abs
# drop
# hop
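# Toy illustration of the threshold note above (assumed values, not study data):
# raising rel_dif flags fewer patients as decliners, trading sensitivity for specificity.
# decl <- c(5, 15, 25, 40, 80) # relative declines in %
# table(decl >= 20)            # rel_dif = 20 -> 3 flagged
# table(decl >= 40)            # rel_dif = 40 -> 2 flagged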
## ====================================================================
## Data
## ====================================================================
setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")
source("data_set.R")
# Loading the data set from USB so it is not stored on the computer
source("data_format.R")
## ====================================================================
# Libraries
## ====================================================================
library(tidyverse)
library(glue)
library(patchwork)
# library(ggdendro)
library(corrplot)
library(gt)
library(gtsummary)
## ====================================================================
##
## Baseline
##
## ====================================================================
## ====================================================================
# Step 0: labels
## ====================================================================
lbs<-var.labels[match(colnames(X_tbl),
names(var.labels))]
ls<-lapply(1:ncol(X_tbl),function(x){
as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
})
ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
by = "group",
missing = "no",
# label = ls[-length(ls)], ## Removing the last, as this is output
value = list(where(is.factor) ~ "2"),
type = list(mrs_0 ~ "categorical"),
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
)%>%
add_overall() %>%
add_n()%>%
as_gt()
ts
ts_rtf <- file("table1.RTF", "w")
writeLines(ts%>%as_rtf(), ts_rtf)
close(ts_rtf)
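# The file()/writeLines()/close() export pattern above recurs throughout this script;
# a small helper (hypothetical, not part of the original pipeline) would reduce duplication:
# write_rtf <- function(tbl, path) {
#   con <- file(path, "w")
#   on.exit(close(con))
#   writeLines(gt::as_rtf(tbl), con)
# }
# write_rtf(ts, "table1.RTF")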
## ====================================================================
# Step 1: labels
## ====================================================================
lbs<-var.labels[match(colnames(X_tbl_f), names(var.labels))]
ls<-lapply(1:ncol(X_tbl_f),function(x){
as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
})
## ====================================================================
# Step 2: table - edited
## ====================================================================
ts_e<-tbl_summary(X_tbl,
missing = "no",
value = list(where(is.factor) ~ "2"),
type = list(mrs_0 ~ "categorical",
mrs_1 ~ "categorical"),
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
)%>%
as_gt()
ts_e
## ====================================================================
# Step 3: table export
## ====================================================================
ts_rtf <- file("table1_overall.RTF", "w")
writeLines(ts%>%as_rtf(), ts_rtf)
close(ts_rtf)
## ====================================================================
# Baseline table - by PASE group
## ====================================================================
ts_q <- X_tbl |>
select(vars) |>
mutate(pase_0_cut = factor(quantile_cut(pase_0, groups = 4)[[1]],ordered = TRUE)) |>
select(-pase_6,-pase_0) |>
tbl_summary(missing = "no",
by="pase_0_cut",
value = list(where(is.factor) ~ "2"),
type = list(mrs_0 ~ "categorical"),
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
) |>
add_overall() |>
add_n()
ts_q
## ====================================================================
# Drops and hops
## ====================================================================
# TRUEs are patients dropping
table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])
# TRUEs: share of patients inactive before stroke who were more active after
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])
# TRUEs: share of patients more active after who were inactive before stroke
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])
# Difference between hop/no-hop
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
# Stationary low
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
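# Equivalent tidyverse sketch of the first "drop" share above (assumes complete
# pase_0_cut/pase_6_cut, as in X_tbl):
# X_tbl |>
#   filter(pase_0_cut != "1") |>
#   summarise(drop_share = mean(pase_6_cut == "1"))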
## ====================================================================
# Sankey plot
## ====================================================================
if (pout=="drop"){
source("sankey.R")
p_delta
}
## ====================================================================
# Six months PASE: Bivariate and multivariate analyses
## ====================================================================
dta_lmreg <- X_tbl |>
select(vars) |>
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2)))
Hmisc::label(dta_lmreg$mrs_0) <- "Pre-stroke mRS >0"
uv_reg <- tbl_uvregression(data=dta_lmreg,
method=lm,
y="pase_6",
show_single_row = where(is.factor),
estimate_fun = ~style_sigfig(.x,digits = 3),
pvalue_fun = ~style_pvalue(.x, digits = 3)
)
mu_reg <- dta_lmreg |>
lm(formula=pase_6~.,data=_) |>
tbl_regression(show_single_row = where(is.factor),
estimate_fun = ~style_sigfig(.x,digits = 3),
pvalue_fun = ~style_pvalue(.x, digits = 3)
)|>
add_n()
tbl_merge(list(uv_reg,mu_reg))
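# The two halves of the merged table can be labelled via gtsummary's tab_spanner
# argument (sketch):
# tbl_merge(list(uv_reg, mu_reg),
#           tab_spanner = c("**Univariable**", "**Multivariable**"))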
## ====================================================================
##
## Data variance
##
## Illustrating principal components.
##
## ====================================================================
# source("PCA.R")
#
#
# pca22
# ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
## ====================================================================
##
## Models
##
## ====================================================================
source("assign_full.R")
source("regularisation_steps.R")
## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
gt()%>%
fmt_number(
columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
rows = everything(),
decimals = 3)%>%
tab_spanner(
label = "Full model",
columns = 6:8
)%>%
tab_spanner(
label = paste0("Regularised model, (a=",
best_alph,
", l=",
round(best_lamb,3),
")"),
columns = 2:5
)%>%
tab_header(
title = "Model coefficients",
subtitle = "Combined table of both full and regularised model coefficients"
)
com_coef_tbl
## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)
## ====================================================================
##
## Model performance
##
## Table with performance measures for the two different models.
##
## ====================================================================
# ROC curve of best model
p_roc
ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Measure=c(names(full_cfm$byClass),"Mean AUC"),
"Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
"Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
gt()%>%
tab_header(
title = "Performance meassures",
subtitle = "Combined table of both full and regularised performance meassures"
)
tbl_perf
## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)
#
## ====================================================================
##
## Secondary analysis
##
## ====================================================================
Xy<-dta_s
X<-dta_s|>select(-group)
y<-dta_s$group
source("assign_full.R")
source("regularisation_steps.R")
## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
gt()%>%
fmt_number(
columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
rows = everything(),
decimals = 3)%>%
tab_spanner(
label = "Full model",
columns = 6:8
)%>%
tab_spanner(
label = paste0("Regularised model, (a=",
best_alph,
", l=",
round(best_lamb,3),
")"),
columns = 2:5
)%>%
tab_header(
title = "Model coefficients",
subtitle = "Combined table of both full and regularised model coefficients"
)
com_coef_tbl
## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2_sec.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)
## ====================================================================
##
## Model performance
##
## Table with performance measures for the two different models.
##
## ====================================================================
## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Measure=c(names(full_cfm$byClass),"Mean AUC"),
"Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
"Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
gt()%>%
tab_header(
title = "Performance meassures",
subtitle = "Combined table of both full and regularised performance meassures"
)
tbl_perf
## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3_sec.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)

View File

@@ -0,0 +1,290 @@
## ItMLiHSmar2022
## data_format.R, child script
## Data formatting and handling
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
##
## Now modified to use in publication
##
## ====================================================================
# Step 1: Loading libraries
## ====================================================================
library(Hmisc)
library(dplyr)
library(daDoctoR)
library(tidyselect)
## ====================================================================
# Step 2: Loading data
## ====================================================================
# rm(list = ls()) # Clear
# setwd("/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/")
# dta<-read.csv("/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/assigndata.csv")
## ====================================================================
# Step 3: Formatting variables
## ====================================================================
dta <- export %>%
# as_tibble()%>%
mutate(any_rep=factor(ifelse(thrombolysis=="yes"|thrombechtomy=="yes","yes","no")), # If not noted, no therapy was received
male_sex= factor(ifelse(sex=="female","no","yes")),
# smoke_ever=factor(ifelse(smoke_ever=="never","no","yes")),
civil=factor(ifelse(civil=="partner","no","yes")), # Sets "yes" for not-cohabiting
rtreat=factor(ifelse(rtreat=="Placebo","no","yes")), # "Yes" receives active treatment
alc=factor(ifelse(alc=="more","yes","no")), # Yes for more than guideline
pase_0=as.numeric(pase_0),
pase_6=as.numeric(pase_6),
across(c("diabetes",
"hypertension",
"smoker",
# "smoker_prev",
"afli",
"pad",
"ami",
"tci",
"mrs_0",
"mrs_1"),as.factor),
across(c("nihss_c",
"age",
"mdi_1", # For "enriched" analysis
"who5_score_1",
"mfi_gen_1",
"mfi_phys_1",
"mfi_act_1",
"mfi_mot_1",
"mfi_men_1"),as.numeric )
)%>%
select(-c(sex))
## ====================================================================
# Step 4: Defining outcome
## ====================================================================
## Moved to step 7
## This is to perform a proper quantile split based on those actually included.
## ====================================================================
# Step 5: Ordering variables
## ====================================================================
vars <- c("age",
"male_sex",
"civil",
"pase_0",
"smoker",
"alc",
"afli",
"hypertension",
"diabetes",
"pad",
"ami",
"tci",
"mrs_0",
"nihss_c",
"any_rep",
"rtreat",
"pase_6")
dta<-select(dta,c(vars,
"mrs_1",
"mfi_gen_1",
"mfi_phys_1",
"mfi_act_1",
"mfi_mot_1",
"mfi_men_1",
"mdi_1",
"who5_score_1"
))
## ====================================================================
# Step 6: Labeling
## ====================================================================
var.labels = c(age="Age",
male_sex="Male",
civil="Living alone",
pase_0="Pre-stroke PASE score",
pase_6="Six month PASE score",
smoker="Daily or occasinally smoking",
# smoker_prev="Previous habbit of smoking",
alc="More alcohol than recommendation",
afli="AFIB",
hypertension="Hypertension",
diabetes="Diabetes",
pad="PAD",
ami="Previous MI",
tci="Previous TIA",
mrs_0="Pre-stroke mRS [-1]",
nihss_c="Acute NIHSS score",
thrombolysis="Acute thrombolysis",
thrombechtomy="Acute thrombechtomy",
any_rep="Any reperfusion therapy",
rtreat="Active trial treatment",
mrs_1="One month mRS [-1]",
mfi_gen_1="One month MFI (General fatigue)",
mfi_phys_1="One month MFI (Physical fatigue)",
mfi_act_1="One month MFI (Reduced activity)",
mfi_mot_1="One month MFI (Reduced motivation)",
mfi_men_1="One month MFI (Mental fatigue)",
mdi_1="One month MDI",
who5_score_1="One month WHO5",
pase_decl_rel_fac="PASE score difference, relative F",
pase_decl_abs_fac="PASE score difference, absolute F",
pase_drop_fac="PASE first quartile drop F",
pase_hop_fac="PASE first quartile hop F",
pase_diff="PASE absolute decline",
pase_decl_rel="PASE relative decline",
pase_0_cut="PASE 0 quartiles",
pase_6_cut="PASE 6 quartiles")
## Labelling based on outcome flag
if (pout=="decl_rel"|pout=="decl_abs"){
var.labels = c(var.labels,group="PASE decline")}
if (pout=="drop"){
var.labels = c(var.labels,group="PASE drop")}
## ====================================================================
# Step 7: final data export
## ====================================================================
data_summary<-summary(dta)
# Saving "old" factorised variables
sel<-sapply(dta,is.factor)
# Reformatting factors as 1/2 for analysis
dta<-dta |>
mutate(across(where(is.factor), as.numeric))|> # Turning factors into 1(no) or 2(yes) for model. Numbered alphabetically.
mutate(across(matches(colnames(dta)[sel]), as.factor),
across(starts_with("pase_"), as.numeric))
# Filtering out non-PASE
X_tbl<-dta |>
filter(!is.na(pase_0),!is.na(pase_6))
nrow(X_tbl)
# Defining possible outcome measures. Keeping them in the df for characterisation
X_tbl <- X_tbl|>
mutate(## Relative decline
pase_diff=(pase_0-pase_6),
pase_decl_rel = pase_diff/pase_0*100,
pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
## Absolute decline
pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
## Drop
pase_0_cut=quantile_cut(as.numeric(pase_0),
groups=4,
group.names = c(as.character(1:4)),
y=as.numeric(pase_0),
ordered.f = TRUE,
inc.outs = TRUE,
detail.lst=FALSE),
pase_6_cut=quantile_cut(as.numeric(pase_6),
groups=4,
group.names = c(as.character(1:4)),
y=as.numeric(pase_0),
ordered.f = TRUE,
inc.outs = TRUE,
detail.lst=FALSE),
pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")),
pase_hop_fac=factor(ifelse(pase_6_cut!=1&pase_0_cut==1,"yes","no")))
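# Toy sketch of the drop/hop definitions in base R (quantile_cut() from daDoctoR is
# used above; both scores are cut at pre-stroke quartiles, mirroring y = pase_0):
# p0 <- c(10, 40, 90, 150); p6 <- c(12, 20, 100, 140)
# brk <- quantile(p0, 0:4/4)
# q0 <- cut(p0, brk, include.lowest = TRUE, labels = 1:4)
# q6 <- cut(p6, brk, include.lowest = TRUE, labels = 1:4)
# q6 == 1 & q0 != 1 # drop: FALSE TRUE FALSE FALSE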
Hmisc::label(X_tbl) = as.list(var.labels[match(names(X_tbl), names(var.labels))])
# Setting final primary output from "pout"
if (pout=="decl_rel"){
X_tbl <- X_tbl|>
mutate(group=pase_decl_rel_fac)
X_tbl_f <- X_tbl|>
filter(pase_0!=0)|>
select(-starts_with("pase_"))
}
if (pout=="decl_abs"){
X_tbl <- X_tbl|>
mutate(group=pase_decl_rel_fac)
X_tbl_f <- X_tbl|>
filter(pase_0>=abs_dif)|>
select(-starts_with("pase_"))
}
if (pout=="drop"){
X_tbl <- X_tbl|>
mutate(group=pase_drop_fac)
# print(quantile(as.numeric(X_tbl$pase_0)))
# print(quantile(as.numeric(X_tbl$pase_6)))
# print(summary(X_tbl$pase_0_cut))
X_tbl_f <- X_tbl|>
filter(pase_0_cut!=1)|>
select(-starts_with("pase_"))
}
if (pout=="hop"){
X_tbl <- X_tbl|>
mutate(group=pase_hop_fac)
# print(quantile(as.numeric(X_tbl$pase_0)))
# print(quantile(as.numeric(X_tbl$pase_6)))
# print(summary(X_tbl$pase_0_cut))
X_tbl_f <- X_tbl|>
filter(pase_6_cut!=1)|>
select(-starts_with("pase_"))
}
# Excluding one month measures for primary analysis and setting df for table one
X_tbl_f <- X_tbl_f |>
select(-c(who5_score_1,
mdi_1,
mrs_1,
starts_with("mfi_"))) # Left out of model as no present in drop-group
# Dropping non-complete for analysis
Xy <- X_tbl_f|>
na.omit()|> # Keeping only complete observations
select(-c(tci) # Left out of the model, as none are present in the drop group
)|>
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2))) # Sets binary mRS 0 to include in glmnet, 0 or above
label(Xy) = as.list(var.labels[match(names(Xy), names(var.labels))])
X<-dplyr::select(Xy, -group, -starts_with("pase_")) # Exclude primary outcome
y<-Xy$group
## ====================================================================
# Secondary analysis
## ====================================================================
dta_s<-X_tbl|>
select(-c(tci),
-starts_with("pase_"))|>
na.omit()|>
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2)),# Sets binary mRS 0 to include in glmnet, 0 or above
mrs_1=factor(ifelse(mrs_1==1,1,2)))# Sets binary mRS 1 to include in glmnet, 0 or above
label(dta_s) = as.list(var.labels[match(names(dta_s), names(var.labels))])
## ====================================================================
# Step 8: Loading rest of libraries
## ====================================================================
library(tidyverse)
library(patchwork)
library(caret)
library(glmnet)
library(leaps)
library(pROC)
library(gt)
library(gtsummary)
library(dplyr)

View File

@@ -0,0 +1,49 @@
## ItMLiHS assignment data set
export<-read.csv("/Volumes/Data 1/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
export<-export[,c("pase_0",
"age",
"sex",
"civil",
"smoke_ever",
"smoker",
"rtreat",
"alc",
"afli",
"hypertension",
"diabetes",
"mrs_0",
"nihss_c",
"thrombolysis",
"pad",
"thrombechtomy",
"ami",
"tci",
"pase_6",
"mrs_1",
"mfi_gen_1",
"mfi_phys_1",
"mfi_act_1",
"mfi_mot_1",
"mfi_men_1",
"mdi_1",
"who5_score_1")]
export$diabetes[is.na(export$diabetes)]<-"no"
export$hypertension[is.na(export$hypertension)]<-"no"
export$thrombolysis[is.na(export$thrombolysis)]<-"no"
export$thrombechtomy[is.na(export$thrombechtomy)]<-"no"
export$pad[is.na(export$pad)]<-"no"
export$ami[is.na(export$ami)]<-"no"
# export$smoker_prev <- ifelse(export$smoker=="3","yes","no")
export$smoker <- ifelse(export$smoker=="1","yes","no")
export$smoker[is.na(export$smoker)] <- "no"
# export$mrs_0[export$mrs_0==3]<-NA
# export<-na.omit(export)
export
# write.csv(export,"/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/assigndata.csv",row.names = FALSE)
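# Quick sketch to verify the recoding above: remaining NAs per column
# colSums(is.na(export))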

View File

(image diff: 317 KiB before, 317 KiB after; not shown)

View File

@@ -0,0 +1,117 @@
## ItMLiHSmar2022
## regular_fun.R, child script
## Regularisation model building function
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
##
## Now modified to use in publication
##
regular_fun<-function(X,y,K,lambdas,alpha){
n<-nrow(X)
set.seed(321)
# Using caret function to ensure both levels represented in all folds
c<-createFolds(y=y, k = K, list = FALSE, returnTrain = TRUE)
B<-yhatTestProbKeep<-list()
accTrain<-accTest<-err_train<-err_test<-auc_train<-auc_test<-matrix(nrow = K,ncol = length(lambdas))
catinfo<-levels(y)
cMatTrain<-cMatTest<-table(true=factor(c(0,0),levels=catinfo),pred=factor(c(0,0),levels=catinfo)) # Initialise zero confusion matrices with the outcome levels as dimnames
## Iterate over partitions
for (idx1 in 1:K){
# Status
cat('Processing fold', idx1, 'of', K,'\n')
# idx1=1
# Get training- and test sets
I_train = c!=idx1 ## Creating selection vector of TRUE/FALSE
I_test = !I_train
Xtrain = X[I_train,]
ytrain = y[I_train]
Xtest = X[I_test,]
ytest = y[I_test]
## Model matrices for glmnet
## Using the complicated approach not to include first level.
# Xmat.train<-model.matrix(~ .-1, data=Xtrain,
# contrasts.arg = lapply(Xtrain[,sapply(Xtrain, is.factor)],
# contrasts, contrasts=T))
# Xmat.test<-model.matrix(~ .-1, data=Xtest,
# contrasts.arg = lapply(Xtest[,sapply(Xtest, is.factor)],
# contrasts, contrasts=T))
# Xmat.train<-model.matrix(~.-1,Xtrain)
# Xmat.test<-model.matrix(~.-1,Xtest)
# Weights
ytrain_weight<-as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))
# ytest_weight<-as.vector(1 / (table(ytest)[ytest] / length(ytest)))
# Fit regularized linear regression model
mod<-glmnet(Xtrain, ytrain,
alpha = alpha, ## Alpha = 1 for lasso
lambda = lambdas, ## Setting lambdas
standardize = TRUE, ## Scales and centers
weights = ytrain_weight,
family = "binomial"
)
# Keep coefficients for plot
B[[idx1]] <- as.matrix(coef(mod))
# Iterate over regularization strengths to compute training- and test
# errors for individual regularization strengths.
for (idx2 in 1:length(lambdas)){
# idx2=1
# Predict
yhatTrainProb<-predict(mod,
s = lambdas[idx2],
newx = data.matrix(Xtrain),
type = "response"
)
yhatTestProb<-predict(mod,
s = lambdas[idx2],
newx = data.matrix(Xtest),
type = "response"
)
# Compute training and test error
yhatTrain = round(yhatTrainProb)
yhatTest = round(yhatTestProb)
# Make predictions categorical again (instead of 0/1 coding)
yhatTrainCat = factor(round(yhatTrainProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
yhatTestCat = factor(round(yhatTestProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
# Evaluate classifier performance
# Accuracy
# accTrain[idx1,idx2] <- sum(yhatTrainCat==ytrain)/length(ytrain)
# accTest [idx1,idx2] <- sum(yhatTestCat==ytest)/length(ytest)
# #
# # Error rate
# err_train[idx1,idx2] = 1 - accTrain[idx1,idx2]
# err_test [idx1,idx2] = 1 - accTest[idx1,idx2]
# AUROC
suppressMessages(
auc_train[idx1,idx2]<-auc(ytrain, yhatTrainCat))
suppressMessages(
auc_test [idx1,idx2]<-auc(ytest, yhatTestCat))
# Compute confusion matrices
cMatTrain = cMatTrain + table(true=ytrain,pred=yhatTrainCat)
cMatTest = cMatTest + table(true=ytest,pred=yhatTestCat)
}
}
ls<-list(mod=mod,B=B,auc_train=auc_train,auc_test=auc_test,cMatTrain=cMatTrain,cMatTest=cMatTest)
return(ls)
}

View File

@@ -0,0 +1,142 @@
## ItMLiHSmar2022
## regularisation_steps.R, child script
## Regularised model building and analysis for assignment
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
##
## Now modified to use in publication
##
## ====================================================================
## Step 0: data import and wrangling
## ====================================================================
setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")
# source("data_format.R")
y1<-factor(as.integer(y)-1) ## Outcome is required to be factor of 0 or 1.
## ====================================================================
## Step 1: settings
## ====================================================================
## Folds
K=10
set.seed(3)
c<-caret::createFolds(y=y,
k = K,
list = FALSE,
returnTrain = TRUE) # Foldids for alpha tuning
## Defining tuning parameters
lambdas=2^seq(-10, 5, 1)
alphas<-seq(0,1,.1)
## Weights for models
weighted=TRUE
if (weighted == TRUE) {
wght<-as.vector(1 - (table(y)[y] / length(y)))
} else {
wght <- rep(1, length(y)) # y is a factor, so length(), not nrow()
}
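# Toy illustration of the inverse-frequency weights above (assumed data):
# y_toy <- factor(c("no", "no", "no", "yes"))
# 1 - (table(y_toy)[y_toy] / length(y_toy))
# # -> 0.25 0.25 0.25 0.75: minority-class observations get larger weights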
## Numeric predictors are centred and scaled by glmnet itself (standardize = TRUE below).
## ====================================================================
## Step 2: all cross validations for each alpha
## ====================================================================
library(furrr)
library(purrr)
library(doMC)
registerDoMC(cores=6)
# Nested CVs with analysis for all lambdas for each alpha
#
set.seed(3)
cvs <- future_map(alphas, function(a){
cv.glmnet(model.matrix(~.-1,X),
y1,
weights = wght,
lambda=lambdas,
type.measure = "deviance", # This is standard measure and recommended for tuning
foldid = c, # Per recommendation the folds are kept for alpha optimisation
alpha=a,
standardize=TRUE,
family=quasibinomial(), # Same as binomial, but less strict (e.g. about non-integer weights)
keep=TRUE) # Keep pre-validated fits for the ROC curve below
})
## ====================================================================
# Step 3: optimum lambda for each alpha
## ====================================================================
# For each alpha, lambda is chosen for the lowest measure (deviance)
each_alpha <- sapply(seq_along(alphas), function(id) {
each_cv <- cvs[[id]]
alpha_val <- alphas[id]
index_lmin <- match(each_cv$lambda.min,
each_cv$lambda)
c(lamb = each_cv$lambda.min,
alph = alpha_val,
cvm = each_cv$cvm[index_lmin])
})
# Best lambda
best_lamb <- min(each_alpha["lamb", ])
# Alpha is chosen for best lambda with lowest model deviance, each_alpha["cvm",]
best_alph <- each_alpha["alph",][each_alpha["cvm",]==min(each_alpha["cvm",]
[each_alpha["lamb",] %in% best_lamb])]
## https://stackoverflow.com/questions/42007313/plot-an-roc-curve-in-r-with-ggplot2
p_roc<-roc.glmnet(cvs[[1]]$fit.preval, newy = y)[[match(best_alph,alphas)]]|> # Plots performance from model with best alpha
ggplot(aes(FPR,TPR)) +
geom_step() +
coord_cartesian(xlim=c(0,1), ylim=c(0,1)) +
geom_abline()+
theme_bw()
## ====================================================================
# Step 4: Creating the final model
## ====================================================================
source("regular_fun.R") # Custom function
optimised_model<-regular_fun(X,y1,K,lambdas=best_lamb,alpha=best_alph)
# With lambda and alpha specified, the function is just a k-fold cross-validation wrapper,
# but keeps model performance figures from each fold.
list2env(optimised_model,.GlobalEnv)
# Function outputs a list, which is unwrapped to Env.
# See source script for reference.
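# list2env() is shorthand for copying each element by name, e.g. (sketch):
# mod <- optimised_model$mod
# B <- optimised_model$B
# cMatTest <- optimised_model$cMatTest # etc.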
## ====================================================================
# Step 5: creating table of coefficients for inference
## ====================================================================
Bmatrix<-matrix(unlist(B),ncol=K) # One column of coefficients per fold
Bmedian<-apply(Bmatrix,1,median)
Bmean<-apply(Bmatrix,1,mean)
reg_coef_tbl<-tibble(
name = c("Intercept",Hmisc::label(X)),
medianX = round(Bmedian,5),
ORmed = round(exp(Bmedian),5),
meanX = round(Bmean,5),
ORmea = round(exp(Bmean),5))%>%
# arrange(desc(abs(medianX)))%>%
gt()
reg_coef_tbl
## ====================================================================
# Step 6: plotting predictive performance
## ====================================================================
reg_cfm<-confusionMatrix(cMatTest)
reg_auc_sum<-summary(auc_test[,1])
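# Individual measures can then be pulled from caret's byClass vector (sketch):
# reg_cfm$byClass[c("Sensitivity", "Specificity")]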

View File

(image diff: 63 KiB before, 63 KiB after; not shown)

View File

@@ -0,0 +1,72 @@
#
# Sankey plot of quartile movement for drops
#
gth<-X_tbl[,c("pase_0_cut","pase_6_cut","pase_drop_fac")]
gth$pase_drop_fac <- factor(ifelse(gth$pase_0_cut=="1",
"low",
gth$pase_drop_fac),
labels = c("no","yes","low")) # Tried and tried to do vectorised, but failed. Matrices acting up..
# Visuals - sankey
# https://stackoverflow.com/questions/50395027/beautifying-sankey-alluvial-visualization-using-r
## Painting
# LOOK AT THIS GREAT FUNCTION!! Wide pivot format. Includes factor for possible quartile-colouring.
df<-data.frame(gth %>% count(pase_0_cut,pase_6_cut,pase_drop_fac))
lbs0<-c(paste0("1st\n(n=",sum(df$n[df[1]=="1"]),")"),
paste0("2nd\n(n=",sum(df$n[df[1]=="2"]),")"),
paste0("3rd\n(n=",sum(df$n[df[1]=="3"]),")"),
paste0("4th\n(n=",sum(df$n[df[1]=="4"]),")"))
lbs6<-c(paste0("1st\n(n=",sum(df$n[df[2]=="1"]),")"),
paste0("2nd\n(n=",sum(df$n[df[2]=="2"]),")"),
paste0("3rd\n(n=",sum(df$n[df[2]=="3"]),")"),
paste0("4th\n(n=",sum(df$n[df[2]=="4"]),")"))
df[1:2] <- as_factor(df[1:2])
levels(df[,1])<-lbs0[1:length(levels(df[,1]))]
levels(df[,2])<-lbs6[1:length(levels(df[,2]))]
df[,3]<-factor(df[,3],levels=c("low","no","yes"))
lows <- "grey80" # grey
drops <- "#990033" # Midtrød
nos <- "grey50"
nas <- "grey90"
border<- "#66c1a3"
box <- "#7fccb2"
cls <- c(lows,nos,drops)
alpha <- 0.7
library(ggalluvial)
(p_delta<-ggplot(df,aes(y = n, axis1 = pase_0_cut, axis2 = pase_6_cut)) +
geom_alluvium(aes(fill = pase_drop_fac, color=pase_drop_fac), width = 1/10, alpha = alpha, knot.pos = 0.3)+
geom_stratum(width = 1/6, fill = box, color = border) +
geom_text(stat = "stratum", aes(label=after_stat(stratum))) +
scale_x_continuous(breaks = 1:2, labels = c("Pre-stroke\nPASE score\nquartiles", "Six months\nPASE score\nquartiles")) +
scale_fill_manual(values = cls) +
scale_color_manual(values = cls) +
scale_y_reverse() + # Easy solution to flip y-axis
labs(title="Change in physical activity") +
ylab("Quartiles")+
theme_minimal() +
theme(legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(size = 14, face = "bold"),
plot.title = element_text(hjust = 0.5, size = 20, face = "bold")))
ggsave("sankey.png", plot = last_plot(), device = NULL, path = NULL,
scale = 1, width = 120, height = 200, dpi = 450, limitsize = TRUE,
units = "mm")

Binary file not shown (image added: 695 KiB).

View File

@@ -0,0 +1,41 @@
## ItMLiHSmar2022
## standardise.R, child script
## Data standardisation, returns list
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
standardise<-function(train,test,type){
# From:
# https://datascience.stackexchange.com/questions/13971/standardization-normalization-test-data-in-r
sel<-sapply(train,is.numeric) # Deciding which columns to standardise (only numeric)
cnm<-colnames(train) # Saving column names for ordering
# Subsetting
## Data to treat
train.tr<-train[,sel]
test.tr<-test[,sel]
## Data to save
train.sv<-train[,!sel]
test.sv<-test[,!sel]
# Calculate mean and SD of train data
trainMean <- sapply(train.tr,mean)
trainSd <- sapply(train.tr,sd)
if (type=="c"){
## centered
norm.trainData<-sweep(train.tr, 2L, trainMean) # using the default "-" to subtract mean column-wise
norm.testData<-sweep(test.tr, 2L, trainMean) # using the default "-" to subtract mean column-wise
}
if (type=="cs"){
## centered AND scaled (Z-score standardisation)
norm.trainData<-sweep(sweep(train.tr, 2L, trainMean), 2, trainSd, "/")
norm.testData<-sweep(sweep(test.tr, 2L, trainMean), 2, trainSd, "/")
}
return(list(XtrainSt=cbind(norm.trainData,train.sv)[,cnm], # Reordering columns to original
XtestSt=cbind(norm.testData,test.sv)[,cnm]))
}
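# Example usage (sketch; Xtrain/Xtest are assumed split objects as in regular_fun.R):
# st <- standardise(train = Xtrain, test = Xtest, type = "cs")
# XtrainSt <- st$XtrainSt
# XtestSt  <- st$XtestSt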

View File

@@ -141,7 +141,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 386}}\cell
@@ -160,7 +160,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 387}}\cell
@@ -179,7 +179,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@@ -198,7 +198,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@@ -710,6 +710,25 @@
 \trowd\trrh0
+\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
+\intbl {\f0 {\f0\fs20 PASE first quartile hop F}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
+\intbl {\f0 {\f0\fs20 391}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx5616
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx7488
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\row
+\trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
 \intbl {\f0 {\f0\fs20 {\super \i 1}Median (25%;75%) [Minimum,Maximum]; n (%)}}\cell

View File

@@ -141,7 +141,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 386}}\cell
@@ -160,7 +160,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 387}}\cell
@@ -179,7 +179,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@@ -198,7 +198,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@@ -710,6 +710,25 @@
 \trowd\trrh0
+\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
+\intbl {\f0 {\f0\fs20 PASE first quartile hop F}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
+\intbl {\f0 {\f0\fs20 391}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx5616
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx7488
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\row
+\trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
 \intbl {\f0 {\f0\fs20 {\super \i 1}Median (25%;75%) [Minimum,Maximum]; n (%)}}\cell

View File

@@ -239,7 +239,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -267,7 +267,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.083}}\cell
@@ -295,7 +295,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -323,7 +323,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell

View File

@@ -239,7 +239,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -267,7 +267,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -295,7 +295,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -323,7 +323,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell

View File

@@ -135,6 +135,7 @@ var.labels = c(age="Age",
                pase_decl_rel_fac="PASE score difference, relative F",
                pase_decl_abs_fac="PASE score difference, absolute F",
                pase_drop_fac="PASE first quartile drop F",
+               pase_hop_fac="PASE first quartile hop F",
                pase_diff="PASE absolute decline",
                pase_decl_rel="PASE relative decline",
                pase_0_cut="PASE 0 quartiles",
@@ -171,9 +172,9 @@ X_tbl <- X_tbl|>
   mutate(## Relative decline
     pase_diff=(pase_0-pase_6),
     pase_decl_rel = pase_diff/pase_0*100,
-    pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
+    # pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
     ## Absolute decline
-    pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
+    # pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
     ## Drop
     pase_0_cut=quantile_cut(as.numeric(pase_0),
                             groups=4,
@@ -189,42 +190,38 @@ X_tbl <- X_tbl|>
                             ordered.f = TRUE,
                             inc.outs = TRUE,
                             detail.lst=FALSE),
-    pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")))
+    pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")),
+    pase_hop_fac=factor(ifelse(pase_6_cut!=1&pase_0_cut==1,"yes","no")))
 Hmisc::label(X_tbl) = as.list(var.labels[match(names(X_tbl), names(var.labels))])
 # Setting final primary output from "pout"
-if (pout=="decl_rel"){
-  X_tbl <- X_tbl|>
-    mutate(group=pase_decl_rel_fac)
-  X_tbl_f <- X_tbl|>
-    filter(pase_0!=0)|>
-    select(-starts_with("pase_"))
-}
-if (pout=="decl_abs"){
-  X_tbl <- X_tbl|>
-    mutate(group=pase_decl_rel_fac)
-  X_tbl_f <- X_tbl|>
-    filter(pase_0>=abs_dif)|>
-    select(-starts_with("pase_"))
-}
 if (pout=="drop"){
   X_tbl <- X_tbl|>
     mutate(group=pase_drop_fac)
-  print(quantile(as.numeric(X_tbl$pase_0),na.rm=T))
-  print(quantile(as.numeric(X_tbl$pase_6),na.rm=T))
-  print(summary(X_tbl$pase_0_cut))
+  # print(quantile(as.numeric(X_tbl$pase_0)))
+  # print(quantile(as.numeric(X_tbl$pase_6)))
+  # print(summary(X_tbl$pase_0_cut))
   X_tbl_f <- X_tbl|>
     filter(pase_0_cut!=1)|>
    select(-starts_with("pase_"))
 }
+if (pout=="hop"){
+  X_tbl <- X_tbl|>
+    mutate(group=pase_hop_fac)
+  # print(quantile(as.numeric(X_tbl$pase_0)))
+  # print(quantile(as.numeric(X_tbl$pase_6)))
+  # print(summary(X_tbl$pase_0_cut))
+  X_tbl_f <- X_tbl|>
+    filter(pase_6_cut!=1)|>
+    select(-starts_with("pase_"))
+}
 # Excluding one month measures for primary analysis and setting df for table one
 X_tbl_f <- X_tbl_f |>
   select(-c(who5_score_1,
@@ -273,3 +270,7 @@ library(pROC)
 library(gt)
 library(gtsummary)
 library(dplyr)
+library(tidyverse)
+library(glue)
+# library(ggdendro)
+library(corrplot)

View File

@@ -1,6 +1,6 @@
 ## ItMLiHS assignment data set
-export<-read.csv("/Volumes/Data/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
+export<-read.csv("/Volumes/Data 1/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
 export<-export[,c("pase_0",
                   "age",

View File

@@ -132,11 +132,19 @@ reg_coef_tbl<-tibble(
   # arrange(desc(abs(medianX)))%>%
   gt()
-reg_coef_tbl
 ## ====================================================================
 # Step 6: plotting predictive performance
 ## ====================================================================
 reg_cfm<-confusionMatrix(cMatTest)
 reg_auc_sum<-summary(auc_test[,1])
+## ====================================================================
+# Step 7: Packing list to save in loop
+## ====================================================================
+ls[[i]] <- list("RegularisedCoefs"=reg_coef_tbl,
+                "bestA"=best_alph,
+                "bestL"=best_lamb,
+                "ConfusionMatrx"=reg_cfm,
+                "AUROC"=reg_auc_sum)

Longterm/survival.R Normal file
View File

@@ -0,0 +1 @@
survival

View File

@@ -5,7 +5,7 @@ account: cognitiveindex
 server: shinyapps.io
 hostUrl: https://api.shinyapps.io/v1
 appId: 6803928
-bundleId: 6217136
+bundleId: 6561652
 url: https://cognitiveindex.shinyapps.io/index_app/
-when: 1660899020.73485
+when: 1669625189.89414
-lastSyncTime: 1660899020.73486
+lastSyncTime: 1669625189.89416