all analyses of project one moved to new generation (old saved as gen_1)

2022-12-02 16:43:31 +01:00 · 2022-12-02 16:43:31 +01:00 · d6fc414822
commit d6fc414822
parent 244add2bb4
29 changed files with 1319 additions and 123 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/Decline/00master.R
+++ b/Decline/00master.R
@ -3,27 +3,31 @@
 ## 
 ## Based on the assignment work from the ISL-course
 ## 
+## Generation 2 - 02.december.2022
+## Code preparation for analysis on Denmarks Statistics server with enriched data set.
 ## 
+## Analysis plan:
+## Table 1
+## Figure 1: Sankey plot (drop & hop colored)
+## Table 2: Linear regression model of pase_6~.
+## Table 3: Elastic net prediction models of drop and hop. Performance measures referenced in text.
+## 
+## A Rmarkdown file could be created to write the initial report with main results.
+## This code is a bit of a mess, as it is the result of several iterations. It works however.
 ## 

 ## ====================================================================
 # Step 0: Primary outcome
 ## ====================================================================

-# Difs
-rel_dif <- 20 # 20 % difference
-abs_dif <- 20 # 20 point diff
+# Script to run as hop and drop

-# pout <- "diff"
-# 
-# Note:: By increasing the relative decline, the sensitivity increases and specificity declines.
-# This fact is an argument against over fitting. The reason being the nature of the clinical data and the fact, that predicting PA is difficult (!) 
-# 
 pout <- "drop" # Drop to first quartile

 # decl_rel
 # decl_abs
 # drop
+# hop


 ## ====================================================================
@ -38,20 +42,6 @@ source("data_set.R")

 source("data_format.R")

-## ====================================================================
-# Libraries
-## ====================================================================
-
-
-library(tidyverse)
-library(glue)
-library(patchwork)
-# library(ggdendro)
-library(corrplot)
-library(gt)
-library(gtsummary)
-
-
 ## ====================================================================
 ## 
 ## Baseline
@ -64,30 +54,30 @@ library(gtsummary)
 ## ====================================================================


-lbs<-var.labels[match(colnames(X_tbl), 
-                      names(var.labels))]
-
-ls<-lapply(1:ncol(X_tbl),function(x){
-  as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
-})
-
-ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
-                by = "group",
-                missing = "no",
-                # label = ls[-length(ls)], ## Removing the last, as this is output
-                value = list(where(is.factor) ~ "2"),
-                type = list(mrs_0 ~ "categorical"),
-                statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
-)%>%
-  add_overall() %>%
-  add_n()%>%
-  as_gt()
-
-ts
-
-ts_rtf <- file("table1.RTF", "w")
-writeLines(ts%>%as_rtf(), ts_rtf)
-close(ts_rtf)
+# lbs<-var.labels[match(colnames(X_tbl), 
+#                       names(var.labels))]
+# 
+# ls<-lapply(1:ncol(X_tbl),function(x){
+#   as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
+# })
+# 
+# ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
+#                 by = "group",
+#                 missing = "no",
+#                 # label = ls[-length(ls)], ## Removing the last, as this is output
+#                 value = list(where(is.factor) ~ "2"),
+#                 type = list(mrs_0 ~ "categorical"),
+#                 statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
+# )%>%
+#   add_overall() %>%
+#   add_n()%>%
+#   as_gt()
+# 
+# ts
+# 
+# ts_rtf <- file("table1.RTF", "w")
+# writeLines(ts%>%as_rtf(), ts_rtf)
+# close(ts_rtf)



@ -104,24 +94,24 @@ ls<-lapply(1:ncol(X_tbl_f),function(x){
 # Step 2: table - edited
 ## ====================================================================

-ts_e<-tbl_summary(X_tbl,
-                  missing = "no",
-                  value = list(where(is.factor) ~ "2"),
-                  type = list(mrs_0 ~ "categorical",
-                              mrs_1 ~ "categorical"),
-                  statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
-)%>%
-  as_gt()
-
-ts_e
+# ts_e<-tbl_summary(X_tbl,
+#                   missing = "no",
+#                   value = list(where(is.factor) ~ "2"),
+#                   type = list(mrs_0 ~ "categorical",
+#                               mrs_1 ~ "categorical"),
+#                   statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
+# )%>%
+#   as_gt()
+# 
+# ts_e

 ## ====================================================================
 # Step 3: table export
 ## ====================================================================

-ts_rtf <- file("table1_overall.RTF", "w")
-writeLines(ts%>%as_rtf(), ts_rtf)
-close(ts_rtf)
+# ts_rtf <- file("table1_overall.RTF", "w")
+# writeLines(ts%>%as_rtf(), ts_rtf)
+# close(ts_rtf)

 ## ====================================================================
 # Baseline table - by PASE group
@ -138,11 +128,43 @@ ts_q <- X_tbl |>
                  statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
 ) |> 
  add_overall() |> 
-  add_n
+  add_n ()

 ts_q

+## ====================================================================
+# Drops and hops
+## ====================================================================

+# TRUEs are patients dropping
+table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])
+
+# TRUEs are percentage of patients inactive before stroke being more active after
+table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])
+
+# TRUEs are percentage of patients being more active after that were inactive before stroke
+table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])
+
+# Difference between hop/no-hop
+t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+
+summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])
+
+summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+
+boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+
+# Stationary low
+t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
+
+boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
+
+## ====================================================================
+# Sankey plot
+## ====================================================================
+
+# source("sankey.R")
+# p_delta

 ## ====================================================================
 # Six months PASE: Bivariate and multivariate analyses
@ -196,13 +218,22 @@ tbl_merge(list(uv_reg,mu_reg))
 ## ====================================================================


-source("assign_full.R")
+# source("assign_full.R")
+
+ls <- list()
+for (i in c("drop","hop")){
+  pout <- i
+  source("data_format.R")
  source("regularisation_steps.R")
+}
+
+# Loop to run regularised model on both drop and hop.
+# Saved in list for printing and exporting the plot.

 ## ====================================================================
 # Step 1: data merge
 ## ====================================================================
-tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
+tbl<-merge(ls$drop$RegularisedCoefs$'_data',ls$hop$RegularisedCoefs$'_data',by="name",all.x=T, sort=F)

 ## ====================================================================
 # Step 2: table
@ -214,22 +245,25 @@ com_coef_tbl<-tbl%>%
    rows = everything(),
    decimals = 3)%>%
  tab_spanner(
-    label = "Full model",
-    columns = 6:8
+    label = "DROP",
+    columns = 2:5
  )%>%
  tab_spanner(
-    label = paste0("Regularised model, (a=",
-                   best_alph,
-                   ", l=",
-                   round(best_lamb,3),
-                   ")"),
-    columns = 2:5
+    label = "HOP",
+    columns = 6:9
  )%>%
  tab_header(
    title = "Model coefficients",
    subtitle = "Combined table of both full and regularised model coefficients"
  )

+
+# paste0("Regularised model, (a=",
+#        best_alph,
+#        ", l=",
+#        round(best_lamb,3),
+#        ")")
+
 com_coef_tbl

 ## ====================================================================
@ -249,12 +283,6 @@ close(com_coef_rtf)
 ## 
 ## ====================================================================

-# ROC curve of best model
-
-p_roc
-ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
-
-
 ## ====================================================================
 # Step 1: data set
 ## ====================================================================
--- a/Decline/archive/.DS_Store
+++ b/Decline/archive/.DS_Store
--- a/Decline/archive/generation_1/.DS_Store
+++ b/Decline/archive/generation_1/.DS_Store
--- a/Decline/archive/generation_1/00master.R
+++ b/Decline/archive/generation_1/00master.R
@ -0,0 +1,409 @@
+## 
+## Master script
+## 
+## Based on the assignment work from the ISL-course
+## 
+## 
+## 
+
+## ====================================================================
+# Step 0: Primary outcome
+## ====================================================================
+
+# Difs
+rel_dif <- 20 # 20 % difference
+abs_dif <- 20 # 20 point diff
+
+# pout <- "diff"
+# 
+# Note:: By increasing the relative decline, the sensitivity increases and specificity declines.
+# This fact is an argument against over fitting. The reason being the nature of the clinical data and the fact, that predicting PA is difficult (!) 
+# 
+pout <- "drop" # Drop to first quartile
+
+# decl_rel
+# decl_abs
+# drop
+# hop
+
+
+## ====================================================================
+## Data
+## ====================================================================
+
+
+setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")
+
+source("data_set.R")
+# Loading data-set from USB, to not store on computer
+
+source("data_format.R")
+
+## ====================================================================
+# Libraries
+## ====================================================================
+
+
+library(tidyverse)
+library(glue)
+library(patchwork)
+# library(ggdendro)
+library(corrplot)
+library(gt)
+library(gtsummary)
+
+
+## ====================================================================
+## 
+## Baseline
+## 
+## ====================================================================
+
+
+## ====================================================================
+# Step 0: labels
+## ====================================================================
+
+
+lbs<-var.labels[match(colnames(X_tbl), 
+                      names(var.labels))]
+
+ls<-lapply(1:ncol(X_tbl),function(x){
+  as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
+})
+
+ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
+                by = "group",
+                missing = "no",
+                # label = ls[-length(ls)], ## Removing the last, as this is output
+                value = list(where(is.factor) ~ "2"),
+                type = list(mrs_0 ~ "categorical"),
+                statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
+)%>%
+  add_overall() %>%
+  add_n()%>%
+  as_gt()
+
+ts
+
+ts_rtf <- file("table1.RTF", "w")
+writeLines(ts%>%as_rtf(), ts_rtf)
+close(ts_rtf)
+
+
+
+## ====================================================================
+# Step 1: labels
+## ====================================================================
+lbs<-var.labels[match(colnames(X_tbl_f), names(var.labels))]
+
+ls<-lapply(1:ncol(X_tbl_f),function(x){
+  as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
+})
+
+## ====================================================================
+# Step 2: table - edited
+## ====================================================================
+
+ts_e<-tbl_summary(X_tbl,
+                  missing = "no",
+                  value = list(where(is.factor) ~ "2"),
+                  type = list(mrs_0 ~ "categorical",
+                              mrs_1 ~ "categorical"),
+                  statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
+)%>%
+  as_gt()
+
+ts_e
+
+## ====================================================================
+# Step 3: table export
+## ====================================================================
+
+ts_rtf <- file("table1_overall.RTF", "w")
+writeLines(ts%>%as_rtf(), ts_rtf)
+close(ts_rtf)
+
+## ====================================================================
+# Baseline table - by PASE group
+## ====================================================================
+
+ts_q <- X_tbl |> 
+  select(vars) |> 
+  mutate(pase_0_cut = factor(quantile_cut(pase_0, groups = 4)[[1]],ordered = TRUE)) |> 
+  select(-pase_6,-pase_0) |> 
+  tbl_summary(missing = "no",
+                  by="pase_0_cut",
+                  value = list(where(is.factor) ~ "2"),
+                  type = list(mrs_0 ~ "categorical"),
+                  statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
+) |> 
+  add_overall() |> 
+  add_n ()
+
+ts_q
+
+## ====================================================================
+# Drops and hops
+## ====================================================================
+
+# TRUEs are patients dropping
+table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])
+
+# TRUEs are percentage of patients inactive before stroke being more active after
+table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])
+
+# TRUEs are percentage of patients being more active after that were inactive before stroke
+table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])
+
+# Difference between hop/no-hop
+t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+
+summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])
+
+summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+
+boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+
+# Stationary low
+t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
+
+boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
+
+## ====================================================================
+# Sankey plot
+## ====================================================================
+
+if (pout=="drop"){
+  source("sankey.R")
+  p_delta
+}
+
+
+## ====================================================================
+# Six months PASE: Bivariate and multivariate analyses
+## ====================================================================
+
+dta_lmreg <- X_tbl |> 
+  select(vars) |> 
+  mutate(mrs_0=factor(ifelse(mrs_0==1,1,2))) 
+
+Hmisc::label(dta_lmreg$mrs_0) <- "Pre-stroke mRS >0"
+
+uv_reg <- tbl_uvregression(data=dta_lmreg, 
+                           method=lm, 
+                           y="pase_6",
+                           show_single_row = where(is.factor),
+                           estimate_fun = ~style_sigfig(.x,digits = 3),
+                           pvalue_fun = ~style_pvalue(.x, digits = 3)
+)
+
+mu_reg <- dta_lmreg |>
+  lm(formula=pase_6~.,data=_) |> 
+  tbl_regression(show_single_row = where(is.factor),
+                 estimate_fun = ~style_sigfig(.x,digits = 3),
+                 pvalue_fun = ~style_pvalue(.x, digits = 3)
+  )|> 
+  add_n()
+
+tbl_merge(list(uv_reg,mu_reg))
+
+
+## ====================================================================
+## 
+## Data variance
+## 
+## Illustrating principal components.
+## 
+## ====================================================================
+
+
+# source("PCA.R")
+# 
+# 
+# pca22
+# ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
+
+
+## ====================================================================
+## 
+## Models
+## 
+## ====================================================================
+
+
+source("assign_full.R")
+source("regularisation_steps.R")
+
+## ====================================================================
+# Step 1: data merge
+## ====================================================================
+tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
+
+## ====================================================================
+# Step 2: table
+## ====================================================================
+com_coef_tbl<-tbl%>%
+  gt()%>%
+  fmt_number(
+    columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
+    rows = everything(),
+    decimals = 3)%>%
+  tab_spanner(
+    label = "Full model",
+    columns = 6:8
+  )%>%
+  tab_spanner(
+    label = paste0("Regularised model, (a=",
+                   best_alph,
+                   ", l=",
+                   round(best_lamb,3),
+                   ")"),
+    columns = 2:5
+  )%>%
+  tab_header(
+    title = "Model coefficients",
+    subtitle = "Combined table of both full and regularised model coefficients"
+  )
+
+com_coef_tbl
+
+## ====================================================================
+# Step 3: export
+## ====================================================================
+com_coef_rtf <- file("table2.RTF", "w")
+writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
+close(com_coef_rtf)
+
+
+
+## ====================================================================
+## 
+## Model performance
+## 
+## Table with performance meassures for the two different models.
+## 
+## ====================================================================
+
+# ROC curve of best model
+
+p_roc
+ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
+
+
+## ====================================================================
+# Step 1: data set
+## ====================================================================
+tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
+                "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
+                "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
+
+## ====================================================================
+# Step 2: table
+## ====================================================================
+tbl_perf<-tbl%>%
+  gt()%>%
+  tab_header(
+    title = "Performance meassures",
+    subtitle = "Combined table of both full and regularised performance meassures"
+  )
+
+tbl_perf
+
+## ====================================================================
+# Step 3: export
+## ====================================================================
+tbl_perf_rtf <- file("table3.RTF", "w")
+writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
+close(tbl_perf_rtf)
+# 
+
+## ====================================================================
+## 
+## Secondary analysis
+## 
+## ====================================================================
+
+Xy<-dta_s
+X<-dta_s|>select(-group)
+y<-dta_s$group
+
+
+source("assign_full.R")
+source("regularisation_steps.R")
+
+## ====================================================================
+# Step 1: data merge
+## ====================================================================
+tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
+
+## ====================================================================
+# Step 2: table
+## ====================================================================
+com_coef_tbl<-tbl%>%
+  gt()%>%
+  fmt_number(
+    columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
+    rows = everything(),
+    decimals = 3)%>%
+  tab_spanner(
+    label = "Full model",
+    columns = 6:8
+  )%>%
+  tab_spanner(
+    label = paste0("Regularised model, (a=",
+                   best_alph,
+                   ", l=",
+                   round(best_lamb,3),
+                   ")"),
+    columns = 2:5
+  )%>%
+  tab_header(
+    title = "Model coefficients",
+    subtitle = "Combined table of both full and regularised model coefficients"
+  )
+
+com_coef_tbl
+
+## ====================================================================
+# Step 3: export
+## ====================================================================
+com_coef_rtf <- file("table2_sec.RTF", "w")
+writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
+close(com_coef_rtf)
+
+
+## ====================================================================
+## 
+## Model performance
+## 
+## Table with performance meassures for the two different models.
+## 
+## ====================================================================
+
+
+## ====================================================================
+# Step 1: data set
+## ====================================================================
+tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
+                "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
+                "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
+
+## ====================================================================
+# Step 2: table
+## ====================================================================
+tbl_perf<-tbl%>%
+  gt()%>%
+  tab_header(
+    title = "Performance meassures",
+    subtitle = "Combined table of both full and regularised performance meassures"
+  )
+
+tbl_perf
+
+## ====================================================================
+# Step 3: export
+## ====================================================================
+tbl_perf_rtf <- file("table3_sec.RTF", "w")
+writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
+close(tbl_perf_rtf)
--- a/Decline/archive/generation_1/PCA.R
+++ b/Decline/archive/generation_1/PCA.R
--- a/Decline/archive/generation_1/assign_full.R
+++ b/Decline/archive/generation_1/assign_full.R
--- a/Decline/archive/generation_1/assigndata.csv
+++ b/Decline/archive/generation_1/assigndata.csv
--- a/Decline/archive/generation_1/data_format.R
+++ b/Decline/archive/generation_1/data_format.R
@ -0,0 +1,290 @@
+## ItMLiHSmar2022
+## data_format.R, child script
+## Data formatting and handling
+## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
+## 
+## Now modified to use in publication
+## 
+
+## ====================================================================
+# Step 1: Loading libraries
+## ====================================================================
+
+library(Hmisc)
+library(dplyr)
+library(daDoctoR)
+library(tidyselect)
+
+## ====================================================================
+# Step 2: Loading data
+## ====================================================================
+ 
+# rm(list = ls()) # Clear
+# setwd("/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/") 
+# dta<-read.csv("/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/assigndata.csv")
+
+## ====================================================================
+# Step 3: Formatting variables
+## ====================================================================
+
+dta <- export %>% 
+  # as_tibble()%>%
+  mutate(any_rep=factor(ifelse(thrombolysis=="yes"|thrombechtomy=="yes","yes","no")), # If not noted, no therapy was received
+         male_sex= factor(ifelse(sex=="female","no","yes")),
+         # smoke_ever=factor(ifelse(smoke_ever=="never","no","yes")),
+         civil=factor(ifelse(civil=="partner","no","yes")), # Sets "yes" for not-cohabiting
+         rtreat=factor(ifelse(rtreat=="Placebo","no","yes")), # "Yes" receives active treatment
+         alc=factor(ifelse(alc=="more","yes","no")), # Yes for more than guideline
+         pase_0=as.numeric(pase_0),
+         pase_6=as.numeric(pase_6),
+         across(c("diabetes",
+                  "hypertension",
+                  "smoker",
+                  # "smoker_prev",
+                  "afli",
+                  "pad",
+                  "ami",
+                  "tci",
+                  "mrs_0",
+                  "mrs_1"),as.factor),
+         across(c("nihss_c",
+                  "age",
+                  "mdi_1", # For "enriched" analysis
+                  "who5_score_1",
+                  "mfi_gen_1",
+                  "mfi_phys_1",
+                  "mfi_act_1",
+                  "mfi_mot_1",
+                  "mfi_men_1"),as.numeric )
+         )%>% 
+  select(-c(sex))
+  
+  
+## ====================================================================
+# Step 4: Defining outcome
+## ====================================================================
+
+## Changed to step 7
+## This is to perform proper quantile split based on actually included.
+
+## ====================================================================
+# Step 5: Ordering variables
+## ====================================================================
+
+vars <- c("age",
+          "male_sex",
+          "civil",
+          "pase_0",
+          "smoker",
+          "alc",
+          "afli",
+          "hypertension",
+          "diabetes",
+          "pad",
+          "ami",
+          "tci",
+          "mrs_0",
+          "nihss_c",
+          "any_rep",
+          "rtreat",
+          "pase_6")
+
+dta<-select(dta,c(vars, 
+                  "mrs_1",
+                  "mfi_gen_1",
+                  "mfi_phys_1",
+                  "mfi_act_1",
+                  "mfi_mot_1",
+                  "mfi_men_1",
+                  "mdi_1", 
+                  "who5_score_1"
+                  ))
+
+## ====================================================================
+# Step 6: Labeling
+## ====================================================================
+
+var.labels = c(age="Age",
+               male_sex="Male",
+               civil="Living alone",
+               pase_0="Pre-stroke PASE score",
+               pase_6="Six month PASE score",
+               smoker="Daily or occasinally smoking",
+               # smoker_prev="Previous habbit of smoking",
+               alc="More alcohol than recommendation",
+               afli="AFIB",
+               hypertension="Hypertension",
+               diabetes="Diabetes",
+               pad="PAD",
+               ami="Previous MI",
+               tci="Previous TIA",
+               mrs_0="Pre-stroke mRS  [-1]",
+               nihss_c="Acute NIHSS score",
+               thrombolysis="Acute thrombolysis",
+               thrombechtomy="Acute thrombechtomy",
+               any_rep="Any reperfusion therapy",
+               rtreat="Active trial treatment",
+               mrs_1="One month mRS [-1]",
+               mfi_gen_1="One month MFI (General fatigue)",
+               mfi_phys_1="One month MFI (Physical fatigue)",
+               mfi_act_1="One month MFI (Reduced activity)",
+               mfi_mot_1="One month MFI (Reduced motivation)",
+               mfi_men_1="One month MFI (Mental fatigue)",
+               mdi_1="One month MDI",
+               who5_score_1="One month WHO5",
+               pase_decl_rel_fac="PASE score difference, relative F",
+               pase_decl_abs_fac="PASE score difference, absolute F",
+               pase_drop_fac="PASE first quartile drop F",
+               pase_hop_fac="PASE first quartile hop F",
+               pase_diff="PASE absolute decline",
+               pase_decl_rel="PASE relative decline",
+               pase_0_cut="PASE 0 quartiles",
+               pase_6_cut="PASE 6 quartiles")
+
+## Labelling based on outcome flag
+if (pout=="decl_rel"|pout=="decl_abs"){
+  var.labels = c(var.labels,group="PASE decline")}
+if (pout=="drop"){
+  var.labels = c(var.labels,group="PASE drop")}
+
+## ====================================================================
+# Step 7: final data export
+## ====================================================================
+
+data_summary<-summary(dta)
+
+# Saving "old" factorised variables
+sel<-sapply(dta,is.factor)
+# Reformatting factors as 1/2 for analysis
+dta<-dta |> 
+  mutate(across(where(is.factor), as.numeric))|> # Turning factors into 1(no) or 2(yes) for model. Numbered alphabetically.
+  mutate(across(matches(colnames(dta)[sel]), as.factor),
+         across(starts_with("pase_"), as.numeric))
+
+# Filtering out non-PASE
+X_tbl<-dta |>
+  filter(!is.na(pase_0),!is.na(pase_6))
+
+nrow(X_tbl)
+
+# Defining possible outcome meassures. Keeping in df for characterisation
+X_tbl <- X_tbl|>
+  mutate(## Relative decline
+         pase_diff=(pase_0-pase_6),
+         pase_decl_rel = pase_diff/pase_0*100,
+         pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
+         ## Absolute decline
+         pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
+         ## Drop
+         pase_0_cut=quantile_cut(as.numeric(pase_0),
+                                 groups=4,
+                                 group.names = c(as.character(1:4)),
+                                 y=as.numeric(pase_0),
+                                 ordered.f = TRUE,
+                                 inc.outs = TRUE, 
+                                 detail.lst=FALSE),
+         pase_6_cut=quantile_cut(as.numeric(pase_6),
+                                 groups=4,
+                                 group.names = c(as.character(1:4)),
+                                 y=as.numeric(pase_0),
+                                 ordered.f = TRUE,
+                                 inc.outs = TRUE, 
+                                 detail.lst=FALSE),
+         pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")),
+         pase_hop_fac=factor(ifelse(pase_6_cut!=1&pase_0_cut==1,"yes","no")))
+
+Hmisc::label(X_tbl) = as.list(var.labels[match(names(X_tbl), names(var.labels))])
+
+# Setting final primary output from "pout"
+if (pout=="decl_rel"){
+  X_tbl <- X_tbl|>
+    mutate(group=pase_decl_rel_fac)
+  
+  X_tbl_f <- X_tbl|>
+    filter(pase_0!=0)|>
+    select(-starts_with("pase_"))
+}
+
+if (pout=="decl_abs"){
+  X_tbl <- X_tbl|>
+    mutate(group=pase_decl_rel_fac)
+  
+  X_tbl_f <- X_tbl|>
+    filter(pase_0>=abs_dif)|>
+    select(-starts_with("pase_"))
+}
+
+if (pout=="drop"){
+  X_tbl <- X_tbl|>
+    mutate(group=pase_drop_fac)
+  
+  # print(quantile(as.numeric(X_tbl$pase_0)))
+  # print(quantile(as.numeric(X_tbl$pase_6)))
+  # print(summary(X_tbl$pase_0_cut))
+  
+  X_tbl_f <- X_tbl|>
+    filter(pase_0_cut!=1)|>
+    select(-starts_with("pase_"))
+}
+
+if (pout=="hop"){
+  X_tbl <- X_tbl|>
+    mutate(group=pase_hop_fac)
+  
+  # print(quantile(as.numeric(X_tbl$pase_0)))
+  # print(quantile(as.numeric(X_tbl$pase_6)))
+  # print(summary(X_tbl$pase_0_cut))
+  
+  X_tbl_f <- X_tbl|>
+    filter(pase_6_cut!=1)|>
+    select(-starts_with("pase_"))
+}
+
+# Excluding one month measures for primary analysis and setting df for table one
+X_tbl_f <- X_tbl_f |> 
+  select(-c(who5_score_1,
+            mdi_1,
+            mrs_1,
+            starts_with("mfi_"))) # Left out of model as no present in drop-group
+
+# Dropping non-complete for analysis
+Xy <- X_tbl_f|> 
+  na.omit()|> # Keeping only complete observations
+  select(-c(tci) # Left out of model as no present in drop-group
+  )|>
+  mutate(mrs_0=factor(ifelse(mrs_0==1,1,2))) # Sets binary mRS 0 to include in glmnet, 0 or above
+
+label(Xy) = as.list(var.labels[match(names(Xy), names(var.labels))])
+
+X<-dplyr::select(Xy,-c(group, -starts_with("pase_")) # Exclude primary outcome
+                 )
+y<-Xy$group
+
+
+## ====================================================================
+# Secondary analysis
+## ====================================================================
+
+
+dta_s<-X_tbl|>
+  select(-c(tci),
+         -starts_with("pase_"))|> 
+  na.omit()|>
+  mutate(mrs_0=factor(ifelse(mrs_0==1,1,2)),# Sets binary mRS 0 to include in glmnet, 0 or above
+         mrs_1=factor(ifelse(mrs_1==1,1,2)))# Sets binary mRS 1 to include in glmnet, 0 or above
+
+label(dta_s) = as.list(var.labels[match(names(dta_s), names(var.labels))])
+
+## ====================================================================
+# Step 8: Loading rest of libraries
+## ====================================================================
+
+library(tidyverse)
+library(patchwork)
+library(caret)
+library(glmnet)
+library(leaps)
+library(pROC)
+library(gt)
+library(gtsummary)
+library(dplyr)
--- a/Decline/archive/generation_1/data_set.R
+++ b/Decline/archive/generation_1/data_set.R
@ -0,0 +1,49 @@
+## ItMLiHS assignment data set
+
+export<-read.csv("/Volumes/Data 1/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
+
+export<-export[,c("pase_0",
+               "age",
+               "sex",
+               "civil",
+               "smoke_ever", 
+               "smoker",
+               "rtreat",
+               "alc",
+               "afli",
+               "hypertension",
+               "diabetes",
+               "mrs_0",
+               "nihss_c",
+               "thrombolysis",
+               "pad", 
+               "thrombechtomy", 
+               "ami", 
+               "tci", 
+               "pase_6", 
+               "mrs_1", 
+               "mfi_gen_1", 
+               "mfi_phys_1", 
+               "mfi_act_1", 
+               "mfi_mot_1", 
+               "mfi_men_1",
+               "mdi_1",
+               "who5_score_1")]
+
+export$diabetes[is.na(export$diabetes)]<-"no"
+export$diabetes[is.na(export$hypertension)]<-"no"
+export$thrombolysis[is.na(export$thrombolysis)]<-"no"
+export$thrombechtomy[is.na(export$thrombechtomy)]<-"no"
+export$pad[is.na(export$pad)]<-"no"
+export$ami[is.na(export$ami)]<-"no"
+# export$smoker_prev <- ifelse(export$smoker=="3","yes","no")
+export$smoker <- ifelse(export$smoker=="1","yes","no")
+export$smoker[is.na(export$smoker)] <- "no"
+# export$mrs_0[export$mrs_0==3]<-NA
+
+
+
+# export<-na.omit(export)
+
+export
+# write.csv(export,"/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/assigndata.csv",row.names = FALSE)
--- a/Decline/archive/generation_1/dataset_redcap.R
+++ b/Decline/archive/generation_1/dataset_redcap.R
--- a/Decline/archive/generation_1/pc_plot.png
+++ b/Decline/archive/generation_1/pc_plot.png
--- a/Decline/archive/generation_1/regular_fun.R
+++ b/Decline/archive/generation_1/regular_fun.R
@ -0,0 +1,117 @@
+## ItMLiHSmar2022
+## regular_fun.R, child script
+## Regularisation model building function
+## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
+## 
+## Now modified to use in publication
+## 
+
+regular_fun<-function(X,y,K,lambdas,alpha){
+n<-nrow(X)
+set.seed(321)
+
+# Using caret function to ensure both levels represented in all folds
+c<-createFolds(y=y, k = K, list = FALSE, returnTrain = TRUE) 
+
+B<-yhatTestProbKeep<-list()
+accTrain<-accTest<-err_train<-err_test<-auc_train<-auc_test<-matrix(nrow = K,ncol = length(lambdas))
+
+catinfo<-levels(y)
+
+cMatTrain<-cMatTest<-table(true=factor(c(0,0),levels=catinfo),pred=factor(c(0,0),levels=catinfo))
+
+
+## Iterate over partitions
+for (idx1 in 1:K){
+  
+  # Status
+  cat('Processing fold', idx1, 'of', K,'\n')
+  
+  # idx1=1
+  # Get training- and test sets
+  I_train = c!=idx1 ## Creating selection vector of TRUE/FALSE
+  I_test = !I_train
+  
+  Xtrain = X[I_train,]
+  ytrain = y[I_train]
+  Xtest = X[I_test,]
+  ytest = y[I_test]
+  
+  
+  ## Model matrices for glmnet
+  ## Using the complicated approach not to include first level.
+  # Xmat.train<-model.matrix(~ .-1, data=Xtrain, 
+  #              contrasts.arg = lapply(Xtrain[,sapply(Xtrain, is.factor)], 
+  #                                     contrasts, contrasts=T))
+  # Xmat.test<-model.matrix(~ .-1, data=Xtest, 
+  #                contrasts.arg = lapply(Xtest[,sapply(Xtest, is.factor)], 
+  #                                       contrasts, contrasts=T))
+  
+  # Xmat.train<-model.matrix(~.-1,Xtrain)
+  # Xmat.test<-model.matrix(~.-1,Xtest)
+  
+  # Weights
+  ytrain_weight<-as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))
+  # ytest_weight<-as.vector(1 / (table(ytest)[ytest] / length(ytest)))
+  
+  # Fit regularized linear regression model
+  mod<-glmnet(Xtrain, ytrain, 
+              alpha = alpha,        ## Alpha = 1 for lasso
+              lambda = lambdas, ## Setting lambdas
+              standardize = TRUE, ## Scales and centers
+              weights = ytrain_weight,
+              family = "binomial"
+              )
+  
+  # Keep coefficients for plot
+  B[[idx1]] <- as.matrix(coef(mod))
+  
+  # Iterate over regularization strengths to compute training- and test
+  # errors for individual regularization strengths.
+  for (idx2 in 1:length(lambdas)){
+    # idx2=1
+    
+    # Predict
+    yhatTrainProb<-predict(mod, 
+                           s = lambdas[idx2], 
+                           newx = data.matrix(Xtrain),
+                           type = "response"
+    )
+    
+    yhatTestProb<-predict(mod, 
+                          s = lambdas[idx2], 
+                          newx = data.matrix(Xtest),
+                          type = "response"
+    )
+    
+    # Compute training and test error
+    yhatTrain = round(yhatTrainProb)
+    yhatTest = round(yhatTestProb)
+    
+    # Make predictions categorical again (instead of 0/1 coding)
+    yhatTrainCat = factor(round(yhatTrainProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
+    yhatTestCat = factor(round(yhatTestProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
+    
+    # Evaluate classifier performance
+    # Accuracy
+    # accTrain[idx1,idx2] <- sum(yhatTrainCat==ytrain)/length(ytrain)
+    # accTest [idx1,idx2] <- sum(yhatTestCat==ytest)/length(ytest)
+    # #
+    # # Error rate
+    # err_train[idx1,idx2] =  1 - accTrain[idx1,idx2]
+    # err_test [idx1,idx2] =   1 - accTest[idx1,idx2]
+    
+    # AUROC
+    suppressMessages(
+      auc_train[idx1,idx2]<-auc(ytrain, yhatTrainCat))
+    suppressMessages(
+      auc_test [idx1,idx2]<-auc(ytest, yhatTestCat))
+    
+    # Compute confusion matrices
+    cMatTrain = cMatTrain + table(true=ytrain,pred=yhatTrainCat)
+    cMatTest = cMatTest + table(true=ytest,pred=yhatTestCat)
+  }
+}
+ls<-list(mod=mod,B=B,auc_train=auc_train,auc_test=auc_test,cMatTrain=cMatTrain,cMatTest=cMatTest)
+return(ls)
+}
--- a/Decline/archive/generation_1/regularisation_steps.R
+++ b/Decline/archive/generation_1/regularisation_steps.R
@ -0,0 +1,142 @@
+## ItMLiHSmar2022
+## regularisation_steps.R, child script
+## Regularised model building and analysation for assignment
+## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
+## 
+## Now modified to use in publication
+## 
+
+## ====================================================================
+## Step 0: data import and wrangling
+## ====================================================================
+
+setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")
+
+# source("data_format.R")
+y1<-factor(as.integer(y)-1) ## Outcome is required to be factor of 0 or 1.
+
+
+## ====================================================================
+## Step 1: settings
+## ====================================================================
+
+## Folds
+K=10
+set.seed(3)
+c<-caret::createFolds(y=y, 
+                      k = K, 
+                      list = FALSE, 
+                      returnTrain = TRUE) # Foldids for alpha tuning
+
+## Defining tuning parameters
+lambdas=2^seq(-10, 5, 1)
+alphas<-seq(0,1,.1)
+
+## Weights for models
+weighted=TRUE
+if (weighted == TRUE) {
+  wght<-as.vector(1 - (table(y)[y] / length(y)))
+} else {
+  wght <- rep(1, nrow(y))
+}
+
+
+## Standardise numeric
+## Centered and 
+
+
+
+## ====================================================================
+## Step 2: all cross validations for each alpha
+## ====================================================================
+
+library(furrr)
+library(purrr)
+library(doMC)
+registerDoMC(cores=6)
+
+# Nested CVs with analysis for all lambdas for each alpha
+# 
+set.seed(3)
+cvs <- future_map(alphas, function(a){
+  cv.glmnet(model.matrix(~.-1,X),
+            y1,
+            weights = wght,
+            lambda=lambdas, 
+            type.measure = "deviance", # This is standard measure and recommended for tuning
+            foldid = c, # Per recommendation the folds are kept for alpha optimisation
+            alpha=a,
+            standardize=TRUE,
+            family=quasibinomial,
+            keep=TRUE) # Same as binomial, but not as picky
+})
+
+## ====================================================================
+# Step 3: optimum lambda for each alpha
+## ====================================================================
+
+
+# For each alpha, lambda is chosen for the lowest meassure (deviance)
+each_alpha <- sapply(seq_along(alphas), function(id) {
+  each_cv <- cvs[[id]]
+  alpha_val <- alphas[id]
+  index_lmin <- match(each_cv$lambda.min, 
+                      each_cv$lambda)
+  c(lamb = each_cv$lambda.min, 
+    alph = alpha_val,
+    cvm = each_cv$cvm[index_lmin])
+})
+
+# Best lambda
+best_lamb <- min(each_alpha["lamb", ])
+
+# Alpha is chosen for best lambda with lowest model deviance, each_alpha["cvm",]
+best_alph <- each_alpha["alph",][each_alpha["cvm",]==min(each_alpha["cvm",]
+                                                         [each_alpha["lamb",] %in% best_lamb])]
+
+## https://stackoverflow.com/questions/42007313/plot-an-roc-curve-in-r-with-ggplot2
+p_roc<-roc.glmnet(cvs[[1]]$fit.preval, newy = y)[[match(best_alph,alphas)]]|> # Plots performance from model with best alpha
+  ggplot(aes(FPR,TPR)) + 
+  geom_step() +
+  coord_cartesian(xlim=c(0,1), ylim=c(0,1)) +
+  geom_abline()+
+  theme_bw()
+
+## ====================================================================
+# Step 4: Creating the final model
+## ====================================================================
+
+source("regular_fun.R") # Custom function
+optimised_model<-regular_fun(X,y1,K,lambdas=best_lamb,alpha=best_alph) 
+# With lambda and alpha specified, the function is just a k-fold cross-validation wrapper, 
+# but keeps model performance figures from each fold.
+
+list2env(optimised_model,.GlobalEnv)
+# Function outputs a list, which is unwrapped to Env.
+# See source script for reference.
+
+## ====================================================================
+# Step 5: creating table of coefficients for inference
+## ====================================================================
+
+Bmatrix<-matrix(unlist(B),ncol=10)
+Bmedian<-apply(Bmatrix,1,median)
+Bmean<-apply(Bmatrix,1,mean)
+
+reg_coef_tbl<-tibble(
+  name = c("Intercept",Hmisc::label(X)),
+  medianX = round(Bmedian,5),
+  ORmed = round(exp(Bmedian),5),
+  meanX = round(Bmean,5),
+  ORmea = round(exp(Bmean),5))%>%
+  # arrange(desc(abs(medianX)))%>%
+  gt()
+
+reg_coef_tbl
+
+## ====================================================================
+# Step 6: plotting predictive performance
+## ====================================================================
+
+reg_cfm<-confusionMatrix(cMatTest)
+reg_auc_sum<-summary(auc_test[,1])
--- a/Decline/archive/generation_1/roc_plot.png
+++ b/Decline/archive/generation_1/roc_plot.png
--- a/Decline/archive/generation_1/sankey.R
+++ b/Decline/archive/generation_1/sankey.R
@ -0,0 +1,72 @@
+#
+# Sankey plot of quartile movement for drops
+#
+
+gth<-X_tbl[,c("pase_0_cut","pase_6_cut","pase_drop_fac")]
+
+gth$pase_drop_fac <- factor(ifelse(gth$pase_0_cut=="1",
+                                       "low",
+                                       gth$pase_drop_fac),
+                                labels = c("no","yes","low")) # Tried and tried to do vectorised, but failed. Matrices acting up..
+
+# Visuals - sankey
+# https://stackoverflow.com/questions/50395027/beautifying-sankey-alluvial-visualization-using-r
+
+
+## Painting
+# LOOK AT THIS GREAT FUNCTION!! Wide pivot format. Includes factor for possible quartile-colouring.
+
+df<-data.frame(gth %>% count(pase_0_cut,pase_6_cut,pase_drop_fac))
+
+lbs0<-c(paste0("1st\n(n=",sum(df$n[df[1]=="1"]),")"), 
+        paste0("2nd\n(n=",sum(df$n[df[1]=="2"]),")"), 
+        paste0("3rd\n(n=",sum(df$n[df[1]=="3"]),")"), 
+        paste0("4th\n(n=",sum(df$n[df[1]=="4"]),")"))
+
+lbs6<-c(paste0("1st\n(n=",sum(df$n[df[2]=="1"]),")"), 
+        paste0("2nd\n(n=",sum(df$n[df[2]=="2"]),")"), 
+        paste0("3rd\n(n=",sum(df$n[df[2]=="3"]),")"), 
+        paste0("4th\n(n=",sum(df$n[df[2]=="4"]),")"))
+
+df[1:2] <- as_factor(df[1:2])
+
+levels(df[,1])<-lbs0[1:length(levels(df[,1]))]
+levels(df[,2])<-lbs6[1:length(levels(df[,2]))]
+
+df[,3]<-factor(df[,3],levels=c("low","no","yes"))
+
+
+lows  <- "grey80"  # grey
+drops <- "#990033"  # Midtrød
+nos   <- "grey50" 
+nas   <- "grey90" 
+border<- "#66c1a3"
+box   <- "#7fccb2"
+
+cls <- c(lows,nos,drops)
+alpha <- 0.7
+
+library(ggalluvial)
+(p_delta<-ggplot(df,aes(y = n, axis1 = pase_0_cut, axis2 = pase_6_cut)) +
+  geom_alluvium(aes(fill = pase_drop_fac, color=pase_drop_fac), width = 1/10, alpha = alpha, knot.pos = 0.3)+
+  geom_stratum(width = 1/6, fill = box, color = border) +
+  geom_text(stat = "stratum", aes(label=after_stat(stratum))) +
+  scale_x_continuous(breaks = 1:2, labels = c("Pre-stroke\nPASE score\nquartiles", "Six months\nPASE score\nquartiles"))  +
+  scale_fill_manual(values  = cls) +
+  scale_color_manual(values = cls) +
+    scale_y_reverse() + # Easy solution to flip y-axis
+  labs(title="Change in physical activity") +
+    ylab("Quartiles")+
+  theme_minimal() +
+  theme(legend.position = "none", 
+        panel.grid.major = element_blank(), 
+        panel.grid.minor = element_blank(), 
+        axis.text.y = element_blank(),
+        axis.title.y = element_blank(),
+        axis.text.x = element_text(size = 14, face = "bold"), 
+        plot.title = element_text(hjust = 0.5, size = 20, face = "bold")))
+
+ggsave("sankey.png", plot = last_plot(), device = NULL, path = NULL,
+       scale = 1, width = 120, height = 200, dpi = 450, limitsize = TRUE,
+       units = "mm")
+          
--- a/Decline/archive/generation_1/sankey.png
+++ b/Decline/archive/generation_1/sankey.png
--- a/Decline/archive/generation_1/standardise.R
+++ b/Decline/archive/generation_1/standardise.R
@ -0,0 +1,41 @@
+## ItMLiHSmar2022
+## standardise.R, child script
+## Data standardisation, returns list
+## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
+
+standardise<-function(train,test,type){
+  # From:
+  # https://datascience.stackexchange.com/questions/13971/standardization-normalization-test-data-in-r
+  
+  sel<-sapply(Xtrain,is.numeric) # Deciding which to stadardise (only numeric)
+  cnm<-colnames(Xtrain) # Saving column names for ordering
+  
+  # Subsetting
+  
+  ## Data to treat
+  train.tr<-train[,sel]
+  test.tr<-test[,sel]
+  
+  ## Data to save
+  train.sv<-train[,!sel]
+  test.sv<-test[,!sel]
+  
+  # Calculate mean and SD of train data
+  trainMean <- sapply(train.tr,mean)
+  trainSd <- sapply(train.tr,sd)
+  
+  if (type=="c"){
+    ## centered
+    norm.trainData<-sweep(train.tr, 2L, trainMean) # using the default "-" to subtract mean column-wise   
+    norm.testData<-sweep(test.tr, 2L, trainMean) # using the default "-" to subtract mean column-wise   
+  }
+ 
+  if (type=="cs"){
+    ## centered AND scaled (Z-score standardisation)
+    norm.trainData<-sweep(sweep(train.tr, 2L, trainMean), 2, trainSd, "/")
+    norm.testData<-sweep(sweep(test.tr, 2L, trainMean), 2, trainSd, "/")
+  }
+  return(list(XtrainSt=cbind(norm.trainData,train.sv)[,cnm], # Reordering columns to original
+              XtestSt=cbind(norm.testData,test.sv)[,cnm]))
+}
+
--- a/Decline/archive/generation_1/table1.RTF
+++ b/Decline/archive/generation_1/table1.RTF
@ -141,7 +141,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell

 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 386}}\cell
@ -160,7 +160,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell

 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 387}}\cell
@ -179,7 +179,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell

 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@ -198,7 +198,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell

 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@ -710,6 +710,25 @@

 \trowd\trrh0

+\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
+\intbl {\f0 {\f0\fs20 PASE first quartile hop F}}\cell
+
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
+\intbl {\f0 {\f0\fs20 391}}\cell
+
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx5616
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx7488
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+
+\row
+
+\trowd\trrh0
+
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
 \intbl {\f0 {\f0\fs20 {\super \i 1}Median (25%;75%) [Minimum,Maximum]; n (%)}}\cell

--- a/Decline/archive/generation_1/table1_overall.RTF
+++ b/Decline/archive/generation_1/table1_overall.RTF
@ -141,7 +141,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell

 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 386}}\cell
@ -160,7 +160,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell

 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 387}}\cell
@ -179,7 +179,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell

 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@ -198,7 +198,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell

 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@ -710,6 +710,25 @@

 \trowd\trrh0

+\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
+\intbl {\f0 {\f0\fs20 PASE first quartile hop F}}\cell
+
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
+\intbl {\f0 {\f0\fs20 391}}\cell
+
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx5616
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx7488
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+
+\row
+
+\trowd\trrh0
+
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
 \intbl {\f0 {\f0\fs20 {\super \i 1}Median (25%;75%) [Minimum,Maximum]; n (%)}}\cell

--- a/Decline/archive/generation_1/table2.RTF
+++ b/Decline/archive/generation_1/table2.RTF
@ -239,7 +239,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell

 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@ -267,7 +267,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell

 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.083}}\cell
@ -295,7 +295,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell

 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@ -323,7 +323,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell

 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
--- a/Decline/archive/generation_1/table2_sec.RTF
+++ b/Decline/archive/generation_1/table2_sec.RTF
@ -239,7 +239,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell

 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@ -267,7 +267,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell

 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@ -295,7 +295,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell

 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@ -323,7 +323,7 @@
 \trowd\trrh0

 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell

 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
--- a/Decline/archive/generation_1/table3.RTF
+++ b/Decline/archive/generation_1/table3.RTF
--- a/Decline/archive/generation_1/table3_sec.RTF
+++ b/Decline/archive/generation_1/table3_sec.RTF
--- a/Decline/data_format.R
+++ b/Decline/data_format.R
@ -135,6 +135,7 @@ var.labels = c(age="Age",
               pase_decl_rel_fac="PASE score difference, relative F",
               pase_decl_abs_fac="PASE score difference, absolute F",
               pase_drop_fac="PASE first quartile drop F",
+               pase_hop_fac="PASE first quartile hop F",
               pase_diff="PASE absolute decline",
               pase_decl_rel="PASE relative decline",
               pase_0_cut="PASE 0 quartiles",
@ -171,9 +172,9 @@ X_tbl <- X_tbl|>
  mutate(## Relative decline
         pase_diff=(pase_0-pase_6),
         pase_decl_rel = pase_diff/pase_0*100,
-         pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
+         # pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
         ## Absolute decline
-         pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
+         # pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
         ## Drop
         pase_0_cut=quantile_cut(as.numeric(pase_0),
                                 groups=4,
@ -189,42 +190,38 @@ X_tbl <- X_tbl|>
                                 ordered.f = TRUE,
                                 inc.outs = TRUE, 
                                 detail.lst=FALSE),
-         pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")))
+         pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")),
+         pase_hop_fac=factor(ifelse(pase_6_cut!=1&pase_0_cut==1,"yes","no")))

 Hmisc::label(X_tbl) = as.list(var.labels[match(names(X_tbl), names(var.labels))])

 # Setting final primary output from "pout"
-if (pout=="decl_rel"){
-  X_tbl <- X_tbl|>
-    mutate(group=pase_decl_rel_fac)
-  
-  X_tbl_f <- X_tbl|>
-    filter(pase_0!=0)|>
-    select(-starts_with("pase_"))
-}
-
-if (pout=="decl_abs"){
-  X_tbl <- X_tbl|>
-    mutate(group=pase_decl_rel_fac)
-  
-  X_tbl_f <- X_tbl|>
-    filter(pase_0>=abs_dif)|>
-    select(-starts_with("pase_"))
-}
-
 if (pout=="drop"){
  X_tbl <- X_tbl|>
    mutate(group=pase_drop_fac)
  
-  print(quantile(as.numeric(X_tbl$pase_0),na.rm=T))
-  print(quantile(as.numeric(X_tbl$pase_6),na.rm=T))
-  print(summary(X_tbl$pase_0_cut))
+  # print(quantile(as.numeric(X_tbl$pase_0)))
+  # print(quantile(as.numeric(X_tbl$pase_6)))
+  # print(summary(X_tbl$pase_0_cut))
  
  X_tbl_f <- X_tbl|>
    filter(pase_0_cut!=1)|>
    select(-starts_with("pase_"))
 }

+if (pout=="hop"){
+  X_tbl <- X_tbl|>
+    mutate(group=pase_hop_fac)
+  
+  # print(quantile(as.numeric(X_tbl$pase_0)))
+  # print(quantile(as.numeric(X_tbl$pase_6)))
+  # print(summary(X_tbl$pase_0_cut))
+  
+  X_tbl_f <- X_tbl|>
+    filter(pase_6_cut!=1)|>
+    select(-starts_with("pase_"))
+}
+
 # Excluding one month measures for primary analysis and setting df for table one
 X_tbl_f <- X_tbl_f |> 
  select(-c(who5_score_1,
@ -273,3 +270,7 @@ library(pROC)
 library(gt)
 library(gtsummary)
 library(dplyr)
+library(tidyverse)
+library(glue)
+# library(ggdendro)
+library(corrplot)
--- a/Decline/data_set.R
+++ b/Decline/data_set.R
@ -1,6 +1,6 @@
 ## ItMLiHS assignment data set

-export<-read.csv("/Volumes/Data/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
+export<-read.csv("/Volumes/Data 1/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))

 export<-export[,c("pase_0",
               "age",
--- a/Decline/regularisation_steps.R
+++ b/Decline/regularisation_steps.R
@ -132,11 +132,19 @@ reg_coef_tbl<-tibble(
  # arrange(desc(abs(medianX)))%>%
  gt()

-reg_coef_tbl
-
 ## ====================================================================
 # Step 6: plotting predictive performance
 ## ====================================================================

 reg_cfm<-confusionMatrix(cMatTest)
 reg_auc_sum<-summary(auc_test[,1])
+
+## ====================================================================
+# Step 7: Packing list to save in loop
+## ====================================================================
+
+ls[[i]] <- list("RegularisedCoefs"=reg_coef_tbl,
+                "bestA"=best_alph,
+                "bestL"=best_lamb,
+                "ConfusionMatrx"=reg_cfm,
+                "AUROC"=reg_auc_sum)
--- a/Longterm/survival.R
+++ b/Longterm/survival.R
@ -0,0 +1 @@
+survival
--- a/app/rsconnect/shinyapps.io/cognitiveindex/index_app.dcf
+++ b/app/rsconnect/shinyapps.io/cognitiveindex/index_app.dcf
@ -5,7 +5,7 @@ account: cognitiveindex
 server: shinyapps.io
 hostUrl: https://api.shinyapps.io/v1
 appId: 6803928
-bundleId: 6217136
+bundleId: 6561652
 url: https://cognitiveindex.shinyapps.io/index_app/
-when: 1660899020.73485
-lastSyncTime: 1660899020.73486
+when: 1669625189.89414
+lastSyncTime: 1669625189.89416