All analyses of project one moved to the new generation (old saved as gen_1)

This commit is contained in:
AG Damsbo 2022-12-02 16:43:31 +01:00
parent 244add2bb4
commit d6fc414822
29 changed files with 1319 additions and 123 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@@ -3,27 +3,31 @@
 ##
 ## Based on the assignment work from the ISL-course
 ##
+## Generation 2 - 2 December 2022
+## Code preparation for analysis on the Statistics Denmark server with the enriched data set.
 ##
+## Analysis plan:
+## Table 1
+## Figure 1: Sankey plot (drop & hop coloured)
+## Table 2: Linear regression model of pase_6~.
+## Table 3: Elastic net prediction models of drop and hop. Performance measures referenced in text.
+##
+## An R Markdown file could be created to write the initial report with the main results.
+## This code is a bit of a mess, as it is the result of several iterations. It works, however.
 ##
 ## ====================================================================
 # Step 0: Primary outcome
 ## ====================================================================
-# Difs
+# Script to run as hop and drop
 rel_dif <- 20 # 20 % difference
 abs_dif <- 20 # 20 point diff
 # pout <- "diff"
 #
 # Note: by increasing the relative decline threshold, sensitivity increases and specificity declines.
 # This argues against overfitting, given the nature of the clinical data and the fact that predicting PA is difficult (!)
 #
 pout <- "drop" # Drop to first quartile
 # decl_rel
 # decl_abs
 # drop
 # hop
 ## ====================================================================
@@ -38,20 +42,6 @@ source("data_set.R")
 source("data_format.R")
-## ====================================================================
-# Libraries
-## ====================================================================
-library(tidyverse)
-library(glue)
-library(patchwork)
-# library(ggdendro)
-library(corrplot)
-library(gt)
-library(gtsummary)
 ## ====================================================================
 ##
 ## Baseline
@@ -64,30 +54,30 @@ library(gtsummary)
 ## ====================================================================
-lbs<-var.labels[match(colnames(X_tbl),
-                      names(var.labels))]
-ls<-lapply(1:ncol(X_tbl),function(x){
-  as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
-})
-ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
-                by = "group",
-                missing = "no",
-                # label = ls[-length(ls)], ## Removing the last, as this is output
-                value = list(where(is.factor) ~ "2"),
-                type = list(mrs_0 ~ "categorical"),
-                statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
-)%>%
-  add_overall() %>%
-  add_n()%>%
-  as_gt()
-ts
-ts_rtf <- file("table1.RTF", "w")
-writeLines(ts%>%as_rtf(), ts_rtf)
-close(ts_rtf)
+# lbs<-var.labels[match(colnames(X_tbl),
+#                       names(var.labels))]
+#
+# ls<-lapply(1:ncol(X_tbl),function(x){
+#   as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
+# })
+#
+# ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
+#                 by = "group",
+#                 missing = "no",
+#                 # label = ls[-length(ls)], ## Removing the last, as this is output
+#                 value = list(where(is.factor) ~ "2"),
+#                 type = list(mrs_0 ~ "categorical"),
+#                 statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
+# )%>%
+#   add_overall() %>%
+#   add_n()%>%
+#   as_gt()
+#
+# ts
+#
+# ts_rtf <- file("table1.RTF", "w")
+# writeLines(ts%>%as_rtf(), ts_rtf)
+# close(ts_rtf)
@@ -104,24 +94,24 @@ ls<-lapply(1:ncol(X_tbl_f),function(x){
 # Step 2: table - edited
 ## ====================================================================
-ts_e<-tbl_summary(X_tbl,
-                  missing = "no",
-                  value = list(where(is.factor) ~ "2"),
-                  type = list(mrs_0 ~ "categorical",
-                              mrs_1 ~ "categorical"),
-                  statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
-)%>%
-  as_gt()
-ts_e
+# ts_e<-tbl_summary(X_tbl,
+#                   missing = "no",
+#                   value = list(where(is.factor) ~ "2"),
+#                   type = list(mrs_0 ~ "categorical",
+#                               mrs_1 ~ "categorical"),
+#                   statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
+# )%>%
+#   as_gt()
+#
+# ts_e
 ## ====================================================================
 # Step 3: table export
 ## ====================================================================
-ts_rtf <- file("table1_overall.RTF", "w")
-writeLines(ts%>%as_rtf(), ts_rtf)
-close(ts_rtf)
+# ts_rtf <- file("table1_overall.RTF", "w")
+# writeLines(ts%>%as_rtf(), ts_rtf)
+# close(ts_rtf)
 ## ====================================================================
 # Baseline table - by PASE group
@@ -138,11 +128,43 @@ ts_q <- X_tbl |>
              statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
   ) |>
   add_overall() |>
-  add_n
+  add_n()
 ts_q
+## ====================================================================
+# Drops and hops
+## ====================================================================
+# TRUEs are patients dropping
+table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])
+# TRUEs: share of patients inactive before stroke who were more active after
+table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])
+# TRUEs: share of patients more active after who were inactive before stroke
+table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])
+# Difference between hop/no-hop
+t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])
+summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
+# Stationary low
+t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
+boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
+## ====================================================================
+# Sankey plot
+## ====================================================================
+# source("sankey.R")
+# p_delta
 ## ====================================================================
 # Six months PASE: Bivariate and multivariate analyses
@@ -196,13 +218,22 @@ tbl_merge(list(uv_reg,mu_reg))
 ## ====================================================================
-source("assign_full.R")
-source("regularisation_steps.R")
+# source("assign_full.R")
+ls <- list()
+for (i in c("drop","hop")){
+  pout <- i
+  source("data_format.R")
+  source("regularisation_steps.R")
+}
+# Loop to run the regularised model on both drop and hop.
+# Results are saved in a list for printing and exporting the plot.
 ## ====================================================================
 # Step 1: data merge
 ## ====================================================================
-tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
+tbl<-merge(ls$drop$RegularisedCoefs$'_data',ls$hop$RegularisedCoefs$'_data',by="name",all.x=T, sort=F)
 ## ====================================================================
 # Step 2: table
@@ -214,22 +245,25 @@ com_coef_tbl<-tbl%>%
     rows = everything(),
     decimals = 3)%>%
   tab_spanner(
-    label = "Full model",
-    columns = 6:8
+    label = "DROP",
+    columns = 2:5
   )%>%
   tab_spanner(
-    label = paste0("Regularised model, (a=",
-                   best_alph,
-                   ", l=",
-                   round(best_lamb,3),
-                   ")"),
-    columns = 2:5
+    label = "HOP",
+    columns = 6:9
   )%>%
   tab_header(
     title = "Model coefficients",
     subtitle = "Combined table of both full and regularised model coefficients"
   )
+# paste0("Regularised model, (a=",
+#        best_alph,
+#        ", l=",
+#        round(best_lamb,3),
+#        ")")
 com_coef_tbl
 ## ====================================================================
@@ -249,12 +283,6 @@ close(com_coef_rtf)
 ##
 ## ====================================================================
-# ROC curve of best model
-p_roc
-ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
 ## ====================================================================
 # Step 1: data set
 ## ====================================================================

BIN
1 PA Decline/archive/.DS_Store vendored Normal file

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,409 @@
##
## Master script
##
## Based on the assignment work from the ISL-course
##
##
##
## ====================================================================
# Step 0: Primary outcome
## ====================================================================
# Difs
rel_dif <- 20 # 20 % difference
abs_dif <- 20 # 20 point diff
# pout <- "diff"
#
# Note: by increasing the relative decline threshold, sensitivity increases and specificity declines.
# This argues against overfitting, given the nature of the clinical data and the fact that predicting PA is difficult (!)
#
pout <- "drop" # Drop to first quartile
# decl_rel
# decl_abs
# drop
# hop
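# Toy illustration of the threshold note above (assumed values, not study data):
# raising rel_dif flags fewer patients as decliners, trading sensitivity for specificity.
# decl <- c(5, 15, 25, 40, 80) # relative declines in %
# table(decl >= 20)            # rel_dif = 20 -> 3 flagged
# table(decl >= 40)            # rel_dif = 40 -> 2 flagged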
## ====================================================================
## Data
## ====================================================================
setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")
source("data_set.R")
# Loading the data set from USB so it is not stored on the computer
source("data_format.R")
## ====================================================================
# Libraries
## ====================================================================
library(tidyverse)
library(glue)
library(patchwork)
# library(ggdendro)
library(corrplot)
library(gt)
library(gtsummary)
## ====================================================================
##
## Baseline
##
## ====================================================================
## ====================================================================
# Step 0: labels
## ====================================================================
lbs<-var.labels[match(colnames(X_tbl),
names(var.labels))]
ls<-lapply(1:ncol(X_tbl),function(x){
as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
})
ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
by = "group",
missing = "no",
# label = ls[-length(ls)], ## Removing the last, as this is output
value = list(where(is.factor) ~ "2"),
type = list(mrs_0 ~ "categorical"),
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
)%>%
add_overall() %>%
add_n()%>%
as_gt()
ts
ts_rtf <- file("table1.RTF", "w")
writeLines(ts%>%as_rtf(), ts_rtf)
close(ts_rtf)
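# The file()/writeLines()/close() export pattern above recurs throughout this script;
# a small helper (hypothetical, not part of the original pipeline) would reduce duplication:
# write_rtf <- function(tbl, path) {
#   con <- file(path, "w")
#   on.exit(close(con))
#   writeLines(gt::as_rtf(tbl), con)
# }
# write_rtf(ts, "table1.RTF")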
## ====================================================================
# Step 1: labels
## ====================================================================
lbs<-var.labels[match(colnames(X_tbl_f), names(var.labels))]
ls<-lapply(1:ncol(X_tbl_f),function(x){
as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
})
## ====================================================================
# Step 2: table - edited
## ====================================================================
ts_e<-tbl_summary(X_tbl,
missing = "no",
value = list(where(is.factor) ~ "2"),
type = list(mrs_0 ~ "categorical",
mrs_1 ~ "categorical"),
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
)%>%
as_gt()
ts_e
## ====================================================================
# Step 3: table export
## ====================================================================
ts_rtf <- file("table1_overall.RTF", "w")
writeLines(ts%>%as_rtf(), ts_rtf)
close(ts_rtf)
## ====================================================================
# Baseline table - by PASE group
## ====================================================================
ts_q <- X_tbl |>
select(vars) |>
mutate(pase_0_cut = factor(quantile_cut(pase_0, groups = 4)[[1]],ordered = TRUE)) |>
select(-pase_6,-pase_0) |>
tbl_summary(missing = "no",
by="pase_0_cut",
value = list(where(is.factor) ~ "2"),
type = list(mrs_0 ~ "categorical"),
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
) |>
add_overall() |>
add_n()
ts_q
## ====================================================================
# Drops and hops
## ====================================================================
# TRUEs are patients dropping
table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])
# TRUEs: share of patients inactive before stroke who were more active after
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])
# TRUEs: share of patients more active after who were inactive before stroke
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])
# Difference between hop/no-hop
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
# Stationary low
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
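# Equivalent tidyverse sketch of the first "drop" share above (assumes complete
# pase_0_cut/pase_6_cut, as in X_tbl):
# X_tbl |>
#   filter(pase_0_cut != "1") |>
#   summarise(drop_share = mean(pase_6_cut == "1"))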
## ====================================================================
# Sankey plot
## ====================================================================
if (pout=="drop"){
source("sankey.R")
p_delta
}
## ====================================================================
# Six months PASE: Bivariate and multivariate analyses
## ====================================================================
dta_lmreg <- X_tbl |>
select(vars) |>
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2)))
Hmisc::label(dta_lmreg$mrs_0) <- "Pre-stroke mRS >0"
uv_reg <- tbl_uvregression(data=dta_lmreg,
method=lm,
y="pase_6",
show_single_row = where(is.factor),
estimate_fun = ~style_sigfig(.x,digits = 3),
pvalue_fun = ~style_pvalue(.x, digits = 3)
)
mu_reg <- dta_lmreg |>
lm(formula=pase_6~.,data=_) |>
tbl_regression(show_single_row = where(is.factor),
estimate_fun = ~style_sigfig(.x,digits = 3),
pvalue_fun = ~style_pvalue(.x, digits = 3)
)|>
add_n()
tbl_merge(list(uv_reg,mu_reg))
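# The two halves of the merged table can be labelled via gtsummary's tab_spanner
# argument (sketch):
# tbl_merge(list(uv_reg, mu_reg),
#           tab_spanner = c("**Univariable**", "**Multivariable**"))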
## ====================================================================
##
## Data variance
##
## Illustrating principal components.
##
## ====================================================================
# source("PCA.R")
#
#
# pca22
# ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
## ====================================================================
##
## Models
##
## ====================================================================
source("assign_full.R")
source("regularisation_steps.R")
## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
gt()%>%
fmt_number(
columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
rows = everything(),
decimals = 3)%>%
tab_spanner(
label = "Full model",
columns = 6:8
)%>%
tab_spanner(
label = paste0("Regularised model, (a=",
best_alph,
", l=",
round(best_lamb,3),
")"),
columns = 2:5
)%>%
tab_header(
title = "Model coefficients",
subtitle = "Combined table of both full and regularised model coefficients"
)
com_coef_tbl
## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)
## ====================================================================
##
## Model performance
##
## Table with performance measures for the two different models.
##
## ====================================================================
# ROC curve of best model
p_roc
ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Measure=c(names(full_cfm$byClass),"Mean AUC"),
"Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
"Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
gt()%>%
tab_header(
title = "Performance meassures",
subtitle = "Combined table of both full and regularised performance meassures"
)
tbl_perf
## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)
#
## ====================================================================
##
## Secondary analysis
##
## ====================================================================
Xy<-dta_s
X<-dta_s|>select(-group)
y<-dta_s$group
source("assign_full.R")
source("regularisation_steps.R")
## ====================================================================
# Step 1: data merge
## ====================================================================
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
## ====================================================================
# Step 2: table
## ====================================================================
com_coef_tbl<-tbl%>%
gt()%>%
fmt_number(
columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
rows = everything(),
decimals = 3)%>%
tab_spanner(
label = "Full model",
columns = 6:8
)%>%
tab_spanner(
label = paste0("Regularised model, (a=",
best_alph,
", l=",
round(best_lamb,3),
")"),
columns = 2:5
)%>%
tab_header(
title = "Model coefficients",
subtitle = "Combined table of both full and regularised model coefficients"
)
com_coef_tbl
## ====================================================================
# Step 3: export
## ====================================================================
com_coef_rtf <- file("table2_sec.RTF", "w")
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
close(com_coef_rtf)
## ====================================================================
##
## Model performance
##
## Table with performance measures for the two different models.
##
## ====================================================================
## ====================================================================
# Step 1: data set
## ====================================================================
tbl<-data.frame(Measure=c(names(full_cfm$byClass),"Mean AUC"),
"Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
"Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
## ====================================================================
# Step 2: table
## ====================================================================
tbl_perf<-tbl%>%
gt()%>%
tab_header(
title = "Performance meassures",
subtitle = "Combined table of both full and regularised performance meassures"
)
tbl_perf
## ====================================================================
# Step 3: export
## ====================================================================
tbl_perf_rtf <- file("table3_sec.RTF", "w")
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
close(tbl_perf_rtf)

View File

@@ -0,0 +1,290 @@
## ItMLiHSmar2022
## data_format.R, child script
## Data formatting and handling
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
##
## Now modified to use in publication
##
## ====================================================================
# Step 1: Loading libraries
## ====================================================================
library(Hmisc)
library(dplyr)
library(daDoctoR)
library(tidyselect)
## ====================================================================
# Step 2: Loading data
## ====================================================================
# rm(list = ls()) # Clear
# setwd("/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/")
# dta<-read.csv("/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/assigndata.csv")
## ====================================================================
# Step 3: Formatting variables
## ====================================================================
dta <- export %>%
# as_tibble()%>%
mutate(any_rep=factor(ifelse(thrombolysis=="yes"|thrombechtomy=="yes","yes","no")), # If not noted, no therapy was received
male_sex= factor(ifelse(sex=="female","no","yes")),
# smoke_ever=factor(ifelse(smoke_ever=="never","no","yes")),
civil=factor(ifelse(civil=="partner","no","yes")), # Sets "yes" for not-cohabiting
rtreat=factor(ifelse(rtreat=="Placebo","no","yes")), # "Yes" receives active treatment
alc=factor(ifelse(alc=="more","yes","no")), # Yes for more than guideline
pase_0=as.numeric(pase_0),
pase_6=as.numeric(pase_6),
across(c("diabetes",
"hypertension",
"smoker",
# "smoker_prev",
"afli",
"pad",
"ami",
"tci",
"mrs_0",
"mrs_1"),as.factor),
across(c("nihss_c",
"age",
"mdi_1", # For "enriched" analysis
"who5_score_1",
"mfi_gen_1",
"mfi_phys_1",
"mfi_act_1",
"mfi_mot_1",
"mfi_men_1"),as.numeric )
)%>%
select(-c(sex))
## ====================================================================
# Step 4: Defining outcome
## ====================================================================
## Moved to step 7
## This is to perform a proper quantile split based on those actually included.
## ====================================================================
# Step 5: Ordering variables
## ====================================================================
vars <- c("age",
"male_sex",
"civil",
"pase_0",
"smoker",
"alc",
"afli",
"hypertension",
"diabetes",
"pad",
"ami",
"tci",
"mrs_0",
"nihss_c",
"any_rep",
"rtreat",
"pase_6")
dta<-select(dta,c(vars,
"mrs_1",
"mfi_gen_1",
"mfi_phys_1",
"mfi_act_1",
"mfi_mot_1",
"mfi_men_1",
"mdi_1",
"who5_score_1"
))
## ====================================================================
# Step 6: Labeling
## ====================================================================
var.labels = c(age="Age",
male_sex="Male",
civil="Living alone",
pase_0="Pre-stroke PASE score",
pase_6="Six month PASE score",
smoker="Daily or occasinally smoking",
# smoker_prev="Previous habbit of smoking",
alc="More alcohol than recommendation",
afli="AFIB",
hypertension="Hypertension",
diabetes="Diabetes",
pad="PAD",
ami="Previous MI",
tci="Previous TIA",
mrs_0="Pre-stroke mRS [-1]",
nihss_c="Acute NIHSS score",
thrombolysis="Acute thrombolysis",
thrombechtomy="Acute thrombechtomy",
any_rep="Any reperfusion therapy",
rtreat="Active trial treatment",
mrs_1="One month mRS [-1]",
mfi_gen_1="One month MFI (General fatigue)",
mfi_phys_1="One month MFI (Physical fatigue)",
mfi_act_1="One month MFI (Reduced activity)",
mfi_mot_1="One month MFI (Reduced motivation)",
mfi_men_1="One month MFI (Mental fatigue)",
mdi_1="One month MDI",
who5_score_1="One month WHO5",
pase_decl_rel_fac="PASE score difference, relative F",
pase_decl_abs_fac="PASE score difference, absolute F",
pase_drop_fac="PASE first quartile drop F",
pase_hop_fac="PASE first quartile hop F",
pase_diff="PASE absolute decline",
pase_decl_rel="PASE relative decline",
pase_0_cut="PASE 0 quartiles",
pase_6_cut="PASE 6 quartiles")
## Labelling based on outcome flag
if (pout=="decl_rel"|pout=="decl_abs"){
var.labels = c(var.labels,group="PASE decline")}
if (pout=="drop"){
var.labels = c(var.labels,group="PASE drop")}
## ====================================================================
# Step 7: final data export
## ====================================================================
data_summary<-summary(dta)
# Saving "old" factorised variables
sel<-sapply(dta,is.factor)
# Reformatting factors as 1/2 for analysis
dta<-dta |>
mutate(across(where(is.factor), as.numeric))|> # Turning factors into 1(no) or 2(yes) for model. Numbered alphabetically.
mutate(across(matches(colnames(dta)[sel]), as.factor),
across(starts_with("pase_"), as.numeric))
# Filtering out non-PASE
X_tbl<-dta |>
filter(!is.na(pase_0),!is.na(pase_6))
nrow(X_tbl)
# Defining possible outcome measures. Keeping them in the df for characterisation
X_tbl <- X_tbl|>
mutate(## Relative decline
pase_diff=(pase_0-pase_6),
pase_decl_rel = pase_diff/pase_0*100,
pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
## Absolute decline
pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
## Drop
pase_0_cut=quantile_cut(as.numeric(pase_0),
groups=4,
group.names = c(as.character(1:4)),
y=as.numeric(pase_0),
ordered.f = TRUE,
inc.outs = TRUE,
detail.lst=FALSE),
pase_6_cut=quantile_cut(as.numeric(pase_6),
groups=4,
group.names = c(as.character(1:4)),
y=as.numeric(pase_0),
ordered.f = TRUE,
inc.outs = TRUE,
detail.lst=FALSE),
pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")),
pase_hop_fac=factor(ifelse(pase_6_cut!=1&pase_0_cut==1,"yes","no")))
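# Toy sketch of the drop/hop definitions in base R (quantile_cut() from daDoctoR is
# used above; both scores are cut at pre-stroke quartiles, mirroring y = pase_0):
# p0 <- c(10, 40, 90, 150); p6 <- c(12, 20, 100, 140)
# brk <- quantile(p0, 0:4/4)
# q0 <- cut(p0, brk, include.lowest = TRUE, labels = 1:4)
# q6 <- cut(p6, brk, include.lowest = TRUE, labels = 1:4)
# q6 == 1 & q0 != 1 # drop: FALSE TRUE FALSE FALSE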
Hmisc::label(X_tbl) = as.list(var.labels[match(names(X_tbl), names(var.labels))])
# Setting final primary output from "pout"
if (pout=="decl_rel"){
X_tbl <- X_tbl|>
mutate(group=pase_decl_rel_fac)
X_tbl_f <- X_tbl|>
filter(pase_0!=0)|>
select(-starts_with("pase_"))
}
if (pout=="decl_abs"){
X_tbl <- X_tbl|>
mutate(group=pase_decl_rel_fac)
X_tbl_f <- X_tbl|>
filter(pase_0>=abs_dif)|>
select(-starts_with("pase_"))
}
if (pout=="drop"){
X_tbl <- X_tbl|>
mutate(group=pase_drop_fac)
# print(quantile(as.numeric(X_tbl$pase_0)))
# print(quantile(as.numeric(X_tbl$pase_6)))
# print(summary(X_tbl$pase_0_cut))
X_tbl_f <- X_tbl|>
filter(pase_0_cut!=1)|>
select(-starts_with("pase_"))
}
if (pout=="hop"){
X_tbl <- X_tbl|>
mutate(group=pase_hop_fac)
# print(quantile(as.numeric(X_tbl$pase_0)))
# print(quantile(as.numeric(X_tbl$pase_6)))
# print(summary(X_tbl$pase_0_cut))
X_tbl_f <- X_tbl|>
filter(pase_6_cut!=1)|>
select(-starts_with("pase_"))
}
# Excluding one month measures for primary analysis and setting df for table one
X_tbl_f <- X_tbl_f |>
select(-c(who5_score_1,
mdi_1,
mrs_1,
starts_with("mfi_"))) # Left out of model as no present in drop-group
# Dropping non-complete for analysis
Xy <- X_tbl_f|>
na.omit()|> # Keeping only complete observations
select(-c(tci) # Left out of the model, as none are present in the drop group
)|>
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2))) # Sets binary mRS 0 to include in glmnet, 0 or above
label(Xy) = as.list(var.labels[match(names(Xy), names(var.labels))])
X<-dplyr::select(Xy, -group, -starts_with("pase_")) # Exclude primary outcome
y<-Xy$group
## ====================================================================
# Secondary analysis
## ====================================================================
dta_s<-X_tbl|>
select(-c(tci),
-starts_with("pase_"))|>
na.omit()|>
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2)),# Sets binary mRS 0 to include in glmnet, 0 or above
mrs_1=factor(ifelse(mrs_1==1,1,2)))# Sets binary mRS 1 to include in glmnet, 0 or above
label(dta_s) = as.list(var.labels[match(names(dta_s), names(var.labels))])
## ====================================================================
# Step 8: Loading rest of libraries
## ====================================================================
library(tidyverse)
library(patchwork)
library(caret)
library(glmnet)
library(leaps)
library(pROC)
library(gt)
library(gtsummary)
library(dplyr)

View File

@@ -0,0 +1,49 @@
## ItMLiHS assignment data set
export<-read.csv("/Volumes/Data 1/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
export<-export[,c("pase_0",
"age",
"sex",
"civil",
"smoke_ever",
"smoker",
"rtreat",
"alc",
"afli",
"hypertension",
"diabetes",
"mrs_0",
"nihss_c",
"thrombolysis",
"pad",
"thrombechtomy",
"ami",
"tci",
"pase_6",
"mrs_1",
"mfi_gen_1",
"mfi_phys_1",
"mfi_act_1",
"mfi_mot_1",
"mfi_men_1",
"mdi_1",
"who5_score_1")]
export$diabetes[is.na(export$diabetes)]<-"no"
export$hypertension[is.na(export$hypertension)]<-"no"
export$thrombolysis[is.na(export$thrombolysis)]<-"no"
export$thrombechtomy[is.na(export$thrombechtomy)]<-"no"
export$pad[is.na(export$pad)]<-"no"
export$ami[is.na(export$ami)]<-"no"
# export$smoker_prev <- ifelse(export$smoker=="3","yes","no")
export$smoker <- ifelse(export$smoker=="1","yes","no")
export$smoker[is.na(export$smoker)] <- "no"
# export$mrs_0[export$mrs_0==3]<-NA
# export<-na.omit(export)
export
# write.csv(export,"/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/assigndata.csv",row.names = FALSE)
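# Quick sketch to verify the recoding above: remaining NAs per column
# colSums(is.na(export))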

View File

(image diff: 317 KiB before, 317 KiB after; not shown)

View File

@@ -0,0 +1,117 @@
## ItMLiHSmar2022
## regular_fun.R, child script
## Regularisation model building function
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
##
## Now modified to use in publication
##
regular_fun<-function(X,y,K,lambdas,alpha){
n<-nrow(X)
set.seed(321)
# Using caret function to ensure both levels represented in all folds
c<-createFolds(y=y, k = K, list = FALSE, returnTrain = TRUE)
B<-yhatTestProbKeep<-list()
accTrain<-accTest<-err_train<-err_test<-auc_train<-auc_test<-matrix(nrow = K,ncol = length(lambdas))
catinfo<-levels(y)
cMatTrain<-cMatTest<-table(true=factor(c(0,0),levels=catinfo),pred=factor(c(0,0),levels=catinfo)) # Initialise zero confusion matrices with the outcome levels as dimnames
## Iterate over partitions
for (idx1 in 1:K){
# Status
cat('Processing fold', idx1, 'of', K,'\n')
# idx1=1
# Get training- and test sets
I_train = c!=idx1 ## Creating selection vector of TRUE/FALSE
I_test = !I_train
Xtrain = X[I_train,]
ytrain = y[I_train]
Xtest = X[I_test,]
ytest = y[I_test]
## Model matrices for glmnet
## Using the complicated approach not to include first level.
# Xmat.train<-model.matrix(~ .-1, data=Xtrain,
# contrasts.arg = lapply(Xtrain[,sapply(Xtrain, is.factor)],
# contrasts, contrasts=T))
# Xmat.test<-model.matrix(~ .-1, data=Xtest,
# contrasts.arg = lapply(Xtest[,sapply(Xtest, is.factor)],
# contrasts, contrasts=T))
# Xmat.train<-model.matrix(~.-1,Xtrain)
# Xmat.test<-model.matrix(~.-1,Xtest)
# Weights
ytrain_weight<-as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))
# ytest_weight<-as.vector(1 / (table(ytest)[ytest] / length(ytest)))
# Fit regularized linear regression model
mod<-glmnet(Xtrain, ytrain,
alpha = alpha, ## Alpha = 1 for lasso
lambda = lambdas, ## Setting lambdas
standardize = TRUE, ## Scales and centers
weights = ytrain_weight,
family = "binomial"
)
# Keep coefficients for plot
B[[idx1]] <- as.matrix(coef(mod))
# Iterate over regularization strengths to compute training- and test
# errors for individual regularization strengths.
for (idx2 in 1:length(lambdas)){
# idx2=1
# Predict
yhatTrainProb<-predict(mod,
s = lambdas[idx2],
newx = data.matrix(Xtrain),
type = "response"
)
yhatTestProb<-predict(mod,
s = lambdas[idx2],
newx = data.matrix(Xtest),
type = "response"
)
# Compute training and test error
yhatTrain = round(yhatTrainProb)
yhatTest = round(yhatTestProb)
# Make predictions categorical again (instead of 0/1 coding)
yhatTrainCat = factor(round(yhatTrainProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
yhatTestCat = factor(round(yhatTestProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
# Evaluate classifier performance
# Accuracy
# accTrain[idx1,idx2] <- sum(yhatTrainCat==ytrain)/length(ytrain)
# accTest [idx1,idx2] <- sum(yhatTestCat==ytest)/length(ytest)
# #
# # Error rate
# err_train[idx1,idx2] = 1 - accTrain[idx1,idx2]
# err_test [idx1,idx2] = 1 - accTest[idx1,idx2]
# AUROC
suppressMessages(
auc_train[idx1,idx2]<-auc(ytrain, yhatTrainCat))
suppressMessages(
auc_test [idx1,idx2]<-auc(ytest, yhatTestCat))
# Compute confusion matrices
cMatTrain = cMatTrain + table(true=ytrain,pred=yhatTrainCat)
cMatTest = cMatTest + table(true=ytest,pred=yhatTestCat)
}
}
ls<-list(mod=mod,B=B,auc_train=auc_train,auc_test=auc_test,cMatTrain=cMatTrain,cMatTest=cMatTest)
return(ls)
}

View File

@@ -0,0 +1,142 @@
## ItMLiHSmar2022
## regularisation_steps.R, child script
## Regularised model building and analysis for assignment
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
##
## Now modified to use in publication
##
## ====================================================================
## Step 0: data import and wrangling
## ====================================================================
setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")
# source("data_format.R")
y1<-factor(as.integer(y)-1) ## Outcome is required to be factor of 0 or 1.
## ====================================================================
## Step 1: settings
## ====================================================================
## Folds
K=10
set.seed(3)
c<-caret::createFolds(y=y,
k = K,
list = FALSE,
returnTrain = TRUE) # Foldids for alpha tuning
## Defining tuning parameters
lambdas=2^seq(-10, 5, 1)
alphas<-seq(0,1,.1)
## Weights for models
weighted=TRUE
if (weighted == TRUE) {
wght<-as.vector(1 - (table(y)[y] / length(y)))
} else {
wght <- rep(1, length(y)) # y is a factor, so length(), not nrow()
}
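# Toy illustration of the inverse-frequency weights above (assumed data):
# y_toy <- factor(c("no", "no", "no", "yes"))
# 1 - (table(y_toy)[y_toy] / length(y_toy))
# # -> 0.25 0.25 0.25 0.75: minority-class observations get larger weights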
## Numeric predictors are centred and scaled by glmnet itself (standardize = TRUE below).
## ====================================================================
## Step 2: all cross validations for each alpha
## ====================================================================
library(furrr)
library(purrr)
library(doMC)
registerDoMC(cores=6)
# Nested CVs with analysis for all lambdas for each alpha
#
set.seed(3)
cvs <- future_map(alphas, function(a){
cv.glmnet(model.matrix(~.-1,X),
y1,
weights = wght,
lambda=lambdas,
type.measure = "deviance", # This is standard measure and recommended for tuning
foldid = c, # Per recommendation the folds are kept for alpha optimisation
alpha=a,
standardize=TRUE,
family=quasibinomial(), # Same as binomial, but less strict (e.g. about non-integer weights)
keep=TRUE) # Keep pre-validated fits for the ROC curve below
})
## ====================================================================
# Step 3: optimum lambda for each alpha
## ====================================================================
# For each alpha, lambda is chosen for the lowest measure (deviance)
each_alpha <- sapply(seq_along(alphas), function(id) {
each_cv <- cvs[[id]]
alpha_val <- alphas[id]
index_lmin <- match(each_cv$lambda.min,
each_cv$lambda)
c(lamb = each_cv$lambda.min,
alph = alpha_val,
cvm = each_cv$cvm[index_lmin])
})
# Best lambda
best_lamb <- min(each_alpha["lamb", ])
# Alpha is chosen for best lambda with lowest model deviance, each_alpha["cvm",]
best_alph <- each_alpha["alph",][each_alpha["cvm",]==min(each_alpha["cvm",]
[each_alpha["lamb",] %in% best_lamb])]
## https://stackoverflow.com/questions/42007313/plot-an-roc-curve-in-r-with-ggplot2
p_roc<-roc.glmnet(cvs[[1]]$fit.preval, newy = y)[[match(best_alph,alphas)]]|> # Plots performance from model with best alpha
ggplot(aes(FPR,TPR)) +
geom_step() +
coord_cartesian(xlim=c(0,1), ylim=c(0,1)) +
geom_abline()+
theme_bw()
## ====================================================================
# Step 4: Creating the final model
## ====================================================================
source("regular_fun.R") # Custom function
optimised_model<-regular_fun(X,y1,K,lambdas=best_lamb,alpha=best_alph)
# With lambda and alpha specified, the function is just a k-fold cross-validation wrapper,
# but keeps model performance figures from each fold.
list2env(optimised_model,.GlobalEnv)
# Function outputs a list, which is unwrapped to Env.
# See source script for reference.
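# list2env() is shorthand for copying each element by name, e.g. (sketch):
# mod <- optimised_model$mod
# B <- optimised_model$B
# cMatTest <- optimised_model$cMatTest # etc.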
## ====================================================================
# Step 5: creating table of coefficients for inference
## ====================================================================
Bmatrix<-matrix(unlist(B),ncol=K) # One column of coefficients per fold
Bmedian<-apply(Bmatrix,1,median)
Bmean<-apply(Bmatrix,1,mean)
reg_coef_tbl<-tibble(
name = c("Intercept",Hmisc::label(X)),
medianX = round(Bmedian,5),
ORmed = round(exp(Bmedian),5),
meanX = round(Bmean,5),
ORmea = round(exp(Bmean),5))%>%
# arrange(desc(abs(medianX)))%>%
gt()
reg_coef_tbl
## ====================================================================
# Step 6: plotting predictive performance
## ====================================================================
reg_cfm<-confusionMatrix(cMatTest)
reg_auc_sum<-summary(auc_test[,1])
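# Individual measures can then be pulled from caret's byClass vector (sketch):
# reg_cfm$byClass[c("Sensitivity", "Specificity")]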

View File

(image diff: 63 KiB before, 63 KiB after; not shown)

View File

@@ -0,0 +1,72 @@
#
# Sankey plot of quartile movement for drops
#
gth<-X_tbl[,c("pase_0_cut","pase_6_cut","pase_drop_fac")]
gth$pase_drop_fac <- factor(ifelse(gth$pase_0_cut=="1",
"low",
gth$pase_drop_fac),
labels = c("no","yes","low")) # Tried and tried to do vectorised, but failed. Matrices acting up..
# Visuals - sankey
# https://stackoverflow.com/questions/50395027/beautifying-sankey-alluvial-visualization-using-r
## Painting
# LOOK AT THIS GREAT FUNCTION!! Wide pivot format. Includes factor for possible quartile-colouring.
df<-data.frame(gth %>% count(pase_0_cut,pase_6_cut,pase_drop_fac))
lbs0<-c(paste0("1st\n(n=",sum(df$n[df[1]=="1"]),")"),
paste0("2nd\n(n=",sum(df$n[df[1]=="2"]),")"),
paste0("3rd\n(n=",sum(df$n[df[1]=="3"]),")"),
paste0("4th\n(n=",sum(df$n[df[1]=="4"]),")"))
lbs6<-c(paste0("1st\n(n=",sum(df$n[df[2]=="1"]),")"),
paste0("2nd\n(n=",sum(df$n[df[2]=="2"]),")"),
paste0("3rd\n(n=",sum(df$n[df[2]=="3"]),")"),
paste0("4th\n(n=",sum(df$n[df[2]=="4"]),")"))
df[1:2] <- as_factor(df[1:2])
levels(df[,1])<-lbs0[1:length(levels(df[,1]))]
levels(df[,2])<-lbs6[1:length(levels(df[,2]))]
df[,3]<-factor(df[,3],levels=c("low","no","yes"))
lows <- "grey80" # grey
drops <- "#990033" # Midtrød
nos <- "grey50"
nas <- "grey90"
border<- "#66c1a3"
box <- "#7fccb2"
cls <- c(lows,nos,drops)
alpha <- 0.7
library(ggalluvial)
(p_delta<-ggplot(df,aes(y = n, axis1 = pase_0_cut, axis2 = pase_6_cut)) +
geom_alluvium(aes(fill = pase_drop_fac, color=pase_drop_fac), width = 1/10, alpha = alpha, knot.pos = 0.3)+
geom_stratum(width = 1/6, fill = box, color = border) +
geom_text(stat = "stratum", aes(label=after_stat(stratum))) +
scale_x_continuous(breaks = 1:2, labels = c("Pre-stroke\nPASE score\nquartiles", "Six months\nPASE score\nquartiles")) +
scale_fill_manual(values = cls) +
scale_color_manual(values = cls) +
scale_y_reverse() + # Easy solution to flip y-axis
labs(title="Change in physical activity") +
ylab("Quartiles")+
theme_minimal() +
theme(legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(size = 14, face = "bold"),
plot.title = element_text(hjust = 0.5, size = 20, face = "bold")))
ggsave("sankey.png", plot = last_plot(), device = NULL, path = NULL,
scale = 1, width = 120, height = 200, dpi = 450, limitsize = TRUE,
units = "mm")

Binary file not shown (image added: 695 KiB).

View File

@@ -0,0 +1,41 @@
## ItMLiHSmar2022
## standardise.R, child script
## Data standardisation, returns list
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
standardise<-function(train,test,type){
# From:
# https://datascience.stackexchange.com/questions/13971/standardization-normalization-test-data-in-r
sel<-sapply(train,is.numeric) # Deciding which columns to standardise (only numeric)
cnm<-colnames(train) # Saving column names for ordering
# Subsetting
## Data to treat
train.tr<-train[,sel]
test.tr<-test[,sel]
## Data to save
train.sv<-train[,!sel]
test.sv<-test[,!sel]
# Calculate mean and SD of train data
trainMean <- sapply(train.tr,mean)
trainSd <- sapply(train.tr,sd)
if (type=="c"){
## centered
norm.trainData<-sweep(train.tr, 2L, trainMean) # using the default "-" to subtract mean column-wise
norm.testData<-sweep(test.tr, 2L, trainMean) # using the default "-" to subtract mean column-wise
}
if (type=="cs"){
## centered AND scaled (Z-score standardisation)
norm.trainData<-sweep(sweep(train.tr, 2L, trainMean), 2, trainSd, "/")
norm.testData<-sweep(sweep(test.tr, 2L, trainMean), 2, trainSd, "/")
}
return(list(XtrainSt=cbind(norm.trainData,train.sv)[,cnm], # Reordering columns to original
XtestSt=cbind(norm.testData,test.sv)[,cnm]))
}
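# Example usage (sketch; Xtrain/Xtest are assumed split objects as in regular_fun.R):
# st <- standardise(train = Xtrain, test = Xtest, type = "cs")
# XtrainSt <- st$XtrainSt
# XtestSt  <- st$XtestSt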

View File

@@ -141,7 +141,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 386}}\cell
@@ -160,7 +160,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 387}}\cell
@@ -179,7 +179,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@@ -198,7 +198,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@@ -710,6 +710,25 @@
 \trowd\trrh0
+\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
+\intbl {\f0 {\f0\fs20 PASE first quartile hop F}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
+\intbl {\f0 {\f0\fs20 391}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx5616
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx7488
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\row
+\trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
 \intbl {\f0 {\f0\fs20 {\super \i 1}Median (25%;75%) [Minimum,Maximum]; n (%)}}\cell

View File

@@ -141,7 +141,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 386}}\cell
@@ -160,7 +160,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 387}}\cell
@@ -179,7 +179,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@@ -198,7 +198,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell
 \pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
 \intbl {\f0 {\f0\fs20 391}}\cell
@@ -710,6 +710,25 @@
 \trowd\trrh0
+\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
+\intbl {\f0 {\f0\fs20 PASE first quartile hop F}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
+\intbl {\f0 {\f0\fs20 391}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx5616
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx7488
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
+\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
+\row
+\trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
 \intbl {\f0 {\f0\fs20 {\super \i 1}Median (25%;75%) [Minimum,Maximum]; n (%)}}\cell

View File

@@ -239,7 +239,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -267,7 +267,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.083}}\cell
@@ -295,7 +295,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -323,7 +323,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell

View File

@@ -239,7 +239,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
+\intbl {\f0 {\f0\fs20 AFIB}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -267,7 +267,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
+\intbl {\f0 {\f0\fs20 Hypertension}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -295,7 +295,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
+\intbl {\f0 {\f0\fs20 Diabetes}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell
@@ -323,7 +323,7 @@
 \trowd\trrh0
 \pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
-\intbl {\f0 {\f0\fs20 Known PAD}}\cell
+\intbl {\f0 {\f0\fs20 PAD}}\cell
 \pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
 \intbl {\f0 {\f0\fs20 0.000}}\cell

View File

@@ -135,6 +135,7 @@ var.labels = c(age="Age",
                pase_decl_rel_fac="PASE score difference, relative F",
                pase_decl_abs_fac="PASE score difference, absolute F",
                pase_drop_fac="PASE first quartile drop F",
+               pase_hop_fac="PASE first quartile hop F",
                pase_diff="PASE absolute decline",
                pase_decl_rel="PASE relative decline",
                pase_0_cut="PASE 0 quartiles",
@@ -171,9 +172,9 @@ X_tbl <- X_tbl|>
   mutate(## Relative decline
     pase_diff=(pase_0-pase_6),
     pase_decl_rel = pase_diff/pase_0*100,
-    pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
+    # pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
     ## Absolute decline
-    pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
+    # pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
     ## Drop
     pase_0_cut=quantile_cut(as.numeric(pase_0),
                             groups=4,
@@ -189,42 +190,38 @@ X_tbl <- X_tbl|>
                             ordered.f = TRUE,
                             inc.outs = TRUE,
                             detail.lst=FALSE),
-    pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")))
+    pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")),
+    pase_hop_fac=factor(ifelse(pase_6_cut!=1&pase_0_cut==1,"yes","no")))
 Hmisc::label(X_tbl) = as.list(var.labels[match(names(X_tbl), names(var.labels))])
 # Setting final primary output from "pout"
-if (pout=="decl_rel"){
-  X_tbl <- X_tbl|>
-    mutate(group=pase_decl_rel_fac)
-  X_tbl_f <- X_tbl|>
-    filter(pase_0!=0)|>
-    select(-starts_with("pase_"))
-}
-if (pout=="decl_abs"){
-  X_tbl <- X_tbl|>
-    mutate(group=pase_decl_rel_fac)
-  X_tbl_f <- X_tbl|>
-    filter(pase_0>=abs_dif)|>
-    select(-starts_with("pase_"))
-}
 if (pout=="drop"){
   X_tbl <- X_tbl|>
     mutate(group=pase_drop_fac)
-  print(quantile(as.numeric(X_tbl$pase_0),na.rm=T))
-  print(quantile(as.numeric(X_tbl$pase_6),na.rm=T))
-  print(summary(X_tbl$pase_0_cut))
+  # print(quantile(as.numeric(X_tbl$pase_0)))
+  # print(quantile(as.numeric(X_tbl$pase_6)))
+  # print(summary(X_tbl$pase_0_cut))
   X_tbl_f <- X_tbl|>
     filter(pase_0_cut!=1)|>
    select(-starts_with("pase_"))
 }
+if (pout=="hop"){
+  X_tbl <- X_tbl|>
+    mutate(group=pase_hop_fac)
+  # print(quantile(as.numeric(X_tbl$pase_0)))
+  # print(quantile(as.numeric(X_tbl$pase_6)))
+  # print(summary(X_tbl$pase_0_cut))
+  X_tbl_f <- X_tbl|>
+    filter(pase_6_cut!=1)|>
+    select(-starts_with("pase_"))
+}
 # Excluding one month measures for primary analysis and setting df for table one
 X_tbl_f <- X_tbl_f |>
   select(-c(who5_score_1,
@@ -273,3 +270,7 @@ library(pROC)
 library(gt)
 library(gtsummary)
 library(dplyr)
+library(tidyverse)
+library(glue)
+# library(ggdendro)
+library(corrplot)

View File

@@ -1,6 +1,6 @@
 ## ItMLiHS assignment data set
-export<-read.csv("/Volumes/Data/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
+export<-read.csv("/Volumes/Data 1/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
 export<-export[,c("pase_0",
                   "age",

View File

@@ -132,11 +132,19 @@ reg_coef_tbl<-tibble(
   # arrange(desc(abs(medianX)))%>%
   gt()
-reg_coef_tbl
 ## ====================================================================
 # Step 6: plotting predictive performance
 ## ====================================================================
 reg_cfm<-confusionMatrix(cMatTest)
 reg_auc_sum<-summary(auc_test[,1])
+## ====================================================================
+# Step 7: Packing list to save in loop
+## ====================================================================
+ls[[i]] <- list("RegularisedCoefs"=reg_coef_tbl,
+                "bestA"=best_alph,
+                "bestL"=best_lamb,
+                "ConfusionMatrx"=reg_cfm,
+                "AUROC"=reg_auc_sum)

Longterm/survival.R Normal file
View File

@@ -0,0 +1 @@
survival

View File

@@ -5,7 +5,7 @@ account: cognitiveindex
 server: shinyapps.io
 hostUrl: https://api.shinyapps.io/v1
 appId: 6803928
-bundleId: 6217136
+bundleId: 6561652
 url: https://cognitiveindex.shinyapps.io/index_app/
-when: 1660899020.73485
+when: 1669625189.89414
-lastSyncTime: 1660899020.73486
+lastSyncTime: 1669625189.89416