410 lines
12 KiB
R
410 lines
12 KiB
R
|
##
|
||
|
## Master script
|
||
|
##
|
||
|
## Based on the assignment work from the ISL-course
|
||
|
##
|
||
|
##
|
||
|
##
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 0: Primary outcome
|
||
|
## ====================================================================
|
||
|
|
||
|
# Difs
|
||
|
rel_dif <- 20 # 20 % difference
|
||
|
abs_dif <- 20 # 20 point diff
|
||
|
|
||
|
# pout <- "diff"
|
||
|
#
|
||
|
# Note:: By increasing the relative decline, the sensitivity increases and specificity declines.
|
||
|
# This fact is an argument against over fitting. The reason being the nature of the clinical data and the fact, that predicting PA is difficult (!)
|
||
|
#
|
||
|
pout <- "drop" # Drop to first quartile
|
||
|
|
||
|
# decl_rel
|
||
|
# decl_abs
|
||
|
# drop
|
||
|
# hop
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
## Data
|
||
|
## ====================================================================
|
||
|
|
||
|
|
||
|
setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")
|
||
|
|
||
|
source("data_set.R")
|
||
|
# Loading data-set from USB, to not store on computer
|
||
|
|
||
|
source("data_format.R")
|
||
|
|
||
|
## ====================================================================
|
||
|
# Libraries
|
||
|
## ====================================================================
|
||
|
|
||
|
|
||
|
library(tidyverse)
|
||
|
library(glue)
|
||
|
library(patchwork)
|
||
|
# library(ggdendro)
|
||
|
library(corrplot)
|
||
|
library(gt)
|
||
|
library(gtsummary)
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
##
|
||
|
## Baseline
|
||
|
##
|
||
|
## ====================================================================
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 0: labels
|
||
|
## ====================================================================
|
||
|
|
||
|
|
||
|
lbs<-var.labels[match(colnames(X_tbl),
|
||
|
names(var.labels))]
|
||
|
|
||
|
ls<-lapply(1:ncol(X_tbl),function(x){
|
||
|
as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
|
||
|
})
|
||
|
|
||
|
ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
|
||
|
by = "group",
|
||
|
missing = "no",
|
||
|
# label = ls[-length(ls)], ## Removing the last, as this is output
|
||
|
value = list(where(is.factor) ~ "2"),
|
||
|
type = list(mrs_0 ~ "categorical"),
|
||
|
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||
|
)%>%
|
||
|
add_overall() %>%
|
||
|
add_n()%>%
|
||
|
as_gt()
|
||
|
|
||
|
ts
|
||
|
|
||
|
ts_rtf <- file("table1.RTF", "w")
|
||
|
writeLines(ts%>%as_rtf(), ts_rtf)
|
||
|
close(ts_rtf)
|
||
|
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 1: labels
|
||
|
## ====================================================================
|
||
|
lbs<-var.labels[match(colnames(X_tbl_f), names(var.labels))]
|
||
|
|
||
|
ls<-lapply(1:ncol(X_tbl_f),function(x){
|
||
|
as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
|
||
|
})
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 2: table - edited
|
||
|
## ====================================================================
|
||
|
|
||
|
ts_e<-tbl_summary(X_tbl,
|
||
|
missing = "no",
|
||
|
value = list(where(is.factor) ~ "2"),
|
||
|
type = list(mrs_0 ~ "categorical",
|
||
|
mrs_1 ~ "categorical"),
|
||
|
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||
|
)%>%
|
||
|
as_gt()
|
||
|
|
||
|
ts_e
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 3: table export
|
||
|
## ====================================================================
|
||
|
|
||
|
ts_rtf <- file("table1_overall.RTF", "w")
|
||
|
writeLines(ts%>%as_rtf(), ts_rtf)
|
||
|
close(ts_rtf)
|
||
|
|
||
|
## ====================================================================
|
||
|
# Baseline table - by PASE group
|
||
|
## ====================================================================
|
||
|
|
||
|
ts_q <- X_tbl |>
|
||
|
select(vars) |>
|
||
|
mutate(pase_0_cut = factor(quantile_cut(pase_0, groups = 4)[[1]],ordered = TRUE)) |>
|
||
|
select(-pase_6,-pase_0) |>
|
||
|
tbl_summary(missing = "no",
|
||
|
by="pase_0_cut",
|
||
|
value = list(where(is.factor) ~ "2"),
|
||
|
type = list(mrs_0 ~ "categorical"),
|
||
|
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||
|
) |>
|
||
|
add_overall() |>
|
||
|
add_n ()
|
||
|
|
||
|
ts_q
|
||
|
|
||
|
## ====================================================================
|
||
|
# Drops and hops
|
||
|
## ====================================================================
|
||
|
|
||
|
# TRUEs are patients dropping
|
||
|
table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])
|
||
|
|
||
|
# TRUEs are percentage of patients inactive before stroke being more active after
|
||
|
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])
|
||
|
|
||
|
# TRUEs are percentage of patients being more active after that were inactive before stroke
|
||
|
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])
|
||
|
|
||
|
# Difference between hop/no-hop
|
||
|
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
|
||
|
|
||
|
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])
|
||
|
|
||
|
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
|
||
|
|
||
|
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
|
||
|
|
||
|
# Stationary low
|
||
|
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
|
||
|
|
||
|
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
|
||
|
|
||
|
## ====================================================================
|
||
|
# Sankey plot
|
||
|
## ====================================================================
|
||
|
|
||
|
if (pout=="drop"){
|
||
|
source("sankey.R")
|
||
|
p_delta
|
||
|
}
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
# Six months PASE: Bivariate and multivariate analyses
|
||
|
## ====================================================================
|
||
|
|
||
|
dta_lmreg <- X_tbl |>
|
||
|
select(vars) |>
|
||
|
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2)))
|
||
|
|
||
|
Hmisc::label(dta_lmreg$mrs_0) <- "Pre-stroke mRS >0"
|
||
|
|
||
|
uv_reg <- tbl_uvregression(data=dta_lmreg,
|
||
|
method=lm,
|
||
|
y="pase_6",
|
||
|
show_single_row = where(is.factor),
|
||
|
estimate_fun = ~style_sigfig(.x,digits = 3),
|
||
|
pvalue_fun = ~style_pvalue(.x, digits = 3)
|
||
|
)
|
||
|
|
||
|
mu_reg <- dta_lmreg |>
|
||
|
lm(formula=pase_6~.,data=_) |>
|
||
|
tbl_regression(show_single_row = where(is.factor),
|
||
|
estimate_fun = ~style_sigfig(.x,digits = 3),
|
||
|
pvalue_fun = ~style_pvalue(.x, digits = 3)
|
||
|
)|>
|
||
|
add_n()
|
||
|
|
||
|
tbl_merge(list(uv_reg,mu_reg))
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
##
|
||
|
## Data variance
|
||
|
##
|
||
|
## Illustrating principal components.
|
||
|
##
|
||
|
## ====================================================================
|
||
|
|
||
|
|
||
|
# source("PCA.R")
|
||
|
#
|
||
|
#
|
||
|
# pca22
|
||
|
# ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
##
|
||
|
## Models
|
||
|
##
|
||
|
## ====================================================================
|
||
|
|
||
|
|
||
|
source("assign_full.R")
|
||
|
source("regularisation_steps.R")
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 1: data merge
|
||
|
## ====================================================================
|
||
|
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 2: table
|
||
|
## ====================================================================
|
||
|
com_coef_tbl<-tbl%>%
|
||
|
gt()%>%
|
||
|
fmt_number(
|
||
|
columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
|
||
|
rows = everything(),
|
||
|
decimals = 3)%>%
|
||
|
tab_spanner(
|
||
|
label = "Full model",
|
||
|
columns = 6:8
|
||
|
)%>%
|
||
|
tab_spanner(
|
||
|
label = paste0("Regularised model, (a=",
|
||
|
best_alph,
|
||
|
", l=",
|
||
|
round(best_lamb,3),
|
||
|
")"),
|
||
|
columns = 2:5
|
||
|
)%>%
|
||
|
tab_header(
|
||
|
title = "Model coefficients",
|
||
|
subtitle = "Combined table of both full and regularised model coefficients"
|
||
|
)
|
||
|
|
||
|
com_coef_tbl
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 3: export
|
||
|
## ====================================================================
|
||
|
com_coef_rtf <- file("table2.RTF", "w")
|
||
|
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
|
||
|
close(com_coef_rtf)
|
||
|
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
##
|
||
|
## Model performance
|
||
|
##
|
||
|
## Table with performance meassures for the two different models.
|
||
|
##
|
||
|
## ====================================================================
|
||
|
|
||
|
# ROC curve of best model
|
||
|
|
||
|
p_roc
|
||
|
ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 1: data set
|
||
|
## ====================================================================
|
||
|
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
|
||
|
"Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
|
||
|
"Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 2: table
|
||
|
## ====================================================================
|
||
|
tbl_perf<-tbl%>%
|
||
|
gt()%>%
|
||
|
tab_header(
|
||
|
title = "Performance meassures",
|
||
|
subtitle = "Combined table of both full and regularised performance meassures"
|
||
|
)
|
||
|
|
||
|
tbl_perf
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 3: export
|
||
|
## ====================================================================
|
||
|
tbl_perf_rtf <- file("table3.RTF", "w")
|
||
|
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
|
||
|
close(tbl_perf_rtf)
|
||
|
#
|
||
|
|
||
|
## ====================================================================
|
||
|
##
|
||
|
## Secondary analysis
|
||
|
##
|
||
|
## ====================================================================
|
||
|
|
||
|
Xy<-dta_s
|
||
|
X<-dta_s|>select(-group)
|
||
|
y<-dta_s$group
|
||
|
|
||
|
|
||
|
source("assign_full.R")
|
||
|
source("regularisation_steps.R")
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 1: data merge
|
||
|
## ====================================================================
|
||
|
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 2: table
|
||
|
## ====================================================================
|
||
|
com_coef_tbl<-tbl%>%
|
||
|
gt()%>%
|
||
|
fmt_number(
|
||
|
columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
|
||
|
rows = everything(),
|
||
|
decimals = 3)%>%
|
||
|
tab_spanner(
|
||
|
label = "Full model",
|
||
|
columns = 6:8
|
||
|
)%>%
|
||
|
tab_spanner(
|
||
|
label = paste0("Regularised model, (a=",
|
||
|
best_alph,
|
||
|
", l=",
|
||
|
round(best_lamb,3),
|
||
|
")"),
|
||
|
columns = 2:5
|
||
|
)%>%
|
||
|
tab_header(
|
||
|
title = "Model coefficients",
|
||
|
subtitle = "Combined table of both full and regularised model coefficients"
|
||
|
)
|
||
|
|
||
|
com_coef_tbl
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 3: export
|
||
|
## ====================================================================
|
||
|
com_coef_rtf <- file("table2_sec.RTF", "w")
|
||
|
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
|
||
|
close(com_coef_rtf)
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
##
|
||
|
## Model performance
|
||
|
##
|
||
|
## Table with performance meassures for the two different models.
|
||
|
##
|
||
|
## ====================================================================
|
||
|
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 1: data set
|
||
|
## ====================================================================
|
||
|
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
|
||
|
"Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
|
||
|
"Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 2: table
|
||
|
## ====================================================================
|
||
|
tbl_perf<-tbl%>%
|
||
|
gt()%>%
|
||
|
tab_header(
|
||
|
title = "Performance meassures",
|
||
|
subtitle = "Combined table of both full and regularised performance meassures"
|
||
|
)
|
||
|
|
||
|
tbl_perf
|
||
|
|
||
|
## ====================================================================
|
||
|
# Step 3: export
|
||
|
## ====================================================================
|
||
|
tbl_perf_rtf <- file("table3_sec.RTF", "w")
|
||
|
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
|
||
|
close(tbl_perf_rtf)
|