all analyses of project one moved to new generation (old saved as gen_1)
This commit is contained in:
parent
244add2bb4
commit
d6fc414822
@ -3,27 +3,31 @@
|
||||
##
|
||||
## Based on the assignment work from the ISL-course
|
||||
##
|
||||
## Generation 2 - 02.december.2022
|
||||
## Code preparation for analysis on Denmarks Statistics server with enriched data set.
|
||||
##
|
||||
## Analysis plan:
|
||||
## Table 1
|
||||
## Figure 1: Sankey plot (drop & hop colored)
|
||||
## Table 2: Linear regression model of pase_6~.
|
||||
## Table 3: Elastic net prediction models of drop and hop. Performance measures referenced in text.
|
||||
##
|
||||
## A Rmarkdown file could be created to write the initial report with main results.
|
||||
## This code is a bit of a mess, as it is the result of several iterations. It works however.
|
||||
##
|
||||
|
||||
## ====================================================================
|
||||
# Step 0: Primary outcome
|
||||
## ====================================================================
|
||||
|
||||
# Difs
|
||||
rel_dif <- 20 # 20 % difference
|
||||
abs_dif <- 20 # 20 point diff
|
||||
# Script to run as hop and drop
|
||||
|
||||
# pout <- "diff"
|
||||
#
|
||||
# Note:: By increasing the relative decline, the sensitivity increases and specificity declines.
|
||||
# This fact is an argument against over fitting. The reason being the nature of the clinical data and the fact, that predicting PA is difficult (!)
|
||||
#
|
||||
pout <- "drop" # Drop to first quartile
|
||||
|
||||
# decl_rel
|
||||
# decl_abs
|
||||
# drop
|
||||
# hop
|
||||
|
||||
|
||||
## ====================================================================
|
||||
@ -38,20 +42,6 @@ source("data_set.R")
|
||||
|
||||
source("data_format.R")
|
||||
|
||||
## ====================================================================
|
||||
# Libraries
|
||||
## ====================================================================
|
||||
|
||||
|
||||
library(tidyverse)
|
||||
library(glue)
|
||||
library(patchwork)
|
||||
# library(ggdendro)
|
||||
library(corrplot)
|
||||
library(gt)
|
||||
library(gtsummary)
|
||||
|
||||
|
||||
## ====================================================================
|
||||
##
|
||||
## Baseline
|
||||
@ -64,30 +54,30 @@ library(gtsummary)
|
||||
## ====================================================================
|
||||
|
||||
|
||||
lbs<-var.labels[match(colnames(X_tbl),
|
||||
names(var.labels))]
|
||||
|
||||
ls<-lapply(1:ncol(X_tbl),function(x){
|
||||
as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
|
||||
})
|
||||
|
||||
ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
|
||||
by = "group",
|
||||
missing = "no",
|
||||
# label = ls[-length(ls)], ## Removing the last, as this is output
|
||||
value = list(where(is.factor) ~ "2"),
|
||||
type = list(mrs_0 ~ "categorical"),
|
||||
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||||
)%>%
|
||||
add_overall() %>%
|
||||
add_n()%>%
|
||||
as_gt()
|
||||
|
||||
ts
|
||||
|
||||
ts_rtf <- file("table1.RTF", "w")
|
||||
writeLines(ts%>%as_rtf(), ts_rtf)
|
||||
close(ts_rtf)
|
||||
# lbs<-var.labels[match(colnames(X_tbl),
|
||||
# names(var.labels))]
|
||||
#
|
||||
# ls<-lapply(1:ncol(X_tbl),function(x){
|
||||
# as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
|
||||
# })
|
||||
#
|
||||
# ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
|
||||
# by = "group",
|
||||
# missing = "no",
|
||||
# # label = ls[-length(ls)], ## Removing the last, as this is output
|
||||
# value = list(where(is.factor) ~ "2"),
|
||||
# type = list(mrs_0 ~ "categorical"),
|
||||
# statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||||
# )%>%
|
||||
# add_overall() %>%
|
||||
# add_n()%>%
|
||||
# as_gt()
|
||||
#
|
||||
# ts
|
||||
#
|
||||
# ts_rtf <- file("table1.RTF", "w")
|
||||
# writeLines(ts%>%as_rtf(), ts_rtf)
|
||||
# close(ts_rtf)
|
||||
|
||||
|
||||
|
||||
@ -104,24 +94,24 @@ ls<-lapply(1:ncol(X_tbl_f),function(x){
|
||||
# Step 2: table - edited
|
||||
## ====================================================================
|
||||
|
||||
ts_e<-tbl_summary(X_tbl,
|
||||
missing = "no",
|
||||
value = list(where(is.factor) ~ "2"),
|
||||
type = list(mrs_0 ~ "categorical",
|
||||
mrs_1 ~ "categorical"),
|
||||
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||||
)%>%
|
||||
as_gt()
|
||||
|
||||
ts_e
|
||||
# ts_e<-tbl_summary(X_tbl,
|
||||
# missing = "no",
|
||||
# value = list(where(is.factor) ~ "2"),
|
||||
# type = list(mrs_0 ~ "categorical",
|
||||
# mrs_1 ~ "categorical"),
|
||||
# statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||||
# )%>%
|
||||
# as_gt()
|
||||
#
|
||||
# ts_e
|
||||
|
||||
## ====================================================================
|
||||
# Step 3: table export
|
||||
## ====================================================================
|
||||
|
||||
ts_rtf <- file("table1_overall.RTF", "w")
|
||||
writeLines(ts%>%as_rtf(), ts_rtf)
|
||||
close(ts_rtf)
|
||||
# ts_rtf <- file("table1_overall.RTF", "w")
|
||||
# writeLines(ts%>%as_rtf(), ts_rtf)
|
||||
# close(ts_rtf)
|
||||
|
||||
## ====================================================================
|
||||
# Baseline table - by PASE group
|
||||
@ -138,11 +128,43 @@ ts_q <- X_tbl |>
|
||||
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||||
) |>
|
||||
add_overall() |>
|
||||
add_n
|
||||
add_n ()
|
||||
|
||||
ts_q
|
||||
|
||||
## ====================================================================
|
||||
# Drops and hops
|
||||
## ====================================================================
|
||||
|
||||
# TRUEs are patients dropping
|
||||
table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])
|
||||
|
||||
# TRUEs are percentage of patients inactive before stroke being more active after
|
||||
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])
|
||||
|
||||
# TRUEs are percentage of patients being more active after that were inactive before stroke
|
||||
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])
|
||||
|
||||
# Difference between hop/no-hop
|
||||
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
|
||||
|
||||
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])
|
||||
|
||||
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
|
||||
|
||||
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
|
||||
|
||||
# Stationary low
|
||||
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
|
||||
|
||||
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
|
||||
|
||||
## ====================================================================
|
||||
# Sankey plot
|
||||
## ====================================================================
|
||||
|
||||
# source("sankey.R")
|
||||
# p_delta
|
||||
|
||||
## ====================================================================
|
||||
# Six months PASE: Bivariate and multivariate analyses
|
||||
@ -196,13 +218,22 @@ tbl_merge(list(uv_reg,mu_reg))
|
||||
## ====================================================================
|
||||
|
||||
|
||||
source("assign_full.R")
|
||||
# source("assign_full.R")
|
||||
|
||||
ls <- list()
|
||||
for (i in c("drop","hop")){
|
||||
pout <- i
|
||||
source("data_format.R")
|
||||
source("regularisation_steps.R")
|
||||
}
|
||||
|
||||
# Loop to run regularised model on both drop and hop.
|
||||
# Saved in list for printing and exporting the plot.
|
||||
|
||||
## ====================================================================
|
||||
# Step 1: data merge
|
||||
## ====================================================================
|
||||
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
|
||||
tbl<-merge(ls$drop$RegularisedCoefs$'_data',ls$hop$RegularisedCoefs$'_data',by="name",all.x=T, sort=F)
|
||||
|
||||
## ====================================================================
|
||||
# Step 2: table
|
||||
@ -214,22 +245,25 @@ com_coef_tbl<-tbl%>%
|
||||
rows = everything(),
|
||||
decimals = 3)%>%
|
||||
tab_spanner(
|
||||
label = "Full model",
|
||||
columns = 6:8
|
||||
label = "DROP",
|
||||
columns = 2:5
|
||||
)%>%
|
||||
tab_spanner(
|
||||
label = paste0("Regularised model, (a=",
|
||||
best_alph,
|
||||
", l=",
|
||||
round(best_lamb,3),
|
||||
")"),
|
||||
columns = 2:5
|
||||
label = "HOP",
|
||||
columns = 6:9
|
||||
)%>%
|
||||
tab_header(
|
||||
title = "Model coefficients",
|
||||
subtitle = "Combined table of both full and regularised model coefficients"
|
||||
)
|
||||
|
||||
|
||||
# paste0("Regularised model, (a=",
|
||||
# best_alph,
|
||||
# ", l=",
|
||||
# round(best_lamb,3),
|
||||
# ")")
|
||||
|
||||
com_coef_tbl
|
||||
|
||||
## ====================================================================
|
||||
@ -249,12 +283,6 @@ close(com_coef_rtf)
|
||||
##
|
||||
## ====================================================================
|
||||
|
||||
# ROC curve of best model
|
||||
|
||||
p_roc
|
||||
ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
|
||||
|
||||
|
||||
## ====================================================================
|
||||
# Step 1: data set
|
||||
## ====================================================================
|
||||
|
BIN
1 PA Decline/archive/.DS_Store
vendored
Normal file
BIN
1 PA Decline/archive/.DS_Store
vendored
Normal file
Binary file not shown.
BIN
1 PA Decline/archive/generation_1/.DS_Store
vendored
Normal file
BIN
1 PA Decline/archive/generation_1/.DS_Store
vendored
Normal file
Binary file not shown.
409
1 PA Decline/archive/generation_1/00master.R
Normal file
409
1 PA Decline/archive/generation_1/00master.R
Normal file
@ -0,0 +1,409 @@
|
||||
##
|
||||
## Master script
|
||||
##
|
||||
## Based on the assignment work from the ISL-course
|
||||
##
|
||||
##
|
||||
##
|
||||
|
||||
## ====================================================================
|
||||
# Step 0: Primary outcome
|
||||
## ====================================================================
|
||||
|
||||
# Difs
|
||||
rel_dif <- 20 # 20 % difference
|
||||
abs_dif <- 20 # 20 point diff
|
||||
|
||||
# pout <- "diff"
|
||||
#
|
||||
# Note:: By increasing the relative decline, the sensitivity increases and specificity declines.
|
||||
# This fact is an argument against over fitting. The reason being the nature of the clinical data and the fact, that predicting PA is difficult (!)
|
||||
#
|
||||
pout <- "drop" # Drop to first quartile
|
||||
|
||||
# decl_rel
|
||||
# decl_abs
|
||||
# drop
|
||||
# hop
|
||||
|
||||
|
||||
## ====================================================================
|
||||
## Data
|
||||
## ====================================================================
|
||||
|
||||
|
||||
setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")
|
||||
|
||||
source("data_set.R")
|
||||
# Loading data-set from USB, to not store on computer
|
||||
|
||||
source("data_format.R")
|
||||
|
||||
## ====================================================================
|
||||
# Libraries
|
||||
## ====================================================================
|
||||
|
||||
|
||||
library(tidyverse)
|
||||
library(glue)
|
||||
library(patchwork)
|
||||
# library(ggdendro)
|
||||
library(corrplot)
|
||||
library(gt)
|
||||
library(gtsummary)
|
||||
|
||||
|
||||
## ====================================================================
|
||||
##
|
||||
## Baseline
|
||||
##
|
||||
## ====================================================================
|
||||
|
||||
|
||||
## ====================================================================
|
||||
# Step 0: labels
|
||||
## ====================================================================
|
||||
|
||||
|
||||
lbs<-var.labels[match(colnames(X_tbl),
|
||||
names(var.labels))]
|
||||
|
||||
ls<-lapply(1:ncol(X_tbl),function(x){
|
||||
as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
|
||||
})
|
||||
|
||||
ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"),
|
||||
by = "group",
|
||||
missing = "no",
|
||||
# label = ls[-length(ls)], ## Removing the last, as this is output
|
||||
value = list(where(is.factor) ~ "2"),
|
||||
type = list(mrs_0 ~ "categorical"),
|
||||
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||||
)%>%
|
||||
add_overall() %>%
|
||||
add_n()%>%
|
||||
as_gt()
|
||||
|
||||
ts
|
||||
|
||||
ts_rtf <- file("table1.RTF", "w")
|
||||
writeLines(ts%>%as_rtf(), ts_rtf)
|
||||
close(ts_rtf)
|
||||
|
||||
|
||||
|
||||
## ====================================================================
|
||||
# Step 1: labels
|
||||
## ====================================================================
|
||||
lbs<-var.labels[match(colnames(X_tbl_f), names(var.labels))]
|
||||
|
||||
ls<-lapply(1:ncol(X_tbl_f),function(x){
|
||||
as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\""))
|
||||
})
|
||||
|
||||
## ====================================================================
|
||||
# Step 2: table - edited
|
||||
## ====================================================================
|
||||
|
||||
ts_e<-tbl_summary(X_tbl,
|
||||
missing = "no",
|
||||
value = list(where(is.factor) ~ "2"),
|
||||
type = list(mrs_0 ~ "categorical",
|
||||
mrs_1 ~ "categorical"),
|
||||
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||||
)%>%
|
||||
as_gt()
|
||||
|
||||
ts_e
|
||||
|
||||
## ====================================================================
|
||||
# Step 3: table export
|
||||
## ====================================================================
|
||||
|
||||
ts_rtf <- file("table1_overall.RTF", "w")
|
||||
writeLines(ts%>%as_rtf(), ts_rtf)
|
||||
close(ts_rtf)
|
||||
|
||||
## ====================================================================
|
||||
# Baseline table - by PASE group
|
||||
## ====================================================================
|
||||
|
||||
ts_q <- X_tbl |>
|
||||
select(vars) |>
|
||||
mutate(pase_0_cut = factor(quantile_cut(pase_0, groups = 4)[[1]],ordered = TRUE)) |>
|
||||
select(-pase_6,-pase_0) |>
|
||||
tbl_summary(missing = "no",
|
||||
by="pase_0_cut",
|
||||
value = list(where(is.factor) ~ "2"),
|
||||
type = list(mrs_0 ~ "categorical"),
|
||||
statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]")
|
||||
) |>
|
||||
add_overall() |>
|
||||
add_n ()
|
||||
|
||||
ts_q
|
||||
|
||||
## ====================================================================
|
||||
# Drops and hops
|
||||
## ====================================================================
|
||||
|
||||
# TRUEs are patients dropping
|
||||
table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",])
|
||||
|
||||
# TRUEs are percentage of patients inactive before stroke being more active after
|
||||
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",])
|
||||
|
||||
# TRUEs are percentage of patients being more active after that were inactive before stroke
|
||||
table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",])
|
||||
|
||||
# Difference between hop/no-hop
|
||||
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
|
||||
|
||||
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"])
|
||||
|
||||
summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
|
||||
|
||||
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"])
|
||||
|
||||
# Stationary low
|
||||
t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
|
||||
|
||||
boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"])
|
||||
|
||||
## ====================================================================
|
||||
# Sankey plot
|
||||
## ====================================================================
|
||||
|
||||
if (pout=="drop"){
|
||||
source("sankey.R")
|
||||
p_delta
|
||||
}
|
||||
|
||||
|
||||
## ====================================================================
|
||||
# Six months PASE: Bivariate and multivariate analyses
|
||||
## ====================================================================
|
||||
|
||||
dta_lmreg <- X_tbl |>
|
||||
select(vars) |>
|
||||
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2)))
|
||||
|
||||
Hmisc::label(dta_lmreg$mrs_0) <- "Pre-stroke mRS >0"
|
||||
|
||||
uv_reg <- tbl_uvregression(data=dta_lmreg,
|
||||
method=lm,
|
||||
y="pase_6",
|
||||
show_single_row = where(is.factor),
|
||||
estimate_fun = ~style_sigfig(.x,digits = 3),
|
||||
pvalue_fun = ~style_pvalue(.x, digits = 3)
|
||||
)
|
||||
|
||||
mu_reg <- dta_lmreg |>
|
||||
lm(formula=pase_6~.,data=_) |>
|
||||
tbl_regression(show_single_row = where(is.factor),
|
||||
estimate_fun = ~style_sigfig(.x,digits = 3),
|
||||
pvalue_fun = ~style_pvalue(.x, digits = 3)
|
||||
)|>
|
||||
add_n()
|
||||
|
||||
tbl_merge(list(uv_reg,mu_reg))
|
||||
|
||||
|
||||
## ====================================================================
|
||||
##
|
||||
## Data variance
|
||||
##
|
||||
## Illustrating principal components.
|
||||
##
|
||||
## ====================================================================
|
||||
|
||||
|
||||
# source("PCA.R")
|
||||
#
|
||||
#
|
||||
# pca22
|
||||
# ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
|
||||
|
||||
|
||||
## ====================================================================
|
||||
##
|
||||
## Models
|
||||
##
|
||||
## ====================================================================
|
||||
|
||||
|
||||
source("assign_full.R")
|
||||
source("regularisation_steps.R")
|
||||
|
||||
## ====================================================================
|
||||
# Step 1: data merge
|
||||
## ====================================================================
|
||||
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
|
||||
|
||||
## ====================================================================
|
||||
# Step 2: table
|
||||
## ====================================================================
|
||||
com_coef_tbl<-tbl%>%
|
||||
gt()%>%
|
||||
fmt_number(
|
||||
columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
|
||||
rows = everything(),
|
||||
decimals = 3)%>%
|
||||
tab_spanner(
|
||||
label = "Full model",
|
||||
columns = 6:8
|
||||
)%>%
|
||||
tab_spanner(
|
||||
label = paste0("Regularised model, (a=",
|
||||
best_alph,
|
||||
", l=",
|
||||
round(best_lamb,3),
|
||||
")"),
|
||||
columns = 2:5
|
||||
)%>%
|
||||
tab_header(
|
||||
title = "Model coefficients",
|
||||
subtitle = "Combined table of both full and regularised model coefficients"
|
||||
)
|
||||
|
||||
com_coef_tbl
|
||||
|
||||
## ====================================================================
|
||||
# Step 3: export
|
||||
## ====================================================================
|
||||
com_coef_rtf <- file("table2.RTF", "w")
|
||||
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
|
||||
close(com_coef_rtf)
|
||||
|
||||
|
||||
|
||||
## ====================================================================
|
||||
##
|
||||
## Model performance
|
||||
##
|
||||
## Table with performance meassures for the two different models.
|
||||
##
|
||||
## ====================================================================
|
||||
|
||||
# ROC curve of best model
|
||||
|
||||
p_roc
|
||||
ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm")
|
||||
|
||||
|
||||
## ====================================================================
|
||||
# Step 1: data set
|
||||
## ====================================================================
|
||||
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
|
||||
"Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
|
||||
"Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
|
||||
|
||||
## ====================================================================
|
||||
# Step 2: table
|
||||
## ====================================================================
|
||||
tbl_perf<-tbl%>%
|
||||
gt()%>%
|
||||
tab_header(
|
||||
title = "Performance meassures",
|
||||
subtitle = "Combined table of both full and regularised performance meassures"
|
||||
)
|
||||
|
||||
tbl_perf
|
||||
|
||||
## ====================================================================
|
||||
# Step 3: export
|
||||
## ====================================================================
|
||||
tbl_perf_rtf <- file("table3.RTF", "w")
|
||||
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
|
||||
close(tbl_perf_rtf)
|
||||
#
|
||||
|
||||
## ====================================================================
|
||||
##
|
||||
## Secondary analysis
|
||||
##
|
||||
## ====================================================================
|
||||
|
||||
Xy<-dta_s
|
||||
X<-dta_s|>select(-group)
|
||||
y<-dta_s$group
|
||||
|
||||
|
||||
source("assign_full.R")
|
||||
source("regularisation_steps.R")
|
||||
|
||||
## ====================================================================
|
||||
# Step 1: data merge
|
||||
## ====================================================================
|
||||
tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F)
|
||||
|
||||
## ====================================================================
|
||||
# Step 2: table
|
||||
## ====================================================================
|
||||
com_coef_tbl<-tbl%>%
|
||||
gt()%>%
|
||||
fmt_number(
|
||||
columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric
|
||||
rows = everything(),
|
||||
decimals = 3)%>%
|
||||
tab_spanner(
|
||||
label = "Full model",
|
||||
columns = 6:8
|
||||
)%>%
|
||||
tab_spanner(
|
||||
label = paste0("Regularised model, (a=",
|
||||
best_alph,
|
||||
", l=",
|
||||
round(best_lamb,3),
|
||||
")"),
|
||||
columns = 2:5
|
||||
)%>%
|
||||
tab_header(
|
||||
title = "Model coefficients",
|
||||
subtitle = "Combined table of both full and regularised model coefficients"
|
||||
)
|
||||
|
||||
com_coef_tbl
|
||||
|
||||
## ====================================================================
|
||||
# Step 3: export
|
||||
## ====================================================================
|
||||
com_coef_rtf <- file("table2_sec.RTF", "w")
|
||||
writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf)
|
||||
close(com_coef_rtf)
|
||||
|
||||
|
||||
## ====================================================================
|
||||
##
|
||||
## Model performance
|
||||
##
|
||||
## Table with performance meassures for the two different models.
|
||||
##
|
||||
## ====================================================================
|
||||
|
||||
|
||||
## ====================================================================
|
||||
# Step 1: data set
|
||||
## ====================================================================
|
||||
tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"),
|
||||
"Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3),
|
||||
"Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3))
|
||||
|
||||
## ====================================================================
|
||||
# Step 2: table
|
||||
## ====================================================================
|
||||
tbl_perf<-tbl%>%
|
||||
gt()%>%
|
||||
tab_header(
|
||||
title = "Performance meassures",
|
||||
subtitle = "Combined table of both full and regularised performance meassures"
|
||||
)
|
||||
|
||||
tbl_perf
|
||||
|
||||
## ====================================================================
|
||||
# Step 3: export
|
||||
## ====================================================================
|
||||
tbl_perf_rtf <- file("table3_sec.RTF", "w")
|
||||
writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf)
|
||||
close(tbl_perf_rtf)
|
290
1 PA Decline/archive/generation_1/data_format.R
Normal file
290
1 PA Decline/archive/generation_1/data_format.R
Normal file
@ -0,0 +1,290 @@
|
||||
## ItMLiHSmar2022
|
||||
## data_format.R, child script
|
||||
## Data formatting and handling
|
||||
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
|
||||
##
|
||||
## Now modified to use in publication
|
||||
##
|
||||
|
||||
## ====================================================================
|
||||
# Step 1: Loading libraries
|
||||
## ====================================================================
|
||||
|
||||
library(Hmisc)
|
||||
library(dplyr)
|
||||
library(daDoctoR)
|
||||
library(tidyselect)
|
||||
|
||||
## ====================================================================
|
||||
# Step 2: Loading data
|
||||
## ====================================================================
|
||||
|
||||
# rm(list = ls()) # Clear
|
||||
# setwd("/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/")
|
||||
# dta<-read.csv("/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/assigndata.csv")
|
||||
|
||||
## ====================================================================
|
||||
# Step 3: Formatting variables
|
||||
## ====================================================================
|
||||
|
||||
dta <- export %>%
|
||||
# as_tibble()%>%
|
||||
mutate(any_rep=factor(ifelse(thrombolysis=="yes"|thrombechtomy=="yes","yes","no")), # If not noted, no therapy was received
|
||||
male_sex= factor(ifelse(sex=="female","no","yes")),
|
||||
# smoke_ever=factor(ifelse(smoke_ever=="never","no","yes")),
|
||||
civil=factor(ifelse(civil=="partner","no","yes")), # Sets "yes" for not-cohabiting
|
||||
rtreat=factor(ifelse(rtreat=="Placebo","no","yes")), # "Yes" receives active treatment
|
||||
alc=factor(ifelse(alc=="more","yes","no")), # Yes for more than guideline
|
||||
pase_0=as.numeric(pase_0),
|
||||
pase_6=as.numeric(pase_6),
|
||||
across(c("diabetes",
|
||||
"hypertension",
|
||||
"smoker",
|
||||
# "smoker_prev",
|
||||
"afli",
|
||||
"pad",
|
||||
"ami",
|
||||
"tci",
|
||||
"mrs_0",
|
||||
"mrs_1"),as.factor),
|
||||
across(c("nihss_c",
|
||||
"age",
|
||||
"mdi_1", # For "enriched" analysis
|
||||
"who5_score_1",
|
||||
"mfi_gen_1",
|
||||
"mfi_phys_1",
|
||||
"mfi_act_1",
|
||||
"mfi_mot_1",
|
||||
"mfi_men_1"),as.numeric )
|
||||
)%>%
|
||||
select(-c(sex))
|
||||
|
||||
|
||||
## ====================================================================
|
||||
# Step 4: Defining outcome
|
||||
## ====================================================================
|
||||
|
||||
## Changed to step 7
|
||||
## This is to perform proper quantile split based on actually included.
|
||||
|
||||
## ====================================================================
|
||||
# Step 5: Ordering variables
|
||||
## ====================================================================
|
||||
|
||||
vars <- c("age",
|
||||
"male_sex",
|
||||
"civil",
|
||||
"pase_0",
|
||||
"smoker",
|
||||
"alc",
|
||||
"afli",
|
||||
"hypertension",
|
||||
"diabetes",
|
||||
"pad",
|
||||
"ami",
|
||||
"tci",
|
||||
"mrs_0",
|
||||
"nihss_c",
|
||||
"any_rep",
|
||||
"rtreat",
|
||||
"pase_6")
|
||||
|
||||
dta<-select(dta,c(vars,
|
||||
"mrs_1",
|
||||
"mfi_gen_1",
|
||||
"mfi_phys_1",
|
||||
"mfi_act_1",
|
||||
"mfi_mot_1",
|
||||
"mfi_men_1",
|
||||
"mdi_1",
|
||||
"who5_score_1"
|
||||
))
|
||||
|
||||
## ====================================================================
|
||||
# Step 6: Labeling
|
||||
## ====================================================================
|
||||
|
||||
var.labels = c(age="Age",
|
||||
male_sex="Male",
|
||||
civil="Living alone",
|
||||
pase_0="Pre-stroke PASE score",
|
||||
pase_6="Six month PASE score",
|
||||
smoker="Daily or occasinally smoking",
|
||||
# smoker_prev="Previous habbit of smoking",
|
||||
alc="More alcohol than recommendation",
|
||||
afli="AFIB",
|
||||
hypertension="Hypertension",
|
||||
diabetes="Diabetes",
|
||||
pad="PAD",
|
||||
ami="Previous MI",
|
||||
tci="Previous TIA",
|
||||
mrs_0="Pre-stroke mRS [-1]",
|
||||
nihss_c="Acute NIHSS score",
|
||||
thrombolysis="Acute thrombolysis",
|
||||
thrombechtomy="Acute thrombechtomy",
|
||||
any_rep="Any reperfusion therapy",
|
||||
rtreat="Active trial treatment",
|
||||
mrs_1="One month mRS [-1]",
|
||||
mfi_gen_1="One month MFI (General fatigue)",
|
||||
mfi_phys_1="One month MFI (Physical fatigue)",
|
||||
mfi_act_1="One month MFI (Reduced activity)",
|
||||
mfi_mot_1="One month MFI (Reduced motivation)",
|
||||
mfi_men_1="One month MFI (Mental fatigue)",
|
||||
mdi_1="One month MDI",
|
||||
who5_score_1="One month WHO5",
|
||||
pase_decl_rel_fac="PASE score difference, relative F",
|
||||
pase_decl_abs_fac="PASE score difference, absolute F",
|
||||
pase_drop_fac="PASE first quartile drop F",
|
||||
pase_hop_fac="PASE first quartile hop F",
|
||||
pase_diff="PASE absolute decline",
|
||||
pase_decl_rel="PASE relative decline",
|
||||
pase_0_cut="PASE 0 quartiles",
|
||||
pase_6_cut="PASE 6 quartiles")
|
||||
|
||||
## Labelling based on outcome flag
|
||||
if (pout=="decl_rel"|pout=="decl_abs"){
|
||||
var.labels = c(var.labels,group="PASE decline")}
|
||||
if (pout=="drop"){
|
||||
var.labels = c(var.labels,group="PASE drop")}
|
||||
|
||||
## ====================================================================
|
||||
# Step 7: final data export
|
||||
## ====================================================================
|
||||
|
||||
data_summary<-summary(dta)
|
||||
|
||||
# Saving "old" factorised variables
|
||||
sel<-sapply(dta,is.factor)
|
||||
# Reformatting factors as 1/2 for analysis
|
||||
dta<-dta |>
|
||||
mutate(across(where(is.factor), as.numeric))|> # Turning factors into 1(no) or 2(yes) for model. Numbered alphabetically.
|
||||
mutate(across(matches(colnames(dta)[sel]), as.factor),
|
||||
across(starts_with("pase_"), as.numeric))
|
||||
|
||||
# Filtering out non-PASE
|
||||
X_tbl<-dta |>
|
||||
filter(!is.na(pase_0),!is.na(pase_6))
|
||||
|
||||
nrow(X_tbl)
|
||||
|
||||
# Defining possible outcome meassures. Keeping in df for characterisation
|
||||
X_tbl <- X_tbl|>
|
||||
mutate(## Relative decline
|
||||
pase_diff=(pase_0-pase_6),
|
||||
pase_decl_rel = pase_diff/pase_0*100,
|
||||
pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
|
||||
## Absolute decline
|
||||
pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
|
||||
## Drop
|
||||
pase_0_cut=quantile_cut(as.numeric(pase_0),
|
||||
groups=4,
|
||||
group.names = c(as.character(1:4)),
|
||||
y=as.numeric(pase_0),
|
||||
ordered.f = TRUE,
|
||||
inc.outs = TRUE,
|
||||
detail.lst=FALSE),
|
||||
pase_6_cut=quantile_cut(as.numeric(pase_6),
|
||||
groups=4,
|
||||
group.names = c(as.character(1:4)),
|
||||
y=as.numeric(pase_0),
|
||||
ordered.f = TRUE,
|
||||
inc.outs = TRUE,
|
||||
detail.lst=FALSE),
|
||||
pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")),
|
||||
pase_hop_fac=factor(ifelse(pase_6_cut!=1&pase_0_cut==1,"yes","no")))
|
||||
|
||||
Hmisc::label(X_tbl) = as.list(var.labels[match(names(X_tbl), names(var.labels))])
|
||||
|
||||
# Setting final primary output from "pout"
|
||||
if (pout=="decl_rel"){
|
||||
X_tbl <- X_tbl|>
|
||||
mutate(group=pase_decl_rel_fac)
|
||||
|
||||
X_tbl_f <- X_tbl|>
|
||||
filter(pase_0!=0)|>
|
||||
select(-starts_with("pase_"))
|
||||
}
|
||||
|
||||
if (pout=="decl_abs"){
|
||||
X_tbl <- X_tbl|>
|
||||
mutate(group=pase_decl_rel_fac)
|
||||
|
||||
X_tbl_f <- X_tbl|>
|
||||
filter(pase_0>=abs_dif)|>
|
||||
select(-starts_with("pase_"))
|
||||
}
|
||||
|
||||
if (pout=="drop"){
|
||||
X_tbl <- X_tbl|>
|
||||
mutate(group=pase_drop_fac)
|
||||
|
||||
# print(quantile(as.numeric(X_tbl$pase_0)))
|
||||
# print(quantile(as.numeric(X_tbl$pase_6)))
|
||||
# print(summary(X_tbl$pase_0_cut))
|
||||
|
||||
X_tbl_f <- X_tbl|>
|
||||
filter(pase_0_cut!=1)|>
|
||||
select(-starts_with("pase_"))
|
||||
}
|
||||
|
||||
if (pout=="hop"){
|
||||
X_tbl <- X_tbl|>
|
||||
mutate(group=pase_hop_fac)
|
||||
|
||||
# print(quantile(as.numeric(X_tbl$pase_0)))
|
||||
# print(quantile(as.numeric(X_tbl$pase_6)))
|
||||
# print(summary(X_tbl$pase_0_cut))
|
||||
|
||||
X_tbl_f <- X_tbl|>
|
||||
filter(pase_6_cut!=1)|>
|
||||
select(-starts_with("pase_"))
|
||||
}
|
||||
|
||||
# Excluding one month measures for primary analysis and setting df for table one
|
||||
X_tbl_f <- X_tbl_f |>
|
||||
select(-c(who5_score_1,
|
||||
mdi_1,
|
||||
mrs_1,
|
||||
starts_with("mfi_"))) # Left out of model as no present in drop-group
|
||||
|
||||
# Dropping non-complete for analysis
|
||||
Xy <- X_tbl_f|>
|
||||
na.omit()|> # Keeping only complete observations
|
||||
select(-c(tci) # Left out of model as no present in drop-group
|
||||
)|>
|
||||
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2))) # Sets binary mRS 0 to include in glmnet, 0 or above
|
||||
|
||||
label(Xy) = as.list(var.labels[match(names(Xy), names(var.labels))])
|
||||
|
||||
X<-dplyr::select(Xy,-c(group, -starts_with("pase_")) # Exclude primary outcome
|
||||
)
|
||||
y<-Xy$group
|
||||
|
||||
|
||||
## ====================================================================
|
||||
# Secondary analysis
|
||||
## ====================================================================
|
||||
|
||||
|
||||
dta_s<-X_tbl|>
|
||||
select(-c(tci),
|
||||
-starts_with("pase_"))|>
|
||||
na.omit()|>
|
||||
mutate(mrs_0=factor(ifelse(mrs_0==1,1,2)),# Sets binary mRS 0 to include in glmnet, 0 or above
|
||||
mrs_1=factor(ifelse(mrs_1==1,1,2)))# Sets binary mRS 1 to include in glmnet, 0 or above
|
||||
|
||||
label(dta_s) = as.list(var.labels[match(names(dta_s), names(var.labels))])
|
||||
|
||||
## ====================================================================
|
||||
# Step 8: Loading rest of libraries
|
||||
## ====================================================================
|
||||
|
||||
library(tidyverse)
|
||||
library(patchwork)
|
||||
library(caret)
|
||||
library(glmnet)
|
||||
library(leaps)
|
||||
library(pROC)
|
||||
library(gt)
|
||||
library(gtsummary)
|
||||
library(dplyr)
|
49
1 PA Decline/archive/generation_1/data_set.R
Normal file
49
1 PA Decline/archive/generation_1/data_set.R
Normal file
@ -0,0 +1,49 @@
|
||||
## ItMLiHS assignment data set
|
||||
|
||||
export<-read.csv("/Volumes/Data 1/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
|
||||
|
||||
export<-export[,c("pase_0",
|
||||
"age",
|
||||
"sex",
|
||||
"civil",
|
||||
"smoke_ever",
|
||||
"smoker",
|
||||
"rtreat",
|
||||
"alc",
|
||||
"afli",
|
||||
"hypertension",
|
||||
"diabetes",
|
||||
"mrs_0",
|
||||
"nihss_c",
|
||||
"thrombolysis",
|
||||
"pad",
|
||||
"thrombechtomy",
|
||||
"ami",
|
||||
"tci",
|
||||
"pase_6",
|
||||
"mrs_1",
|
||||
"mfi_gen_1",
|
||||
"mfi_phys_1",
|
||||
"mfi_act_1",
|
||||
"mfi_mot_1",
|
||||
"mfi_men_1",
|
||||
"mdi_1",
|
||||
"who5_score_1")]
|
||||
|
||||
export$diabetes[is.na(export$diabetes)]<-"no"
|
||||
export$diabetes[is.na(export$hypertension)]<-"no"
|
||||
export$thrombolysis[is.na(export$thrombolysis)]<-"no"
|
||||
export$thrombechtomy[is.na(export$thrombechtomy)]<-"no"
|
||||
export$pad[is.na(export$pad)]<-"no"
|
||||
export$ami[is.na(export$ami)]<-"no"
|
||||
# export$smoker_prev <- ifelse(export$smoker=="3","yes","no")
|
||||
export$smoker <- ifelse(export$smoker=="1","yes","no")
|
||||
export$smoker[is.na(export$smoker)] <- "no"
|
||||
# export$mrs_0[export$mrs_0==3]<-NA
|
||||
|
||||
|
||||
|
||||
# export<-na.omit(export)
|
||||
|
||||
export
|
||||
# write.csv(export,"/Users/au301842/Library/CloudStorage/OneDrive-Personligt/Research/ISLcourse/assigndata.csv",row.names = FALSE)
|
Before Width: | Height: | Size: 317 KiB After Width: | Height: | Size: 317 KiB |
117
1 PA Decline/archive/generation_1/regular_fun.R
Normal file
117
1 PA Decline/archive/generation_1/regular_fun.R
Normal file
@ -0,0 +1,117 @@
|
||||
## ItMLiHSmar2022
|
||||
## regular_fun.R, child script
|
||||
## Regularisation model building function
|
||||
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
|
||||
##
|
||||
## Now modified to use in publication
|
||||
##
|
||||
|
||||
regular_fun<-function(X,y,K,lambdas,alpha){
|
||||
n<-nrow(X)
|
||||
set.seed(321)
|
||||
|
||||
# Using caret function to ensure both levels represented in all folds
|
||||
c<-createFolds(y=y, k = K, list = FALSE, returnTrain = TRUE)
|
||||
|
||||
B<-yhatTestProbKeep<-list()
|
||||
accTrain<-accTest<-err_train<-err_test<-auc_train<-auc_test<-matrix(nrow = K,ncol = length(lambdas))
|
||||
|
||||
catinfo<-levels(y)
|
||||
|
||||
cMatTrain<-cMatTest<-table(true=factor(c(0,0),levels=catinfo),pred=factor(c(0,0),levels=catinfo))
|
||||
|
||||
|
||||
## Iterate over partitions
|
||||
for (idx1 in 1:K){
|
||||
|
||||
# Status
|
||||
cat('Processing fold', idx1, 'of', K,'\n')
|
||||
|
||||
# idx1=1
|
||||
# Get training- and test sets
|
||||
I_train = c!=idx1 ## Creating selection vector of TRUE/FALSE
|
||||
I_test = !I_train
|
||||
|
||||
Xtrain = X[I_train,]
|
||||
ytrain = y[I_train]
|
||||
Xtest = X[I_test,]
|
||||
ytest = y[I_test]
|
||||
|
||||
|
||||
## Model matrices for glmnet
|
||||
## Using the complicated approach not to include first level.
|
||||
# Xmat.train<-model.matrix(~ .-1, data=Xtrain,
|
||||
# contrasts.arg = lapply(Xtrain[,sapply(Xtrain, is.factor)],
|
||||
# contrasts, contrasts=T))
|
||||
# Xmat.test<-model.matrix(~ .-1, data=Xtest,
|
||||
# contrasts.arg = lapply(Xtest[,sapply(Xtest, is.factor)],
|
||||
# contrasts, contrasts=T))
|
||||
|
||||
# Xmat.train<-model.matrix(~.-1,Xtrain)
|
||||
# Xmat.test<-model.matrix(~.-1,Xtest)
|
||||
|
||||
# Weights
|
||||
ytrain_weight<-as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))
|
||||
# ytest_weight<-as.vector(1 / (table(ytest)[ytest] / length(ytest)))
|
||||
|
||||
# Fit regularized linear regression model
|
||||
mod<-glmnet(Xtrain, ytrain,
|
||||
alpha = alpha, ## Alpha = 1 for lasso
|
||||
lambda = lambdas, ## Setting lambdas
|
||||
standardize = TRUE, ## Scales and centers
|
||||
weights = ytrain_weight,
|
||||
family = "binomial"
|
||||
)
|
||||
|
||||
# Keep coefficients for plot
|
||||
B[[idx1]] <- as.matrix(coef(mod))
|
||||
|
||||
# Iterate over regularization strengths to compute training- and test
|
||||
# errors for individual regularization strengths.
|
||||
for (idx2 in 1:length(lambdas)){
|
||||
# idx2=1
|
||||
|
||||
# Predict
|
||||
yhatTrainProb<-predict(mod,
|
||||
s = lambdas[idx2],
|
||||
newx = data.matrix(Xtrain),
|
||||
type = "response"
|
||||
)
|
||||
|
||||
yhatTestProb<-predict(mod,
|
||||
s = lambdas[idx2],
|
||||
newx = data.matrix(Xtest),
|
||||
type = "response"
|
||||
)
|
||||
|
||||
# Compute training and test error
|
||||
yhatTrain = round(yhatTrainProb)
|
||||
yhatTest = round(yhatTestProb)
|
||||
|
||||
# Make predictions categorical again (instead of 0/1 coding)
|
||||
yhatTrainCat = factor(round(yhatTrainProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
|
||||
yhatTestCat = factor(round(yhatTestProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
|
||||
|
||||
# Evaluate classifier performance
|
||||
# Accuracy
|
||||
# accTrain[idx1,idx2] <- sum(yhatTrainCat==ytrain)/length(ytrain)
|
||||
# accTest [idx1,idx2] <- sum(yhatTestCat==ytest)/length(ytest)
|
||||
# #
|
||||
# # Error rate
|
||||
# err_train[idx1,idx2] = 1 - accTrain[idx1,idx2]
|
||||
# err_test [idx1,idx2] = 1 - accTest[idx1,idx2]
|
||||
|
||||
# AUROC
|
||||
suppressMessages(
|
||||
auc_train[idx1,idx2]<-auc(ytrain, yhatTrainCat))
|
||||
suppressMessages(
|
||||
auc_test [idx1,idx2]<-auc(ytest, yhatTestCat))
|
||||
|
||||
# Compute confusion matrices
|
||||
cMatTrain = cMatTrain + table(true=ytrain,pred=yhatTrainCat)
|
||||
cMatTest = cMatTest + table(true=ytest,pred=yhatTestCat)
|
||||
}
|
||||
}
|
||||
ls<-list(mod=mod,B=B,auc_train=auc_train,auc_test=auc_test,cMatTrain=cMatTrain,cMatTest=cMatTest)
|
||||
return(ls)
|
||||
}
|
142
1 PA Decline/archive/generation_1/regularisation_steps.R
Normal file
142
1 PA Decline/archive/generation_1/regularisation_steps.R
Normal file
@ -0,0 +1,142 @@
|
||||
## ItMLiHSmar2022
|
||||
## regularisation_steps.R, child script
|
||||
## Regularised model building and analysation for assignment
|
||||
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
|
||||
##
|
||||
## Now modified to use in publication
|
||||
##
|
||||
|
||||
## ====================================================================
|
||||
## Step 0: data import and wrangling
|
||||
## ====================================================================
|
||||
|
||||
setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/")
|
||||
|
||||
# source("data_format.R")
|
||||
y1<-factor(as.integer(y)-1) ## Outcome is required to be factor of 0 or 1.
|
||||
|
||||
|
||||
## ====================================================================
|
||||
## Step 1: settings
|
||||
## ====================================================================
|
||||
|
||||
## Folds
|
||||
K=10
|
||||
set.seed(3)
|
||||
c<-caret::createFolds(y=y,
|
||||
k = K,
|
||||
list = FALSE,
|
||||
returnTrain = TRUE) # Foldids for alpha tuning
|
||||
|
||||
## Defining tuning parameters
|
||||
lambdas=2^seq(-10, 5, 1)
|
||||
alphas<-seq(0,1,.1)
|
||||
|
||||
## Weights for models
|
||||
weighted=TRUE
|
||||
if (weighted == TRUE) {
|
||||
wght<-as.vector(1 - (table(y)[y] / length(y)))
|
||||
} else {
|
||||
wght <- rep(1, nrow(y))
|
||||
}
|
||||
|
||||
|
||||
## Standardise numeric
|
||||
## Centered and
|
||||
|
||||
|
||||
|
||||
## ====================================================================
|
||||
## Step 2: all cross validations for each alpha
|
||||
## ====================================================================
|
||||
|
||||
library(furrr)
|
||||
library(purrr)
|
||||
library(doMC)
|
||||
registerDoMC(cores=6)
|
||||
|
||||
# Nested CVs with analysis for all lambdas for each alpha
|
||||
#
|
||||
set.seed(3)
|
||||
cvs <- future_map(alphas, function(a){
|
||||
cv.glmnet(model.matrix(~.-1,X),
|
||||
y1,
|
||||
weights = wght,
|
||||
lambda=lambdas,
|
||||
type.measure = "deviance", # This is standard measure and recommended for tuning
|
||||
foldid = c, # Per recommendation the folds are kept for alpha optimisation
|
||||
alpha=a,
|
||||
standardize=TRUE,
|
||||
family=quasibinomial,
|
||||
keep=TRUE) # Same as binomial, but not as picky
|
||||
})
|
||||
|
||||
## ====================================================================
|
||||
# Step 3: optimum lambda for each alpha
|
||||
## ====================================================================
|
||||
|
||||
|
||||
# For each alpha, lambda is chosen for the lowest meassure (deviance)
|
||||
each_alpha <- sapply(seq_along(alphas), function(id) {
|
||||
each_cv <- cvs[[id]]
|
||||
alpha_val <- alphas[id]
|
||||
index_lmin <- match(each_cv$lambda.min,
|
||||
each_cv$lambda)
|
||||
c(lamb = each_cv$lambda.min,
|
||||
alph = alpha_val,
|
||||
cvm = each_cv$cvm[index_lmin])
|
||||
})
|
||||
|
||||
# Best lambda
|
||||
best_lamb <- min(each_alpha["lamb", ])
|
||||
|
||||
# Alpha is chosen for best lambda with lowest model deviance, each_alpha["cvm",]
|
||||
best_alph <- each_alpha["alph",][each_alpha["cvm",]==min(each_alpha["cvm",]
|
||||
[each_alpha["lamb",] %in% best_lamb])]
|
||||
|
||||
## https://stackoverflow.com/questions/42007313/plot-an-roc-curve-in-r-with-ggplot2
|
||||
p_roc<-roc.glmnet(cvs[[1]]$fit.preval, newy = y)[[match(best_alph,alphas)]]|> # Plots performance from model with best alpha
|
||||
ggplot(aes(FPR,TPR)) +
|
||||
geom_step() +
|
||||
coord_cartesian(xlim=c(0,1), ylim=c(0,1)) +
|
||||
geom_abline()+
|
||||
theme_bw()
|
||||
|
||||
## ====================================================================
|
||||
# Step 4: Creating the final model
|
||||
## ====================================================================
|
||||
|
||||
source("regular_fun.R") # Custom function
|
||||
optimised_model<-regular_fun(X,y1,K,lambdas=best_lamb,alpha=best_alph)
|
||||
# With lambda and alpha specified, the function is just a k-fold cross-validation wrapper,
|
||||
# but keeps model performance figures from each fold.
|
||||
|
||||
list2env(optimised_model,.GlobalEnv)
|
||||
# Function outputs a list, which is unwrapped to Env.
|
||||
# See source script for reference.
|
||||
|
||||
## ====================================================================
|
||||
# Step 5: creating table of coefficients for inference
|
||||
## ====================================================================
|
||||
|
||||
Bmatrix<-matrix(unlist(B),ncol=10)
|
||||
Bmedian<-apply(Bmatrix,1,median)
|
||||
Bmean<-apply(Bmatrix,1,mean)
|
||||
|
||||
reg_coef_tbl<-tibble(
|
||||
name = c("Intercept",Hmisc::label(X)),
|
||||
medianX = round(Bmedian,5),
|
||||
ORmed = round(exp(Bmedian),5),
|
||||
meanX = round(Bmean,5),
|
||||
ORmea = round(exp(Bmean),5))%>%
|
||||
# arrange(desc(abs(medianX)))%>%
|
||||
gt()
|
||||
|
||||
reg_coef_tbl
|
||||
|
||||
## ====================================================================
|
||||
# Step 6: plotting predictive performance
|
||||
## ====================================================================
|
||||
|
||||
reg_cfm<-confusionMatrix(cMatTest)
|
||||
reg_auc_sum<-summary(auc_test[,1])
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 63 KiB |
72
1 PA Decline/archive/generation_1/sankey.R
Normal file
72
1 PA Decline/archive/generation_1/sankey.R
Normal file
@ -0,0 +1,72 @@
|
||||
#
|
||||
# Sankey plot of quartile movement for drops
|
||||
#
|
||||
|
||||
gth<-X_tbl[,c("pase_0_cut","pase_6_cut","pase_drop_fac")]
|
||||
|
||||
gth$pase_drop_fac <- factor(ifelse(gth$pase_0_cut=="1",
|
||||
"low",
|
||||
gth$pase_drop_fac),
|
||||
labels = c("no","yes","low")) # Tried and tried to do vectorised, but failed. Matrices acting up..
|
||||
|
||||
# Visuals - sankey
|
||||
# https://stackoverflow.com/questions/50395027/beautifying-sankey-alluvial-visualization-using-r
|
||||
|
||||
|
||||
## Painting
|
||||
# LOOK AT THIS GREAT FUNCTION!! Wide pivot format. Includes factor for possible quartile-colouring.
|
||||
|
||||
df<-data.frame(gth %>% count(pase_0_cut,pase_6_cut,pase_drop_fac))
|
||||
|
||||
lbs0<-c(paste0("1st\n(n=",sum(df$n[df[1]=="1"]),")"),
|
||||
paste0("2nd\n(n=",sum(df$n[df[1]=="2"]),")"),
|
||||
paste0("3rd\n(n=",sum(df$n[df[1]=="3"]),")"),
|
||||
paste0("4th\n(n=",sum(df$n[df[1]=="4"]),")"))
|
||||
|
||||
lbs6<-c(paste0("1st\n(n=",sum(df$n[df[2]=="1"]),")"),
|
||||
paste0("2nd\n(n=",sum(df$n[df[2]=="2"]),")"),
|
||||
paste0("3rd\n(n=",sum(df$n[df[2]=="3"]),")"),
|
||||
paste0("4th\n(n=",sum(df$n[df[2]=="4"]),")"))
|
||||
|
||||
df[1:2] <- as_factor(df[1:2])
|
||||
|
||||
levels(df[,1])<-lbs0[1:length(levels(df[,1]))]
|
||||
levels(df[,2])<-lbs6[1:length(levels(df[,2]))]
|
||||
|
||||
df[,3]<-factor(df[,3],levels=c("low","no","yes"))
|
||||
|
||||
|
||||
lows <- "grey80" # grey
|
||||
drops <- "#990033" # Midtrød
|
||||
nos <- "grey50"
|
||||
nas <- "grey90"
|
||||
border<- "#66c1a3"
|
||||
box <- "#7fccb2"
|
||||
|
||||
cls <- c(lows,nos,drops)
|
||||
alpha <- 0.7
|
||||
|
||||
library(ggalluvial)
|
||||
(p_delta<-ggplot(df,aes(y = n, axis1 = pase_0_cut, axis2 = pase_6_cut)) +
|
||||
geom_alluvium(aes(fill = pase_drop_fac, color=pase_drop_fac), width = 1/10, alpha = alpha, knot.pos = 0.3)+
|
||||
geom_stratum(width = 1/6, fill = box, color = border) +
|
||||
geom_text(stat = "stratum", aes(label=after_stat(stratum))) +
|
||||
scale_x_continuous(breaks = 1:2, labels = c("Pre-stroke\nPASE score\nquartiles", "Six months\nPASE score\nquartiles")) +
|
||||
scale_fill_manual(values = cls) +
|
||||
scale_color_manual(values = cls) +
|
||||
scale_y_reverse() + # Easy solution to flip y-axis
|
||||
labs(title="Change in physical activity") +
|
||||
ylab("Quartiles")+
|
||||
theme_minimal() +
|
||||
theme(legend.position = "none",
|
||||
panel.grid.major = element_blank(),
|
||||
panel.grid.minor = element_blank(),
|
||||
axis.text.y = element_blank(),
|
||||
axis.title.y = element_blank(),
|
||||
axis.text.x = element_text(size = 14, face = "bold"),
|
||||
plot.title = element_text(hjust = 0.5, size = 20, face = "bold")))
|
||||
|
||||
ggsave("sankey.png", plot = last_plot(), device = NULL, path = NULL,
|
||||
scale = 1, width = 120, height = 200, dpi = 450, limitsize = TRUE,
|
||||
units = "mm")
|
||||
|
BIN
1 PA Decline/archive/generation_1/sankey.png
Normal file
BIN
1 PA Decline/archive/generation_1/sankey.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 695 KiB |
41
1 PA Decline/archive/generation_1/standardise.R
Normal file
41
1 PA Decline/archive/generation_1/standardise.R
Normal file
@ -0,0 +1,41 @@
|
||||
## ItMLiHSmar2022
|
||||
## standardise.R, child script
|
||||
## Data standardisation, returns list
|
||||
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
|
||||
|
||||
standardise<-function(train,test,type){
|
||||
# From:
|
||||
# https://datascience.stackexchange.com/questions/13971/standardization-normalization-test-data-in-r
|
||||
|
||||
sel<-sapply(Xtrain,is.numeric) # Deciding which to stadardise (only numeric)
|
||||
cnm<-colnames(Xtrain) # Saving column names for ordering
|
||||
|
||||
# Subsetting
|
||||
|
||||
## Data to treat
|
||||
train.tr<-train[,sel]
|
||||
test.tr<-test[,sel]
|
||||
|
||||
## Data to save
|
||||
train.sv<-train[,!sel]
|
||||
test.sv<-test[,!sel]
|
||||
|
||||
# Calculate mean and SD of train data
|
||||
trainMean <- sapply(train.tr,mean)
|
||||
trainSd <- sapply(train.tr,sd)
|
||||
|
||||
if (type=="c"){
|
||||
## centered
|
||||
norm.trainData<-sweep(train.tr, 2L, trainMean) # using the default "-" to subtract mean column-wise
|
||||
norm.testData<-sweep(test.tr, 2L, trainMean) # using the default "-" to subtract mean column-wise
|
||||
}
|
||||
|
||||
if (type=="cs"){
|
||||
## centered AND scaled (Z-score standardisation)
|
||||
norm.trainData<-sweep(sweep(train.tr, 2L, trainMean), 2, trainSd, "/")
|
||||
norm.testData<-sweep(sweep(test.tr, 2L, trainMean), 2, trainSd, "/")
|
||||
}
|
||||
return(list(XtrainSt=cbind(norm.trainData,train.sv)[,cnm], # Reordering columns to original
|
||||
XtestSt=cbind(norm.testData,test.sv)[,cnm]))
|
||||
}
|
||||
|
@ -141,7 +141,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
|
||||
\intbl {\f0 {\f0\fs20 AFIB}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 386}}\cell
|
||||
@ -160,7 +160,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
|
||||
\intbl {\f0 {\f0\fs20 Hypertension}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 387}}\cell
|
||||
@ -179,7 +179,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
|
||||
\intbl {\f0 {\f0\fs20 Diabetes}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 391}}\cell
|
||||
@ -198,7 +198,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 Known PAD}}\cell
|
||||
\intbl {\f0 {\f0\fs20 PAD}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 391}}\cell
|
||||
@ -710,6 +710,25 @@
|
||||
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 PASE first quartile hop F}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 391}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx5616
|
||||
\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx7488
|
||||
\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
|
||||
\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
|
||||
|
||||
\row
|
||||
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
|
||||
\intbl {\f0 {\f0\fs20 {\super \i 1}Median (25%;75%) [Minimum,Maximum]; n (%)}}\cell
|
||||
|
@ -141,7 +141,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
|
||||
\intbl {\f0 {\f0\fs20 AFIB}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 386}}\cell
|
||||
@ -160,7 +160,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
|
||||
\intbl {\f0 {\f0\fs20 Hypertension}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 387}}\cell
|
||||
@ -179,7 +179,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
|
||||
\intbl {\f0 {\f0\fs20 Diabetes}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 391}}\cell
|
||||
@ -198,7 +198,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 Known PAD}}\cell
|
||||
\intbl {\f0 {\f0\fs20 PAD}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 391}}\cell
|
||||
@ -710,6 +710,25 @@
|
||||
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1872
|
||||
\intbl {\f0 {\f0\fs20 PASE first quartile hop F}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx3744
|
||||
\intbl {\f0 {\f0\fs20 391}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx5616
|
||||
\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx7488
|
||||
\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
|
||||
|
||||
\pard\plain\uc0\qc\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
|
||||
\intbl {\f0 {\f0\fs20 0 (0%)}}\cell
|
||||
|
||||
\row
|
||||
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx9360
|
||||
\intbl {\f0 {\f0\fs20 {\super \i 1}Median (25%;75%) [Minimum,Maximum]; n (%)}}\cell
|
||||
|
@ -239,7 +239,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
|
||||
\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
|
||||
\intbl {\f0 {\f0\fs20 AFIB}}\cell
|
||||
|
||||
\pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
|
||||
\intbl {\f0 {\f0\fs20 0.000}}\cell
|
||||
@ -267,7 +267,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
|
||||
\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
|
||||
\intbl {\f0 {\f0\fs20 Hypertension}}\cell
|
||||
|
||||
\pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
|
||||
\intbl {\f0 {\f0\fs20 0.083}}\cell
|
||||
@ -295,7 +295,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
|
||||
\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
|
||||
\intbl {\f0 {\f0\fs20 Diabetes}}\cell
|
||||
|
||||
\pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
|
||||
\intbl {\f0 {\f0\fs20 0.000}}\cell
|
||||
@ -323,7 +323,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
|
||||
\intbl {\f0 {\f0\fs20 Known PAD}}\cell
|
||||
\intbl {\f0 {\f0\fs20 PAD}}\cell
|
||||
|
||||
\pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
|
||||
\intbl {\f0 {\f0\fs20 0.000}}\cell
|
@ -239,7 +239,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
|
||||
\intbl {\f0 {\f0\fs20 Known AFIB}}\cell
|
||||
\intbl {\f0 {\f0\fs20 AFIB}}\cell
|
||||
|
||||
\pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
|
||||
\intbl {\f0 {\f0\fs20 0.000}}\cell
|
||||
@ -267,7 +267,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
|
||||
\intbl {\f0 {\f0\fs20 Known hypertension}}\cell
|
||||
\intbl {\f0 {\f0\fs20 Hypertension}}\cell
|
||||
|
||||
\pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
|
||||
\intbl {\f0 {\f0\fs20 0.000}}\cell
|
||||
@ -295,7 +295,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
|
||||
\intbl {\f0 {\f0\fs20 Known diabetes}}\cell
|
||||
\intbl {\f0 {\f0\fs20 Diabetes}}\cell
|
||||
|
||||
\pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
|
||||
\intbl {\f0 {\f0\fs20 0.000}}\cell
|
||||
@ -323,7 +323,7 @@
|
||||
\trowd\trrh0
|
||||
|
||||
\pard\plain\uc0\ql\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx1170
|
||||
\intbl {\f0 {\f0\fs20 Known PAD}}\cell
|
||||
\intbl {\f0 {\f0\fs20 PAD}}\cell
|
||||
|
||||
\pard\plain\uc0\qr\clvertalc \clpadfl3\clpadl25 \clpadft3\clpadt85 \clpadfb3\clpadb25 \clpadfr3\clpadr85 \cellx2340
|
||||
\intbl {\f0 {\f0\fs20 0.000}}\cell
|
@ -135,6 +135,7 @@ var.labels = c(age="Age",
|
||||
pase_decl_rel_fac="PASE score difference, relative F",
|
||||
pase_decl_abs_fac="PASE score difference, absolute F",
|
||||
pase_drop_fac="PASE first quartile drop F",
|
||||
pase_hop_fac="PASE first quartile hop F",
|
||||
pase_diff="PASE absolute decline",
|
||||
pase_decl_rel="PASE relative decline",
|
||||
pase_0_cut="PASE 0 quartiles",
|
||||
@ -171,9 +172,9 @@ X_tbl <- X_tbl|>
|
||||
mutate(## Relative decline
|
||||
pase_diff=(pase_0-pase_6),
|
||||
pase_decl_rel = pase_diff/pase_0*100,
|
||||
pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
|
||||
# pase_decl_rel_fac=factor(ifelse(pase_decl_rel>=rel_dif,"yes","no")),
|
||||
## Absolute decline
|
||||
pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
|
||||
# pase_decl_abs_fac=factor(ifelse(pase_diff>=abs_dif,"yes","no")),
|
||||
## Drop
|
||||
pase_0_cut=quantile_cut(as.numeric(pase_0),
|
||||
groups=4,
|
||||
@ -189,42 +190,38 @@ X_tbl <- X_tbl|>
|
||||
ordered.f = TRUE,
|
||||
inc.outs = TRUE,
|
||||
detail.lst=FALSE),
|
||||
pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")))
|
||||
pase_drop_fac=factor(ifelse(pase_6_cut==1&pase_0_cut!=1,"yes","no")),
|
||||
pase_hop_fac=factor(ifelse(pase_6_cut!=1&pase_0_cut==1,"yes","no")))
|
||||
|
||||
Hmisc::label(X_tbl) = as.list(var.labels[match(names(X_tbl), names(var.labels))])
|
||||
|
||||
# Setting final primary output from "pout"
|
||||
if (pout=="decl_rel"){
|
||||
X_tbl <- X_tbl|>
|
||||
mutate(group=pase_decl_rel_fac)
|
||||
|
||||
X_tbl_f <- X_tbl|>
|
||||
filter(pase_0!=0)|>
|
||||
select(-starts_with("pase_"))
|
||||
}
|
||||
|
||||
if (pout=="decl_abs"){
|
||||
X_tbl <- X_tbl|>
|
||||
mutate(group=pase_decl_rel_fac)
|
||||
|
||||
X_tbl_f <- X_tbl|>
|
||||
filter(pase_0>=abs_dif)|>
|
||||
select(-starts_with("pase_"))
|
||||
}
|
||||
|
||||
if (pout=="drop"){
|
||||
X_tbl <- X_tbl|>
|
||||
mutate(group=pase_drop_fac)
|
||||
|
||||
print(quantile(as.numeric(X_tbl$pase_0),na.rm=T))
|
||||
print(quantile(as.numeric(X_tbl$pase_6),na.rm=T))
|
||||
print(summary(X_tbl$pase_0_cut))
|
||||
# print(quantile(as.numeric(X_tbl$pase_0)))
|
||||
# print(quantile(as.numeric(X_tbl$pase_6)))
|
||||
# print(summary(X_tbl$pase_0_cut))
|
||||
|
||||
X_tbl_f <- X_tbl|>
|
||||
filter(pase_0_cut!=1)|>
|
||||
select(-starts_with("pase_"))
|
||||
}
|
||||
|
||||
if (pout=="hop"){
|
||||
X_tbl <- X_tbl|>
|
||||
mutate(group=pase_hop_fac)
|
||||
|
||||
# print(quantile(as.numeric(X_tbl$pase_0)))
|
||||
# print(quantile(as.numeric(X_tbl$pase_6)))
|
||||
# print(summary(X_tbl$pase_0_cut))
|
||||
|
||||
X_tbl_f <- X_tbl|>
|
||||
filter(pase_6_cut!=1)|>
|
||||
select(-starts_with("pase_"))
|
||||
}
|
||||
|
||||
# Excluding one month measures for primary analysis and setting df for table one
|
||||
X_tbl_f <- X_tbl_f |>
|
||||
select(-c(who5_score_1,
|
||||
@ -273,3 +270,7 @@ library(pROC)
|
||||
library(gt)
|
||||
library(gtsummary)
|
||||
library(dplyr)
|
||||
library(tidyverse)
|
||||
library(glue)
|
||||
# library(ggdendro)
|
||||
library(corrplot)
|
||||
|
@ -1,6 +1,6 @@
|
||||
## ItMLiHS assignment data set
|
||||
|
||||
export<-read.csv("/Volumes/Data/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
|
||||
export<-read.csv("/Volumes/Data 1/exercise/source/background.csv",colClasses = "character", na.strings = c("NA","","unknown"))
|
||||
|
||||
export<-export[,c("pase_0",
|
||||
"age",
|
||||
|
@ -132,11 +132,19 @@ reg_coef_tbl<-tibble(
|
||||
# arrange(desc(abs(medianX)))%>%
|
||||
gt()
|
||||
|
||||
reg_coef_tbl
|
||||
|
||||
## ====================================================================
|
||||
# Step 6: plotting predictive performance
|
||||
## ====================================================================
|
||||
|
||||
reg_cfm<-confusionMatrix(cMatTest)
|
||||
reg_auc_sum<-summary(auc_test[,1])
|
||||
|
||||
## ====================================================================
|
||||
# Step 7: Packing list to save in loop
|
||||
## ====================================================================
|
||||
|
||||
ls[[i]] <- list("RegularisedCoefs"=reg_coef_tbl,
|
||||
"bestA"=best_alph,
|
||||
"bestL"=best_lamb,
|
||||
"ConfusionMatrx"=reg_cfm,
|
||||
"AUROC"=reg_auc_sum)
|
||||
|
1
2 Longterm/survival.R
Normal file
1
2 Longterm/survival.R
Normal file
@ -0,0 +1 @@
|
||||
survival
|
@ -5,7 +5,7 @@ account: cognitiveindex
|
||||
server: shinyapps.io
|
||||
hostUrl: https://api.shinyapps.io/v1
|
||||
appId: 6803928
|
||||
bundleId: 6217136
|
||||
bundleId: 6561652
|
||||
url: https://cognitiveindex.shinyapps.io/index_app/
|
||||
when: 1660899020.73485
|
||||
lastSyncTime: 1660899020.73486
|
||||
when: 1669625189.89414
|
||||
lastSyncTime: 1669625189.89416
|
||||
|
Loading…
Reference in New Issue
Block a user