## ## Master script ## ## Based on the assignment work from the ISL-course ## ## ## ## ==================================================================== # Step 0: Primary outcome ## ==================================================================== # Difs rel_dif <- 20 # 20 % difference abs_dif <- 20 # 20 point diff # pout <- "diff" # # Note:: By increasing the relative decline, the sensitivity increases and specificity declines. # This fact is an argument against over fitting. The reason being the nature of the clinical data and the fact, that predicting PA is difficult (!) # pout <- "drop" # Drop to first quartile # decl_rel # decl_abs # drop ## ==================================================================== ## Data ## ==================================================================== setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/") source("data_set.R") # Loading data-set from USB, to not store on computer source("data_format.R") ## ==================================================================== # Libraries ## ==================================================================== library(tidyverse) library(glue) library(patchwork) library(ggdendro) library(corrplot) library(gt) library(gtsummary) ## ==================================================================== ## ## Baseline ## ## ==================================================================== ## ==================================================================== # Step 0: labels ## ==================================================================== lbs<-var.labels[match(colnames(X_tbl), names(var.labels))] ls<-lapply(1:ncol(X_tbl),function(x){ as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\"")) }) ts<-tbl_summary(X_tbl|>filter(pase_0_cut!="1"), by = "group", missing = "no", # label = ls[-length(ls)], ## Removing the last, as this is output value = list(where(is.factor) ~ "2"), type = list(mrs_0 ~ "categorical"), statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]") )%>% add_overall() %>% add_n()%>% as_gt() ts ts_rtf <- file("table1.RTF", "w") writeLines(ts%>%as_rtf(), ts_rtf) close(ts_rtf) ## ==================================================================== # Step 1: labels ## ==================================================================== lbs<-var.labels[match(colnames(X_tbl_f), names(var.labels))] ls<-lapply(1:ncol(X_tbl_f),function(x){ as.formula(paste0(names(lbs)[x],"~","\"",lbs[x],"\"")) }) ## ==================================================================== # Step 2: table - edited ## ==================================================================== ts_e<-tbl_summary(X_tbl, missing = "no", # label = ls[-length(ls)], ## Removing the last, as this is output value = list(where(is.factor) ~ "2"), type = list(mrs_0 ~ "categorical", mrs_1 ~ "categorical"), statistic = list(all_continuous() ~ "{median} ({p25};{p75}) [{min},{max}]") )%>% as_gt() ts_e ## ==================================================================== # Step 3: table export ## ==================================================================== ts_rtf <- file("table1_overall.RTF", "w") writeLines(ts%>%as_rtf(), ts_rtf) close(ts_rtf) ## ==================================================================== ## ## Data variance ## ## Illustrating principal components. ## ## ==================================================================== # source("PCA.R") # # # pca22 # ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm") ## ==================================================================== ## ## Models ## ## ==================================================================== source("assign_full.R") source("regularisation_steps.R") ## ==================================================================== # Step 1: data merge ## ==================================================================== tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F) ## ==================================================================== # Step 2: table ## ==================================================================== com_coef_tbl<-tbl%>% gt()%>% fmt_number( columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric rows = everything(), decimals = 3)%>% tab_spanner( label = "Full model", columns = 6:8 )%>% tab_spanner( label = paste0("Regularised model, (a=", best_alph, ", l=", round(best_lamb,3), ")"), columns = 2:5 )%>% tab_header( title = "Model coefficients", subtitle = "Combined table of both full and regularised model coefficients" ) com_coef_tbl ## ==================================================================== # Step 3: export ## ==================================================================== com_coef_rtf <- file("table2.RTF", "w") writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf) close(com_coef_rtf) ## ==================================================================== ## ## Model performance ## ## Table with performance meassures for the two different models. ## ## ==================================================================== # ROC curve of best model p_roc ggsave("roc_plot.png",width = 12, height = 12, dpi = 300, limitsize = TRUE, units = "cm") ## ==================================================================== # Step 1: data set ## ==================================================================== tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"), "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3), "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3)) ## ==================================================================== # Step 2: table ## ==================================================================== tbl_perf<-tbl%>% gt()%>% tab_header( title = "Performance meassures", subtitle = "Combined table of both full and regularised performance meassures" ) tbl_perf ## ==================================================================== # Step 3: export ## ==================================================================== tbl_perf_rtf <- file("table3.RTF", "w") writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf) close(tbl_perf_rtf) # ## ==================================================================== ## ## Secondary analysis ## ## ==================================================================== Xy<-dta_s X<-dta_s|>select(-group) y<-dta_s$group source("assign_full.R") source("regularisation_steps.R") ## ==================================================================== # Step 1: data merge ## ==================================================================== tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F) ## ==================================================================== # Step 2: table ## ==================================================================== com_coef_tbl<-tbl%>% gt()%>% fmt_number( columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric rows = everything(), decimals = 3)%>% tab_spanner( label = "Full model", columns = 6:8 )%>% tab_spanner( label = paste0("Regularised model, (a=", best_alph, ", l=", round(best_lamb,3), ")"), columns = 2:5 )%>% tab_header( title = "Model coefficients", subtitle = "Combined table of both full and regularised model coefficients" ) com_coef_tbl ## ==================================================================== # Step 3: export ## ==================================================================== com_coef_rtf <- file("table2_sec.RTF", "w") writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf) close(com_coef_rtf) ## ==================================================================== ## ## Model performance ## ## Table with performance meassures for the two different models. ## ## ==================================================================== ## ==================================================================== # Step 1: data set ## ==================================================================== tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"), "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3), "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3)) ## ==================================================================== # Step 2: table ## ==================================================================== tbl_perf<-tbl%>% gt()%>% tab_header( title = "Performance meassures", subtitle = "Combined table of both full and regularised performance meassures" ) tbl_perf ## ==================================================================== # Step 3: export ## ==================================================================== tbl_perf_rtf <- file("table3_sec.RTF", "w") writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf) close(tbl_perf_rtf)