## ## Master script ## ## Based on the assignment work from the ISL-course ## ## Generation 2 - 02.december.2022 ## Code preparation for analysis on Denmarks Statistics server with enriched data set. ## ## Analysis plan: ## Table 1 ## Figure 1: Sankey plot (drop & hop colored) ## Table 2: Linear regression model of pase_6~. ## Table 3: Elastic net prediction models of drop and hop. Performance measures referenced in text. ## ## A Rmarkdown file could be created to write the initial report with main results. ## This code is a bit of a mess, as it is the result of several iterations. It works however. ## ## ==================================================================== # Step 0: Primary outcome ## ==================================================================== # Script to run as hop and drop pout <- "drop" # Drop to first quartile # decl_rel # decl_abs # drop # hop ## ==================================================================== ## Data ## ==================================================================== setwd("/Users/au301842/PhysicalActivityandStrokeOutcome/1 PA Decline/") source("data_set.R") # Loading data-set from USB, to not store on computer source("data_format.R") ## ==================================================================== ## ## Baseline - by PASE group ## ## ==================================================================== ts_q <- X_tbl |> select(vars) |> mutate(pase_0_cut = factor(quantile_cut(pase_0, groups = 4)[[1]],ordered = TRUE)) |> select(-pase_6,-pase_0) |> tbl_summary(missing = "no", by="pase_0_cut", value = list(where(is.factor) ~ "2"), type = list(mrs_0 ~ "categorical", all_continuous() ~ "continuous2"), statistic = list(all_continuous() ~ c("{N_nonmiss}", "{median} ({p25}, {p75})", "{min}, {max}", "{mean} ({sd})")) ) |> add_overall() |> add_n () ts_q ## ==================================================================== # Drops and hops ## ==================================================================== # TRUEs are patients dropping table(X_tbl$pase_0_cut!="1"&X_tbl$pase_6_cut=="1")/nrow(X_tbl[X_tbl$pase_0_cut!="1",]) # TRUEs are percentage of patients inactive before stroke being more active after table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_0_cut=="1",]) # TRUEs are percentage of patients being more active after that were inactive before stroke table(X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1")/nrow(X_tbl[X_tbl$pase_6_cut!="1",]) # Difference between hop/no-hop t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"]) summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"]) summary(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"]) boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut!="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"]) # Stationary low t.test(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"]) boxplot(X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_0"],X_tbl[X_tbl$pase_0_cut=="1"&X_tbl$pase_6_cut=="1","pase_6"]) ## ==================================================================== # Sankey plot ## ==================================================================== # source("sankey.R") # p_delta ## ==================================================================== # Six months PASE: Bivariate and multivariate analyses ## ==================================================================== dta_lmreg <- X_tbl |> select(vars) |> mutate(mrs_0=factor(ifelse(mrs_0==1,1,2))) Hmisc::label(dta_lmreg$mrs_0) <- "Pre-stroke mRS >0" uv_reg <- tbl_uvregression(data=dta_lmreg, method=lm, y="pase_6", show_single_row = where(is.factor), estimate_fun = ~style_sigfig(.x,digits = 3), pvalue_fun = ~style_pvalue(.x, digits = 3) ) mu_reg <- dta_lmreg |> lm(formula=pase_6~.,data=_) |> tbl_regression(show_single_row = where(is.factor), estimate_fun = ~style_sigfig(.x,digits = 3), pvalue_fun = ~style_pvalue(.x, digits = 3) )|> add_n() tbl_merge(list(uv_reg,mu_reg)) ## ==================================================================== ## ## Data variance ## ## Illustrating principal components. ## ## ==================================================================== # source("PCA.R") # # # pca22 # ggsave("pc_plot.png",width = 18, height = 12, dpi = 300, limitsize = TRUE, units = "cm") ## ==================================================================== ## ## Models ## ## ==================================================================== # source("assign_full.R") ls <- list() for (i in c("drop","hop")){ pout <- i source("data_format.R") source("regularisation_steps.R") } # Loop to run regularised model on both drop and hop. # Saved in list for printing and exporting the plot. ## ==================================================================== # Step 1: data merge ## ==================================================================== tbl<-merge(ls$drop$RegularisedCoefs$'_data',ls$hop$RegularisedCoefs$'_data',by="name",all.x=T, sort=F) ## ==================================================================== # Step 2: table ## ==================================================================== com_coef_tbl<-tbl%>% gt()%>% fmt_number( columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric rows = everything(), decimals = 3)%>% tab_spanner( label = "DROP", columns = 2:5 )%>% tab_spanner( label = "HOP", columns = 6:9 )%>% tab_header( title = "Model coefficients", subtitle = "Combined table of both full and regularised model coefficients" ) # paste0("Regularised model, (a=", # best_alph, # ", l=", # round(best_lamb,3), # ")") com_coef_tbl ## ==================================================================== # Step 3: export ## ==================================================================== com_coef_rtf <- file("table2.RTF", "w") writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf) close(com_coef_rtf) ## ==================================================================== ## ## Model performance ## ## Table with performance meassures for the two different models. ## ## ==================================================================== ## ==================================================================== # Step 1: data set ## ==================================================================== tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"), "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3), "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3)) ## ==================================================================== # Step 2: table ## ==================================================================== tbl_perf<-tbl%>% gt()%>% tab_header( title = "Performance meassures", subtitle = "Combined table of both full and regularised performance meassures" ) tbl_perf ## ==================================================================== # Step 3: export ## ==================================================================== tbl_perf_rtf <- file("table3.RTF", "w") writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf) close(tbl_perf_rtf) # ## ==================================================================== ## ## Secondary analysis ## ## ==================================================================== Xy<-dta_s X<-dta_s|>select(-group) y<-dta_s$group source("assign_full.R") source("regularisation_steps.R") ## ==================================================================== # Step 1: data merge ## ==================================================================== tbl<-merge(reg_coef_tbl$'_data',full_coef_tbl$'_data',by="name",all.x=T, sort=F) ## ==================================================================== # Step 2: table ## ==================================================================== com_coef_tbl<-tbl%>% gt()%>% fmt_number( columns=colnames(tbl)[sapply(tbl,is.numeric)], ## Selecting all numeric rows = everything(), decimals = 3)%>% tab_spanner( label = "Full model", columns = 6:8 )%>% tab_spanner( label = paste0("Regularised model, (a=", best_alph, ", l=", round(best_lamb,3), ")"), columns = 2:5 )%>% tab_header( title = "Model coefficients", subtitle = "Combined table of both full and regularised model coefficients" ) com_coef_tbl ## ==================================================================== # Step 3: export ## ==================================================================== com_coef_rtf <- file("table2_sec.RTF", "w") writeLines(com_coef_tbl%>%as_rtf(), com_coef_rtf) close(com_coef_rtf) ## ==================================================================== ## ## Model performance ## ## Table with performance meassures for the two different models. ## ## ==================================================================== ## ==================================================================== # Step 1: data set ## ==================================================================== tbl<-data.frame(Meassure=c(names(full_cfm$byClass),"Mean AUC"), "Regularised model"=round(c(reg_cfm$byClass,reg_auc_sum["Mean"]),3), "Full model"=round(c(full_cfm$byClass,full_auc_sum["Mean"]),3)) ## ==================================================================== # Step 2: table ## ==================================================================== tbl_perf<-tbl%>% gt()%>% tab_header( title = "Performance meassures", subtitle = "Combined table of both full and regularised performance meassures" ) tbl_perf ## ==================================================================== # Step 3: export ## ==================================================================== tbl_perf_rtf <- file("table3_sec.RTF", "w") writeLines(tbl_perf%>%as_rtf(), tbl_perf_rtf) close(tbl_perf_rtf)