--- title: "revised statistics" author: "AGDamsbo" date: "8/1/2022" output: html_document toc: true --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ```{r} library(REDCapR) library(gtsummary) theme_gtsummary_compact() library(REDCapR) library(gt) library(lubridate) library(dplyr) library(tidyr) ``` # Import ```{r} dta_all<-read.csv("/Volumes/Data/depression/dep_dataset.csv") ``` # Defining patients to include for analysis Only including cases with complete pase_0 and MDI at 1 & 6 months ```{r} dta<-dta_all[!is.na(dta_all$pase_0),] # &!is.na(dta$mdi_1)&!is.na(dta$mdi_6) ``` ## Formatting ```{r echo=FALSE} dta$diabetes<-factor(dta$diabetes) dta$pad<-factor(dta$pad) dta$cohab<-ifelse(dta$civil=="partner","yes","no")|> factor() dta$hypertension<-factor(dta$hypertension) dta$afli[dta$afli=="unknown"]<-NA dta$afli<-factor(dta$afli) dta$ever_smoker<-ifelse(dta$smoke_ever=="ever","yes","no")|> factor() dta$ami<-factor(dta$ami) dta$tci<-factor(dta$tci) dta$thrombolysis<-factor(dta$thrombolysis) dta$thrombechtomy<-factor(dta$thrombechtomy) dta$any_reperf<-ifelse(dta$rep_any=="rep","yes","no")|> factor() dta$pad<-factor(dta$pad) dta$nihss_0<-as.numeric(dta$nihss_0) dta$age<-as.numeric(dta$age) dta$active_treat<-ifelse(dta$rtreat=="Active","yes","no")|> factor() # dta$rtreat<-factor(dta$rtreat) dta$female<-ifelse(dta$sex=="female","yes","no")|> factor() dta$pase_0<-as.numeric(dta$pase_0) dta$pase_6<-as.numeric(dta$pase_6) dta$bmi<-as.numeric(dta$bmi) dta$mdi_6<-as.numeric(dta$mdi_6) dta$pase_0_bin<-factor(dta$pase_0_bin,levels=c("lower","higher")) dta$nihss_0_isna<-is.na(dta$nihss_0) ``` ```{r} vars<-c("pase_0", "female", "age", "cohab", "ever_smoker", "diabetes", "hypertension", "afli", "ami", "tci", "pad", "nihss_0", "any_reperf") # tbl1_vars<-c("thrombolysis", "thrombechtomy","inc_time") labels_all<-list(active_treat~"Active trial treatment", pase_0~"PASE score", age~"Age", female~"Female sex", ever_smoker~"History of smoking", cohab~"Cohabitation", diabetes~"Known diabetes", hypertension~"Known hypertension", afli~"Known Atrialfibrillation", ami~"Previos myocardial infarction", tci~"Previos TIA", pad~"Known peripheral arteriosclerotic disease", nihss_0~"NIHSS score", thrombolysis~"Thrombolytic therapy", thrombechtomy~"Endovascular treatment", any_reperf~"Any reperfusion treatment", inc_time~"Study inclusion time", '[Intercept]'~"Intercept") lab_sel<-function(label_list,variables_vector){ ## Helper function to select labels for gtsummary function from list of all labels based on selected variables. ## Long names in try to ease reading. include_index<-c() for (i in 1:length(label_list)) { include_index[i]<-as.character(label_list[[i]])[2] %in% variables_vector } return(label_list[include_index]) } dta_backup<-dta ``` # Table 1 ```{r} tbl1_vars<-c("active_treat",vars,"inc_time") dta|> tbl_summary(missing = "ifany", include = all_of(tbl1_vars), missing_text="(Missing)", label = lab_sel(labels_all,tbl1_vars) )|> add_n()|> as_gt() |> # modify with gt functions gt::tab_header("Baseline Characteristics") |> gt::tab_options( table.font.size = "small", data_row.padding = gt::px(1)) ``` # Regression - all ```{r} outs<-c("mdi_1_enr","mdi_6_newobs_enr") ``` ## Non-stratified ```{r} source("biv_mul.R") bm_16_tbl_rtf <- file("bm_16_tbl.RTF", "w") writeLines(bm_16_tbl%>%as_gt()%>%as_rtf(), bm_16_tbl_rtf) close(bm_16_tbl_rtf) bm_16_tbl %>% # build gtsummary table as_gt() %>% # convert to gt table gt::gtsave( # save table as image filename = "bm_16_tbl.png" ) ``` ## Stratified by treatment ```{r} source("biv_mul_strat.R") bm_16_tbl_strat_rtf <- file("bm_16_tbl_strat.RTF", "w") writeLines(bm_16_tbl_strat%>%as_gt()%>%as_rtf(), bm_16_tbl_strat_rtf) close(bm_16_tbl_strat_rtf) bm_16_tbl_strat %>% # build gtsummary table as_gt() %>% # convert to gt table gt::gtsave( # save table as image filename = "bm_16_tbl_strat.png" ) ``` # Regression - sensitivity ```{r} outs<-c("mdi_1","mdi_6") ``` ## Non-stratified ```{r} source("biv_mul.R") bm_16_sens_tbl_rtf <- file("bm_16_sens_tbl.RTF", "w") writeLines(bm_16_tbl%>%as_gt()%>%as_rtf(), bm_16_sens_tbl_rtf) close(bm_16_sens_tbl_rtf) bm_16_tbl %>% # build gtsummary table as_gt() %>% # convert to gt table gt::gtsave( # save table as image filename = "bm_16_tbl_sens.png" ) ``` ## Stratified by treatment ```{r} source("biv_mul_strat.R") bm_16_tbl_strat_sens_rtf <- file("bm_16_tbl_strat_sens.RTF", "w") writeLines(bm_16_tbl_strat%>%as_gt()%>%as_rtf(), bm_16_tbl_strat_sens_rtf) close(bm_16_tbl_strat_sens_rtf) bm_16_tbl_strat %>% # build gtsummary table as_gt() %>% # convert to gt table gt::gtsave( # save table as image filename = "bm_16_tbl_strat_sens.png" ) ``` # Regression - interaction ```{r} outs<-c("mdi_1_enr","mdi_6_newobs_enr") ``` ```{r} ## ============================================================================= ## Loop ## ============================================================================= bm_list<-list() for (i in 1:length(outs)){ ## Multivariate mul<-dta |> dplyr::select(all_of(c("active_treat",vars,outs[i])))|> lm(formula( paste(c(paste(c(outs[i],paste(c("active_treat","pase_0"),collapse="*")),collapse="~"),"."),collapse="+")), data = _)|> tbl_regression(label = lab_sel(labels_all,c(vars,"active_treat")))|> add_n() |> add_global_p() |> bold_p() |> bold_labels() |> italicize_levels() bm_list[[i]]<-mul } ## ============================================================================= ## Big merge ## ============================================================================= tbl_merge( tbls = bm_list, tab_spanner = c("**One month follow up**", "**Six months follow up**") ) ``` ```{r} library(aod) for (i in 1:length(outs)){ model<-dta |> dplyr::select(all_of(c("active_treat",vars,outs[i])))|> lm(formula( paste(c(paste(c(outs[i],paste(c("active_treat","pase_0"),collapse="*")),collapse="~"),"."),collapse="+")), data = _) wt<-wald.test(Sigma = vcov(model), b = coef(model), Terms = model$rank # Rank gives number of coefficients. The interaction is the last. ) print(wt) } ``` # Regression - nihss ~ pase_0 ```{r} vars_nihss<-c( "pase_0", "female", "age", "cohab", "ever_smoker", "diabetes", "hypertension", "afli", "ami", "tci", "pad", "nihss_0") ## Bivariate biv<-dta|> dplyr::select(all_of(vars_nihss))|> tbl_uvregression(data=_, y="nihss_0", method=lm#, #label = lab_sel(labels_all,vars) ) |> add_global_p()|> bold_p() |> bold_labels() |> italicize_levels() ## Multivariate mul<-dta |> dplyr::select(all_of(vars_nihss))|> lm(nihss_0~pase_0+., data = _) |> tbl_regression( #label = lab_sel(labels_all,vars), #intercept=FALSE )|> add_n() |> add_global_p() |> bold_p() |> bold_labels() |> italicize_levels() ## Merge tbl_merge( tbls = list(biv, mul), tab_spanner = c("**Bivariate linear regression**", "**Multivariate linear regression**") ) ``` # Transformed data ```{r} # dta<-dta_backup dta<-dta|> mutate(pase_0=sqrt(pase_0), nihss_0=log1p(nihss_0), mdi_1_enr=log1p(mdi_1_enr), # log1p(x) svarer til log(x+1) mdi_6_newobs_enr=log1p(mdi_6_newobs_enr))|> data.frame() library(dplyr) # source("biv_mul.R") ``` ```{r} ## ============================================================================= ## Loop ## ============================================================================= bm_list<-list() for (i in 1:length(outs)){ ## Bivariate dta_l<-dta|> dplyr::select(all_of(c("active_treat",vars,outs[i]))) sel<-dta_l|> sapply(is.factor) # i=1 biv<-dta_l|> tbl_uvregression(data=_, y=outs[i], method=lm, label = lab_sel(labels_all,vars), show_single_row=colnames(dta_l)[sel] ) |> add_global_p()|> bold_p() |> bold_labels() |> italicize_levels() ## What follows is the pragmatic transformation and reformatting ## Transforming log2() to 2^() # biv$table_body$estimate<-expm1(biv$table_body$estimate) # biv$table_body$conf.low<-expm1(biv$table_body$conf.low) # biv$table_body$conf.high<-expm1(biv$table_body$conf.high) # # ## Transforming sqrt(pase_0) to pase_0^2 # biv$table_body$estimate[biv$table_body$variable=="pase_0"]<- # biv$table_body$estimate[biv$table_body$variable=="pase_0"]^2 # # low<-biv$table_body$conf.low[biv$table_body$variable=="pase_0"]^2 # high<-biv$table_body$conf.high[biv$table_body$variable=="pase_0"]^2 # # biv$table_body$conf.low[biv$table_body$variable=="pase_0"]<-high # biv$table_body$conf.high[biv$table_body$variable=="pase_0"]<-low # # ## New confidence intervals # # biv$table_body$estimate<-format(biv$table_body$estimate, drop0trailing = F,digits =2) # biv$table_body$ci<-paste(formatC(biv$table_body$conf.low, digits = 3, format = "f"), # formatC(biv$table_body$conf.high, digits = 3, format = "f"), # sep=", ") # biv$table_body$ci<-paste(format(biv$table_body$conf.low, digits = 2, drop0trailing = FALSE), # format(biv$table_body$conf.high, digits = 2, drop0trailing = FALSE), # sep=", ") # Formatting from this: https://stackoverflow.com/questions/12243071/r-keeping-0-0-when-using-paste-or-paste0 ## multivariate mul<-dta_l |> lm(formula(paste(c(outs[i],"."),collapse="~")), data = _) |> tbl_regression(label = lab_sel(labels_all,vars), show_single_row=colnames(dta_l)[sel] )|> add_n() |> add_global_p() |> bold_p() |> bold_labels() |> italicize_levels() ## Merge biv_mul<-tbl_merge( tbls = list(biv, mul), tab_spanner = c("**Bivariate linear regression**", "**Multivariate linear regression**") ) bm_list[[i]]<-biv_mul } ## ============================================================================= ## Big merge ## ============================================================================= bm_16_tbl<-tbl_merge( tbls = bm_list, tab_spanner = c("**One month follow up**", "**Six months follow up**") ) bm_16_tbl ``` ```{r} bm_16_tbl_rtf <- file("bm_16_tbl_trans.RTF", "w") writeLines(bm_16_tbl%>%as_gt()%>%as_rtf(), bm_16_tbl_rtf) close(bm_16_tbl_rtf) bm_16_tbl %>% # build gtsummary table as_gt() %>% # convert to gt table gt::gtsave( # save table as image filename = "bm_16_tbl_trans.png" ) ``` ## Tests and visualisation Box-Cox power transformation performs comparably to logarithmic transformation. The latter is much easier to explain. ```{r} library(MASS) dta_bc<-dta_backup|> dplyr::select(all_of(c("mdi_6_newobs_enr",vars)))|> mutate(pase_0=sqrt(pase_0), mdi_6_newobs_enr=mdi_6_newobs_enr+1)#|> # na.omit() bc<-boxcox(mdi_6_newobs_enr~.,data=dta_bc) lambda <- bc$x[which.max(bc$y)] ``` Q-Q plots to compare the two different approaches, and the non-transformed ```{r} qqnorm(lm(((mdi_6_newobs_enr^lambda-1)/lambda) ~ .,data=dta_bc)$residuals) qqnorm(lm(log(mdi_6_newobs_enr) ~ .,data=dta_bc)$residuals) ``` Histograms for reference ```{r} hist(dta$pase_0,40); hist(sqrt(dta$pase_0),40) ``` ```{r} hist(expm1(dta$mdi_6_newobs_enr),40); hist((dta$mdi_6_newobs_enr),40) ## Observed MDI, and log transformed MDI ```