talos-pa-depression/Archive/regression_transformed_back.R

170 lines
5.1 KiB
R
Raw Normal View History

source("function_trans_cols.R")
if (trans_vars==TRUE){
# If trans_vars flag is TRUE, transform specified variables
dta<-trans_cols(dta_backup,sqrts=sqrt_vars,log1ps = log1p_vars_all)
} else {dta<-dta_backup}
library(dplyr)
source("function_back_trans.R")
## =============================================================================
## Loop
## =============================================================================
bm_list<-list()
for (i in 1:length(outs)){
## Bivariate
dta_l<-dta|>
dplyr::select(all_of(c("active_treat",vars,outs[i])))
sel<-dta_l|>
sapply(is.factor)
# i=1
biv<-dta_l|>
tbl_uvregression(data=_,
y=outs[i],
method=lm,
label = lab_sel(labels_all,vars),
show_single_row=colnames(dta_l)[sel],
estimate_fun = ~style_sigfig(.x,digits = 3),
pvalue_fun = ~style_pvalue(.x, digits = 3)
) |>
add_global_p()|>
bold_p() #|>
# bold_labels() |>
# italicize_levels()
# ## What follows is the pragmatic transformation and reformatting
#
# ## Transforming log1p() to expm1()
# biv$table_body$estimate<-expm1(biv$table_body$estimate)
# biv$table_body$conf.low<-expm1(biv$table_body$conf.low)
# biv$table_body$conf.high<-expm1(biv$table_body$conf.high)
#
# ## Transforming sqrt() to pase_0^2
# biv$table_body$estimate[biv$table_body$variable=="pase_0"]<-
# -biv$table_body$estimate[biv$table_body$variable=="pase_0"]^2
#
# low<-biv$table_body$conf.low[biv$table_body$variable=="pase_0"]^2
# high<-biv$table_body$conf.high[biv$table_body$variable=="pase_0"]^2
#
# biv$table_body$conf.low[biv$table_body$variable=="pase_0"]<-(-low)
# biv$table_body$conf.high[biv$table_body$variable=="pase_0"]<-(-high)
#
# ## Transforming log1p() to expm1()
# biv$table_body$estimate[biv$table_body$variable=="nihss_0"]<-
# expm1(biv$table_body$estimate[biv$table_body$variable=="nihss_0"])
#
# low<-expm1(biv$table_body$conf.low[biv$table_body$variable=="nihss_0"])
# high<-expm1(biv$table_body$conf.high[biv$table_body$variable=="nihss_0"])
#
# biv$table_body$conf.low[biv$table_body$variable=="nihss_0"]<-low
# biv$table_body$conf.high[biv$table_body$variable=="nihss_0"]<-high
#
# ## New confidence intervals
# # biv$table_body$estimate<-format(biv$table_body$estimate, drop0trailing = F,digits =2)
# biv$table_body$ci<-paste(formatC(biv$table_body$conf.low, digits = 3, format = "f"),
# formatC(biv$table_body$conf.high, digits = 3, format = "f"),
# sep=", ")
## multivariate
mul<-dta_l |>
lm(formula(paste(c(outs[i],"."),collapse="~")),
data = _) |>
tbl_regression(label = lab_sel(labels_all,vars),
show_single_row=colnames(dta_l)[sel],
estimate_fun = ~style_sigfig(.x,digits = 3),
pvalue_fun = ~style_pvalue(.x, digits = 3)
)|>
add_n() |>
add_global_p() |>
bold_p() #|>
# bold_labels() |>
# italicize_levels()
if (trans_back==TRUE){
ls<-lapply(list(biv,mul), back_trans, outm = "log1p" ,sqrts = "pase_0",log1ps = "nihss_0")
} else {ls<-list(biv,mul)}
## Merge
biv_mul<-tbl_merge(
tbls = ls,
tab_spanner = c("**Bivariate linear regression**",
"**Multivariate linear regression**")
)
bm_list[[i]]<-biv_mul
}
## =============================================================================
## Big merge
## =============================================================================
if (trans_back==TRUE){tab_span<-c("**One month follow up [TRANS t/r]**",
"**Six months follow up [TRANS t/r]**")
} else {tab_span<-c("**One month follow up**",
"**Six months follow up**")}
bm_16_tbl<-tbl_merge(
tbls = bm_list,
tab_spanner = c("**One month follow up**",
"**Six months follow up**")
)
bm_16_tbl
bm_16_tbl_rtf <- file("bm_16_tbl_trans_back.RTF", "w")
writeLines(bm_16_tbl%>%as_gt()%>%as_rtf(), bm_16_tbl_rtf)
close(bm_16_tbl_rtf)
bm_16_tbl %>% # build gtsummary table
as_gt() %>% # convert to gt table
gt::gtsave( # save table as image
filename = "bm_16_tbl_trans_back.png"
)
## Tests and visualisation
## Box-Cox power transformation performs comparably to logarithmic transformation. The latter is much easier to explain.
library(MASS)
dta_bc<-dta_backup|>
dplyr::select(all_of(c("mdi_6_newobs_enr",vars)))|>
mutate(pase_0=sqrt(pase_0),
mdi_6_newobs_enr=mdi_6_newobs_enr+1)#|>
# na.omit()
bc<-boxcox(mdi_6_newobs_enr~.,data=dta_bc)
lambda <- bc$x[which.max(bc$y)]
## Q-Q plots to compare the two different approaches, and the non-transformed
q1 <- qqnorm(lm(((mdi_6_newobs_enr^lambda-1)/lambda) ~ .,data=dta_bc)$residuals)
q2 <- qqnorm(lm(log(mdi_6_newobs_enr) ~ .,data=dta_bc)$residuals)
library(patchwork)
plot(q1); plot(q2)
## Histograms for reference
h1 <- hist(dta$pase_0,40); hist(sqrt(dta$pase_0),40)
h2 <- hist(expm1(dta$mdi_6_newobs_enr),40); hist((dta$mdi_6_newobs_enr),40) ## Observed MDI, and log transformed MDI
plot(h1); plot(h2)