508 lines
12 KiB
Plaintext
508 lines
12 KiB
Plaintext
|
---
|
||
|
title: "revised statistics"
|
||
|
author: "AGDamsbo"
|
||
|
date: "8/1/2022"
|
||
|
output: html_document
|
||
|
toc: true
|
||
|
---
|
||
|
|
||
|
```{r setup, include=FALSE}
|
||
|
knitr::opts_chunk$set(echo = TRUE)
|
||
|
```
|
||
|
|
||
|
```{r}
|
||
|
library(REDCapR)
|
||
|
library(gtsummary)
|
||
|
theme_gtsummary_compact()
|
||
|
library(REDCapR)
|
||
|
library(gt)
|
||
|
|
||
|
library(lubridate)
|
||
|
library(dplyr)
|
||
|
library(tidyr)
|
||
|
|
||
|
```
|
||
|
|
||
|
# Import
|
||
|
|
||
|
|
||
|
```{r}
|
||
|
dta_all<-read.csv("/Volumes/Data/depression/dep_dataset.csv")
|
||
|
```
|
||
|
|
||
|
# Defining patients to include for analysis
|
||
|
Only including cases with complete pase_0 and MDI at 1 & 6 months
|
||
|
```{r}
|
||
|
dta<-dta_all[!is.na(dta_all$pase_0),]
|
||
|
# &!is.na(dta$mdi_1)&!is.na(dta$mdi_6)
|
||
|
```
|
||
|
|
||
|
## Formatting
|
||
|
```{r echo=FALSE}
|
||
|
dta$diabetes<-factor(dta$diabetes)
|
||
|
dta$pad<-factor(dta$pad)
|
||
|
|
||
|
dta$cohab<-ifelse(dta$civil=="partner","yes","no")|>
|
||
|
factor()
|
||
|
|
||
|
dta$hypertension<-factor(dta$hypertension)
|
||
|
|
||
|
dta$afli[dta$afli=="unknown"]<-NA
|
||
|
dta$afli<-factor(dta$afli)
|
||
|
|
||
|
dta$ever_smoker<-ifelse(dta$smoke_ever=="ever","yes","no")|>
|
||
|
factor()
|
||
|
|
||
|
dta$ami<-factor(dta$ami)
|
||
|
dta$tci<-factor(dta$tci)
|
||
|
dta$thrombolysis<-factor(dta$thrombolysis)
|
||
|
dta$thrombechtomy<-factor(dta$thrombechtomy)
|
||
|
|
||
|
dta$any_reperf<-ifelse(dta$rep_any=="rep","yes","no")|>
|
||
|
factor()
|
||
|
|
||
|
dta$pad<-factor(dta$pad)
|
||
|
dta$nihss_0<-as.numeric(dta$nihss_0)
|
||
|
dta$age<-as.numeric(dta$age)
|
||
|
|
||
|
dta$active_treat<-ifelse(dta$rtreat=="Active","yes","no")|>
|
||
|
factor()
|
||
|
# dta$rtreat<-factor(dta$rtreat)
|
||
|
|
||
|
dta$female<-ifelse(dta$sex=="female","yes","no")|>
|
||
|
factor()
|
||
|
|
||
|
dta$pase_0<-as.numeric(dta$pase_0)
|
||
|
dta$pase_6<-as.numeric(dta$pase_6)
|
||
|
dta$bmi<-as.numeric(dta$bmi)
|
||
|
dta$mdi_6<-as.numeric(dta$mdi_6)
|
||
|
dta$pase_0_bin<-factor(dta$pase_0_bin,levels=c("lower","higher"))
|
||
|
|
||
|
dta$nihss_0_isna<-is.na(dta$nihss_0)
|
||
|
```
|
||
|
|
||
|
```{r}
|
||
|
vars<-c("pase_0",
|
||
|
"female",
|
||
|
"age",
|
||
|
"cohab",
|
||
|
"ever_smoker",
|
||
|
"diabetes",
|
||
|
"hypertension",
|
||
|
"afli",
|
||
|
"ami",
|
||
|
"tci",
|
||
|
"pad",
|
||
|
"nihss_0",
|
||
|
"any_reperf")
|
||
|
|
||
|
# tbl1_vars<-c("thrombolysis", "thrombechtomy","inc_time")
|
||
|
|
||
|
labels_all<-list(active_treat~"Active trial treatment",
|
||
|
pase_0~"PASE score",
|
||
|
age~"Age",
|
||
|
female~"Female sex",
|
||
|
ever_smoker~"History of smoking",
|
||
|
cohab~"Cohabitation",
|
||
|
diabetes~"Known diabetes",
|
||
|
hypertension~"Known hypertension",
|
||
|
afli~"Known Atrialfibrillation",
|
||
|
ami~"Previos myocardial infarction",
|
||
|
tci~"Previos TIA",
|
||
|
pad~"Known peripheral arteriosclerotic disease",
|
||
|
nihss_0~"NIHSS score",
|
||
|
thrombolysis~"Thrombolytic therapy",
|
||
|
thrombechtomy~"Endovascular treatment",
|
||
|
any_reperf~"Any reperfusion treatment",
|
||
|
inc_time~"Study inclusion time",
|
||
|
'[Intercept]'~"Intercept")
|
||
|
|
||
|
lab_sel<-function(label_list,variables_vector){
|
||
|
## Helper function to select labels for gtsummary function from list of all labels based on selected variables.
|
||
|
## Long names in try to ease reading.
|
||
|
include_index<-c()
|
||
|
for (i in 1:length(label_list)) {
|
||
|
include_index[i]<-as.character(label_list[[i]])[2] %in% variables_vector
|
||
|
}
|
||
|
return(label_list[include_index])
|
||
|
}
|
||
|
|
||
|
dta_backup<-dta
|
||
|
```
|
||
|
|
||
|
# Table 1
|
||
|
```{r}
|
||
|
tbl1_vars<-c("active_treat",vars,"inc_time")
|
||
|
|
||
|
dta|>
|
||
|
tbl_summary(missing = "ifany",
|
||
|
include = all_of(tbl1_vars),
|
||
|
missing_text="(Missing)",
|
||
|
label = lab_sel(labels_all,tbl1_vars)
|
||
|
)|>
|
||
|
add_n()|>
|
||
|
as_gt() |>
|
||
|
# modify with gt functions
|
||
|
gt::tab_header("Baseline Characteristics") |>
|
||
|
gt::tab_options(
|
||
|
table.font.size = "small",
|
||
|
data_row.padding = gt::px(1))
|
||
|
```
|
||
|
|
||
|
|
||
|
# Regression - all
|
||
|
|
||
|
```{r}
|
||
|
outs<-c("mdi_1_enr","mdi_6_newobs_enr")
|
||
|
```
|
||
|
|
||
|
## Non-stratified
|
||
|
|
||
|
```{r}
|
||
|
source("biv_mul.R")
|
||
|
|
||
|
bm_16_tbl_rtf <- file("bm_16_tbl.RTF", "w")
|
||
|
writeLines(bm_16_tbl%>%as_gt()%>%as_rtf(), bm_16_tbl_rtf)
|
||
|
close(bm_16_tbl_rtf)
|
||
|
|
||
|
bm_16_tbl %>% # build gtsummary table
|
||
|
as_gt() %>% # convert to gt table
|
||
|
gt::gtsave( # save table as image
|
||
|
filename = "bm_16_tbl.png"
|
||
|
)
|
||
|
```
|
||
|
|
||
|
|
||
|
## Stratified by treatment
|
||
|
|
||
|
```{r}
|
||
|
source("biv_mul_strat.R")
|
||
|
|
||
|
bm_16_tbl_strat_rtf <- file("bm_16_tbl_strat.RTF", "w")
|
||
|
writeLines(bm_16_tbl_strat%>%as_gt()%>%as_rtf(), bm_16_tbl_strat_rtf)
|
||
|
close(bm_16_tbl_strat_rtf)
|
||
|
|
||
|
bm_16_tbl_strat %>% # build gtsummary table
|
||
|
as_gt() %>% # convert to gt table
|
||
|
gt::gtsave( # save table as image
|
||
|
filename = "bm_16_tbl_strat.png"
|
||
|
)
|
||
|
```
|
||
|
|
||
|
# Regression - sensitivity
|
||
|
|
||
|
```{r}
|
||
|
outs<-c("mdi_1","mdi_6")
|
||
|
```
|
||
|
|
||
|
## Non-stratified
|
||
|
|
||
|
```{r}
|
||
|
source("biv_mul.R")
|
||
|
|
||
|
bm_16_sens_tbl_rtf <- file("bm_16_sens_tbl.RTF", "w")
|
||
|
writeLines(bm_16_tbl%>%as_gt()%>%as_rtf(), bm_16_sens_tbl_rtf)
|
||
|
close(bm_16_sens_tbl_rtf)
|
||
|
|
||
|
bm_16_tbl %>% # build gtsummary table
|
||
|
as_gt() %>% # convert to gt table
|
||
|
gt::gtsave( # save table as image
|
||
|
filename = "bm_16_tbl_sens.png"
|
||
|
)
|
||
|
```
|
||
|
|
||
|
|
||
|
## Stratified by treatment
|
||
|
|
||
|
```{r}
|
||
|
source("biv_mul_strat.R")
|
||
|
|
||
|
bm_16_tbl_strat_sens_rtf <- file("bm_16_tbl_strat_sens.RTF", "w")
|
||
|
writeLines(bm_16_tbl_strat%>%as_gt()%>%as_rtf(), bm_16_tbl_strat_sens_rtf)
|
||
|
close(bm_16_tbl_strat_sens_rtf)
|
||
|
|
||
|
bm_16_tbl_strat %>% # build gtsummary table
|
||
|
as_gt() %>% # convert to gt table
|
||
|
gt::gtsave( # save table as image
|
||
|
filename = "bm_16_tbl_strat_sens.png"
|
||
|
)
|
||
|
```
|
||
|
|
||
|
|
||
|
# Regression - interaction
|
||
|
|
||
|
```{r}
|
||
|
outs<-c("mdi_1_enr","mdi_6_newobs_enr")
|
||
|
```
|
||
|
|
||
|
```{r}
|
||
|
## =============================================================================
|
||
|
## Loop
|
||
|
## =============================================================================
|
||
|
|
||
|
bm_list<-list()
|
||
|
|
||
|
for (i in 1:length(outs)){
|
||
|
## Multivariate
|
||
|
mul<-dta |>
|
||
|
dplyr::select(all_of(c("active_treat",vars,outs[i])))|>
|
||
|
lm(formula(
|
||
|
paste(c(paste(c(outs[i],paste(c("active_treat","pase_0"),collapse="*")),collapse="~"),"."),collapse="+")),
|
||
|
data = _)|>
|
||
|
tbl_regression(label = lab_sel(labels_all,c(vars,"active_treat")))|>
|
||
|
add_n() |>
|
||
|
add_global_p() |>
|
||
|
bold_p() |>
|
||
|
bold_labels() |>
|
||
|
italicize_levels()
|
||
|
|
||
|
bm_list[[i]]<-mul
|
||
|
}
|
||
|
|
||
|
## =============================================================================
|
||
|
## Big merge
|
||
|
## =============================================================================
|
||
|
|
||
|
tbl_merge(
|
||
|
tbls = bm_list,
|
||
|
tab_spanner = c("**One month follow up**",
|
||
|
"**Six months follow up**")
|
||
|
)
|
||
|
```
|
||
|
|
||
|
```{r}
|
||
|
library(aod)
|
||
|
|
||
|
for (i in 1:length(outs)){
|
||
|
model<-dta |>
|
||
|
dplyr::select(all_of(c("active_treat",vars,outs[i])))|>
|
||
|
lm(formula(
|
||
|
paste(c(paste(c(outs[i],paste(c("active_treat","pase_0"),collapse="*")),collapse="~"),"."),collapse="+")),
|
||
|
data = _)
|
||
|
|
||
|
wt<-wald.test(Sigma = vcov(model),
|
||
|
b = coef(model),
|
||
|
Terms = model$rank # Rank gives number of coefficients. The interaction is the last.
|
||
|
)
|
||
|
print(wt)
|
||
|
}
|
||
|
```
|
||
|
|
||
|
# Regression - nihss ~ pase_0
|
||
|
|
||
|
```{r}
|
||
|
vars_nihss<-c(
|
||
|
"pase_0",
|
||
|
"female",
|
||
|
"age",
|
||
|
"cohab",
|
||
|
"ever_smoker",
|
||
|
"diabetes",
|
||
|
"hypertension",
|
||
|
"afli",
|
||
|
"ami",
|
||
|
"tci",
|
||
|
"pad",
|
||
|
"nihss_0")
|
||
|
|
||
|
## Bivariate
|
||
|
biv<-dta|>
|
||
|
dplyr::select(all_of(vars_nihss))|>
|
||
|
tbl_uvregression(data=_,
|
||
|
y="nihss_0",
|
||
|
method=lm#,
|
||
|
#label = lab_sel(labels_all,vars)
|
||
|
) |>
|
||
|
add_global_p()|>
|
||
|
bold_p() |>
|
||
|
bold_labels() |>
|
||
|
italicize_levels()
|
||
|
|
||
|
## Multivariate
|
||
|
mul<-dta |>
|
||
|
dplyr::select(all_of(vars_nihss))|>
|
||
|
lm(nihss_0~pase_0+.,
|
||
|
data = _) |>
|
||
|
tbl_regression(
|
||
|
#label = lab_sel(labels_all,vars),
|
||
|
#intercept=FALSE
|
||
|
)|>
|
||
|
add_n() |>
|
||
|
add_global_p() |>
|
||
|
bold_p() |>
|
||
|
bold_labels() |>
|
||
|
italicize_levels()
|
||
|
|
||
|
## Merge
|
||
|
tbl_merge(
|
||
|
tbls = list(biv, mul),
|
||
|
tab_spanner = c("**Bivariate linear regression**",
|
||
|
"**Multivariate linear regression**")
|
||
|
)
|
||
|
|
||
|
|
||
|
```
|
||
|
|
||
|
|
||
|
|
||
|
# Transformed data
|
||
|
```{r}
|
||
|
|
||
|
# dta<-dta_backup
|
||
|
dta<-dta|>
|
||
|
mutate(pase_0=sqrt(pase_0),
|
||
|
nihss_0=log1p(nihss_0),
|
||
|
mdi_1_enr=log1p(mdi_1_enr), # log1p(x) svarer til log(x+1)
|
||
|
mdi_6_newobs_enr=log1p(mdi_6_newobs_enr))|>
|
||
|
data.frame()
|
||
|
|
||
|
library(dplyr)
|
||
|
# source("biv_mul.R")
|
||
|
|
||
|
```
|
||
|
|
||
|
```{r}
|
||
|
## =============================================================================
|
||
|
## Loop
|
||
|
## =============================================================================
|
||
|
|
||
|
bm_list<-list()
|
||
|
|
||
|
|
||
|
|
||
|
for (i in 1:length(outs)){
|
||
|
## Bivariate
|
||
|
|
||
|
dta_l<-dta|>
|
||
|
dplyr::select(all_of(c("active_treat",vars,outs[i])))
|
||
|
|
||
|
sel<-dta_l|>
|
||
|
sapply(is.factor)
|
||
|
|
||
|
# i=1
|
||
|
biv<-dta_l|>
|
||
|
tbl_uvregression(data=_,
|
||
|
y=outs[i],
|
||
|
method=lm,
|
||
|
label = lab_sel(labels_all,vars),
|
||
|
show_single_row=colnames(dta_l)[sel]
|
||
|
) |>
|
||
|
add_global_p()|>
|
||
|
bold_p() |>
|
||
|
bold_labels() |>
|
||
|
italicize_levels()
|
||
|
|
||
|
## What follows is the pragmatic transformation and reformatting
|
||
|
|
||
|
## Transforming log2() to 2^()
|
||
|
# biv$table_body$estimate<-expm1(biv$table_body$estimate)
|
||
|
# biv$table_body$conf.low<-expm1(biv$table_body$conf.low)
|
||
|
# biv$table_body$conf.high<-expm1(biv$table_body$conf.high)
|
||
|
#
|
||
|
# ## Transforming sqrt(pase_0) to pase_0^2
|
||
|
# biv$table_body$estimate[biv$table_body$variable=="pase_0"]<-
|
||
|
# biv$table_body$estimate[biv$table_body$variable=="pase_0"]^2
|
||
|
#
|
||
|
# low<-biv$table_body$conf.low[biv$table_body$variable=="pase_0"]^2
|
||
|
# high<-biv$table_body$conf.high[biv$table_body$variable=="pase_0"]^2
|
||
|
#
|
||
|
# biv$table_body$conf.low[biv$table_body$variable=="pase_0"]<-high
|
||
|
# biv$table_body$conf.high[biv$table_body$variable=="pase_0"]<-low
|
||
|
#
|
||
|
# ## New confidence intervals
|
||
|
# # biv$table_body$estimate<-format(biv$table_body$estimate, drop0trailing = F,digits =2)
|
||
|
# biv$table_body$ci<-paste(formatC(biv$table_body$conf.low, digits = 3, format = "f"),
|
||
|
# formatC(biv$table_body$conf.high, digits = 3, format = "f"),
|
||
|
# sep=", ")
|
||
|
|
||
|
# biv$table_body$ci<-paste(format(biv$table_body$conf.low, digits = 2, drop0trailing = FALSE),
|
||
|
# format(biv$table_body$conf.high, digits = 2, drop0trailing = FALSE),
|
||
|
# sep=", ")
|
||
|
|
||
|
# Formatting from this: https://stackoverflow.com/questions/12243071/r-keeping-0-0-when-using-paste-or-paste0
|
||
|
|
||
|
## multivariate
|
||
|
mul<-dta_l |>
|
||
|
lm(formula(paste(c(outs[i],"."),collapse="~")),
|
||
|
data = _) |>
|
||
|
tbl_regression(label = lab_sel(labels_all,vars),
|
||
|
show_single_row=colnames(dta_l)[sel]
|
||
|
)|>
|
||
|
add_n() |>
|
||
|
add_global_p() |>
|
||
|
bold_p() |>
|
||
|
bold_labels() |>
|
||
|
italicize_levels()
|
||
|
|
||
|
## Merge
|
||
|
biv_mul<-tbl_merge(
|
||
|
tbls = list(biv, mul),
|
||
|
tab_spanner = c("**Bivariate linear regression**",
|
||
|
"**Multivariate linear regression**")
|
||
|
)
|
||
|
|
||
|
bm_list[[i]]<-biv_mul
|
||
|
}
|
||
|
|
||
|
## =============================================================================
|
||
|
## Big merge
|
||
|
## =============================================================================
|
||
|
|
||
|
bm_16_tbl<-tbl_merge(
|
||
|
tbls = bm_list,
|
||
|
tab_spanner = c("**One month follow up**",
|
||
|
"**Six months follow up**")
|
||
|
)
|
||
|
bm_16_tbl
|
||
|
```
|
||
|
|
||
|
|
||
|
```{r}
|
||
|
bm_16_tbl_rtf <- file("bm_16_tbl_trans.RTF", "w")
|
||
|
writeLines(bm_16_tbl%>%as_gt()%>%as_rtf(), bm_16_tbl_rtf)
|
||
|
close(bm_16_tbl_rtf)
|
||
|
|
||
|
bm_16_tbl %>% # build gtsummary table
|
||
|
as_gt() %>% # convert to gt table
|
||
|
gt::gtsave( # save table as image
|
||
|
filename = "bm_16_tbl_trans.png"
|
||
|
)
|
||
|
```
|
||
|
|
||
|
## Tests and visualisation
|
||
|
|
||
|
Box-Cox power transformation performs comparably to logarithmic transformation. The latter is much easier to explain.
|
||
|
|
||
|
```{r}
|
||
|
library(MASS)
|
||
|
dta_bc<-dta_backup|>
|
||
|
dplyr::select(all_of(c("mdi_6_newobs_enr",vars)))|>
|
||
|
mutate(pase_0=sqrt(pase_0),
|
||
|
mdi_6_newobs_enr=mdi_6_newobs_enr+1)#|>
|
||
|
# na.omit()
|
||
|
|
||
|
bc<-boxcox(mdi_6_newobs_enr~.,data=dta_bc)
|
||
|
lambda <- bc$x[which.max(bc$y)]
|
||
|
```
|
||
|
|
||
|
Q-Q plots to compare the two different approaches, and the non-transformed
|
||
|
|
||
|
```{r}
|
||
|
qqnorm(lm(((mdi_6_newobs_enr^lambda-1)/lambda) ~ .,data=dta_bc)$residuals)
|
||
|
|
||
|
qqnorm(lm(log(mdi_6_newobs_enr) ~ .,data=dta_bc)$residuals)
|
||
|
```
|
||
|
|
||
|
Histograms for reference
|
||
|
|
||
|
```{r}
|
||
|
hist(dta$pase_0,40); hist(sqrt(dta$pase_0),40)
|
||
|
```
|
||
|
|
||
|
```{r}
|
||
|
hist(expm1(dta$mdi_6_newobs_enr),40); hist((dta$mdi_6_newobs_enr),40) ## Observed MDI, and log transformed MDI
|
||
|
```
|
||
|
|
||
|
|
||
|
|