2021-03-30 13:31:17 +02:00
#' Regression model of predictors according to STROBE, bi- and multivariable.
2018-10-11 16:31:46 +02:00
#'
2019-11-13 12:08:32 +01:00
#' Printable table of regression model according to STROBE for linear or binary outcome-variables.
2021-03-30 13:31:17 +02:00
#' Includes both bivariate and multivariate in the same table.
2019-11-13 12:08:32 +01:00
#' Output is a list, with the first item being the main "output" as a dataframe.
2021-03-30 13:31:17 +02:00
#' Automatically uses logistic regression model for dichotomous outcome variable and linear regression model for continuous outcome variable. Linear regression will give estimated adjusted true mean in list.
2019-12-13 12:39:42 +01:00
#' For logistic regression gives count of outcome variable pr variable level.
2021-03-30 13:31:17 +02:00
#' @param meas binary outcome measure variable, column name in data.frame as a string. Can be numeric or factor. Result is calculated accordingly.
2018-10-11 16:31:46 +02:00
#' @param adj variables to adjust for, as string.
#' @param data dataframe of data.
#' @param dec decimals for results, standard is set to 2. Mean and sd is dec-1.
2021-03-30 13:31:17 +02:00
#' @param n.by.adj flag to indicate whether to count number of patients in adjusted model or overall for outcome measure not NA.
2019-11-13 10:05:53 +01:00
#' @param p.val flag to include p-values in table, set to FALSE as standard.
2018-10-11 16:31:46 +02:00
#' @keywords logistic
#' @export
2019-11-12 14:12:27 +01:00
strobe_pred <- function ( meas , adj , data , dec = 2 , n.by.adj = FALSE , p.val = FALSE ) {
2018-10-11 16:31:46 +02:00
2021-03-30 13:31:17 +02:00
## Wish list:
## - SPEED, maybe flags to include/exclude time consuming tasks
## - Include ANOVA in output list, flag to include
2018-10-11 16:31:46 +02:00
require ( dplyr )
d <- data
m <- d [ , c ( meas ) ]
ads <- d [ , c ( adj ) ]
2019-11-11 17:16:25 +01:00
if ( is.factor ( m ) ) {
2019-12-13 11:58:21 +01:00
## Crude ORs
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
dfcr <- data.frame ( matrix ( NA , ncol = 3 ) )
names ( dfcr ) <- c ( " pred" , " or_ci" , " pv" )
n.mn <- c ( )
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
nref <- c ( )
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
for ( i in 1 : ncol ( ads ) ) {
dat <- data.frame ( m = m , ads [ , i ] )
names ( dat ) <- c ( " m" , names ( ads ) [i ] )
mn <- glm ( m ~ .,family = binomial ( ) , data = dat )
n.mn <- c ( n.mn , nrow ( mn $ model ) )
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
suppressMessages ( ci <- exp ( confint ( mn ) ) )
l <- round ( ci [ -1 , 1 ] , dec )
u <- round ( ci [ -1 , 2 ] , dec )
or <- round ( exp ( coef ( mn ) ) [ -1 ] , dec )
or_ci <- paste0 ( or , " (" , l , " to " , u , " )" )
pv <- round ( tidy ( mn ) $ p.value [ -1 ] , dec +1 )
x1 <- ads [ , i ]
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
if ( is.factor ( x1 ) ) {
pred <- paste0 ( names ( ads ) [i ] , levels ( x1 ) [ -1 ] )
}
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
else {
pred <- names ( ads ) [i ]
}
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
dfcr <- rbind ( dfcr , cbind ( pred , or_ci , pv ) )
}
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
## Mutually adjusted ORs
2018-10-12 11:26:20 +02:00
2019-12-13 11:58:21 +01:00
dat <- data.frame ( m = m , ads )
ma <- glm ( m ~ .,family = binomial ( ) , data = dat )
miss <- length ( ma $ na.action )
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
actable <- coef ( summary ( ma ) )
pa <- actable [ , 4 ]
pa <- ifelse ( pa < 0.001 , " <0.001" , round ( pa , 3 ) )
pa <- ifelse ( pa <= 0.05 | pa == " <0.001" , paste0 ( " *" , pa ) ,
ifelse ( pa > 0.05 & pa <= 0.1 , paste0 ( " ." , pa ) , pa ) )
apv <- pa [1 : length ( coef ( ma ) ) ]
aco <- round ( exp ( coef ( ma ) ) , dec )
suppressMessages ( aci <- round ( exp ( confint ( ma ) ) , dec ) )
alo <- aci [ , 1 ]
aup <- aci [ , 2 ]
aor_ci <- paste0 ( aco , " (" , alo , " to " , aup , " )" )
# names(dat2)<-c(var,names(ads))
nq <- c ( )
nall <- length ( ! is.na ( dat [ , 1 ] ) )
if ( n.by.adj == TRUE ) {
2019-12-13 12:39:42 +01:00
dat2 <- ma $ model
2019-12-13 11:58:21 +01:00
# nalt<-nrow(dat2)
2019-12-13 12:39:42 +01:00
for ( i in 2 : ncol ( dat2 ) ) {
if ( is.factor ( dat2 [ , i ] ) ) {
vec <- dat2 [ , i ]
ns <- names ( dat2 ) [i ]
for ( r in 1 : length ( levels ( vec ) ) ) {
vr <- levels ( vec ) [r ]
## Counting all included in analysis
n <- length ( vec [vec == vr & ! is.na ( vec ) ] )
rt <- paste0 ( n , " (" , round ( n / nall * 100 , 0 ) , " %)" )
## Counting all included in analysis with outcome
lvl <- levels ( dat2 [ , 1 ] ) [2 ]
no <- length ( vec [vec == vr & dat2 [ , 1 ] == lvl & ! is.na ( vec ) ] )
ro <- paste0 ( no , " (" , round ( no / n * 100 , 0 ) , " %)" )
## Combining
nq <- rbind ( nq , cbind ( paste0 ( ns , levels ( vec ) [r ] ) , rt , ro ) )
}
}
if ( ! is.factor ( dat2 [ , i ] ) ) {
num <- dat2 [ , i ]
n <- length ( num [ ! is.na ( num ) ] )
rt <- paste0 ( n , " (" , round ( n / nall * 100 , 0 ) , " %)" )
nq <- rbind ( nq , cbind ( names ( dat2 ) [i ] , rt , ro = " -" ) )
}
}
2019-12-13 11:58:21 +01:00
}
else {
2019-12-13 12:39:42 +01:00
dat2 <- dat [ ! is.na ( dat [ , 1 ] ) , ]
for ( i in 2 : ncol ( dat2 ) ) {
2019-12-13 11:58:21 +01:00
if ( is.factor ( dat2 [ , i ] ) ) {
vec <- dat2 [ , i ]
ns <- names ( dat2 ) [i ]
for ( r in 1 : length ( levels ( vec ) ) ) {
vr <- levels ( vec ) [r ]
2019-12-13 12:39:42 +01:00
## Counting all included in analysis
2019-12-13 11:58:21 +01:00
n <- length ( vec [vec == vr & ! is.na ( vec ) ] )
rt <- paste0 ( n , " (" , round ( n / nall * 100 , 0 ) , " %)" )
2019-12-13 12:39:42 +01:00
## Counting all included in analysis with outcome
lvl <- levels ( dat2 [ , 1 ] ) [2 ]
no <- length ( vec [vec == vr & dat2 [ , 1 ] == lvl & ! is.na ( vec ) ] )
ro <- paste0 ( no , " (" , round ( no / n * 100 , 0 ) , " %)" )
## Combining
nq <- rbind ( nq , cbind ( paste0 ( ns , levels ( vec ) [r ] ) , rt , ro ) )
2019-12-13 11:58:21 +01:00
}
}
if ( ! is.factor ( dat2 [ , i ] ) ) {
num <- dat2 [ , i ]
n <- length ( num [ ! is.na ( num ) ] )
rt <- paste0 ( n , " (" , round ( n / nall * 100 , 0 ) , " %)" )
2019-12-13 12:39:42 +01:00
nq <- rbind ( nq , cbind ( names ( dat2 ) [i ] , rt , ro = " -" ) )
2019-12-13 11:58:21 +01:00
}
}
}
rnames <- c ( )
2018-10-12 11:26:20 +02:00
for ( i in 1 : ncol ( dat2 ) ) {
if ( is.factor ( dat2 [ , i ] ) ) {
2019-12-13 11:58:21 +01:00
rnames <- c ( rnames , names ( dat2 ) [i ] , paste0 ( names ( dat2 ) [i ] , levels ( dat2 [ , i ] ) ) )
}
2018-10-12 11:26:20 +02:00
if ( ! is.factor ( dat2 [ , i ] ) ) {
2019-12-13 11:58:21 +01:00
rnames <- c ( rnames , paste0 ( names ( dat2 ) [i ] , " .all" ) , names ( dat2 ) [i ] )
}
2018-10-11 16:31:46 +02:00
}
2019-12-13 11:58:21 +01:00
res <- cbind ( aor_ci , apv )
rest <- data.frame ( names = row.names ( res ) , res , stringsAsFactors = F )
2018-10-11 16:31:46 +02:00
2019-12-13 12:39:42 +01:00
numb <- data.frame ( names = nq [ , 1 ] , N = nq [ , 2 ] , N.out = nq [ , 3 ] , stringsAsFactors = F )
namt <- data.frame ( names = tail ( rnames , -3 ) , stringsAsFactors = F )
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
coll <- left_join ( left_join ( namt , numb , by = " names" ) , rest , by = " names" )
2018-10-11 16:31:46 +02:00
2019-12-13 12:39:42 +01:00
header <- data.frame ( matrix ( paste0 ( " Chance of " , meas , " is " , levels ( m ) [2 ] ) , ncol = ncol ( coll ) ) , stringsAsFactors = F )
2019-12-13 11:58:21 +01:00
names ( header ) <- names ( coll )
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
df <- data.frame ( rbind ( header , coll ) , stringsAsFactors = F )
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
names ( dfcr ) [1 ] <- c ( " names" )
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
suppressWarnings ( re <- left_join ( df , dfcr , by = " names" ) )
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
rona <- c ( )
for ( i in 1 : length ( ads ) ) {
if ( is.factor ( ads [ , i ] ) ) {
rona <- c ( rona , names ( ads [i ] ) , levels ( ads [ , i ] ) ) }
2019-11-13 11:11:26 +01:00
if ( ! is.factor ( ads [ , i ] ) ) {
rona <- c ( rona , names ( ads [i ] ) , " Per unit increase" )
}
2019-11-13 11:24:20 +01:00
}
2019-11-13 11:11:26 +01:00
2019-12-13 11:58:21 +01:00
if ( p.val == TRUE ) {
2019-12-13 12:39:42 +01:00
ref <- data.frame ( c ( NA , rona ) , re [ , " N" ] , re [ , " N.out" ] , re [ , " or_ci" ] , re [ , " pv" ] , re [ , " aor_ci" ] , re [ , " apv" ] )
2018-10-11 16:31:46 +02:00
2019-12-13 12:39:42 +01:00
names ( ref ) <- c ( " Variable" , paste0 ( " N=" , nall ) , paste0 ( " N, " , meas , " is " , levels ( m ) [2 ] ) , " Crude OR (95 % CI)" , " p-value" , " Mutually adjusted OR (95 % CI)" , " A p-value" )
2019-12-13 11:58:21 +01:00
}
else {
2019-12-13 12:39:42 +01:00
ref <- data.frame ( c ( NA , rona ) , re [ , " N" ] , re [ , " N.out" ] , re [ , " or_ci" ] , re [ , " aor_ci" ] )
2019-11-13 10:05:53 +01:00
2019-12-13 12:39:42 +01:00
names ( ref ) <- c ( " Variable" , paste0 ( " N=" , nall ) , paste0 ( " N, " , meas , " is " , levels ( m ) [2 ] ) , " Crude OR (95 % CI)" , " Mutually adjusted OR (95 % CI)" )
2019-12-13 11:58:21 +01:00
}
2018-10-11 16:31:46 +02:00
2019-12-13 11:58:21 +01:00
ls <- list ( tbl = ref , miss , nall , nrow ( d ) )
names ( ls ) <- c ( " Printable table" , " Deleted due to missingness in adjusted analysis" , " Number of outcome observations" , " Length of dataframe" )
2019-11-11 17:16:25 +01:00
}
if ( ! is.factor ( m ) ) {
dfcr <- data.frame ( matrix ( NA , ncol = 3 ) )
2019-11-12 14:12:27 +01:00
names ( dfcr ) <- c ( " pred" , " dif_ci" , " pv" )
2019-11-11 17:16:25 +01:00
n.mn <- c ( )
nref <- c ( )
for ( i in 1 : ncol ( ads ) ) {
dat <- data.frame ( m = m , ads [ , i ] )
names ( dat ) <- c ( " m" , names ( ads ) [i ] )
mn <- lm ( m ~ .,data = dat )
n.mn <- c ( n.mn , nrow ( mn $ model ) )
suppressMessages ( ci <- confint ( mn ) )
l <- round ( ci [ -1 , 1 ] , dec )
u <- round ( ci [ -1 , 2 ] , dec )
2019-11-12 14:12:27 +01:00
dif <- round ( coef ( mn ) [ -1 ] , dec )
dif_ci <- paste0 ( dif , " (" , l , " to " , u , " )" )
2019-11-11 17:16:25 +01:00
pv <- round ( tidy ( mn ) $ p.value [ -1 ] , dec +1 )
2019-11-12 14:12:27 +01:00
pv <- ifelse ( pv < 0.001 , " <0.001" , round ( pv , 3 ) )
pv <- ifelse ( pv <= 0.05 | pv == " <0.001" , paste0 ( " *" , pv ) ,
ifelse ( pv > 0.05 & pv <= 0.1 , paste0 ( " ." , pv ) , pv ) )
2019-11-11 17:16:25 +01:00
x1 <- ads [ , i ]
if ( is.factor ( x1 ) ) {
pred <- paste0 ( names ( ads ) [i ] , levels ( x1 ) [ -1 ] )
}
else {
pred <- names ( ads ) [i ]
}
2019-11-12 14:12:27 +01:00
dfcr <- rbind ( dfcr , cbind ( pred , dif_ci , pv ) )
2019-11-11 17:16:25 +01:00
}
## Mutually adjusted ORs
dat <- data.frame ( m = m , ads )
ma <- lm ( m ~ ., data = dat )
miss <- length ( ma $ na.action )
2019-11-12 14:12:27 +01:00
2019-11-11 17:16:25 +01:00
actable <- coef ( summary ( ma ) )
pa <- actable [ , 4 ]
pa <- ifelse ( pa < 0.001 , " <0.001" , round ( pa , 3 ) )
pa <- ifelse ( pa <= 0.05 | pa == " <0.001" , paste0 ( " *" , pa ) ,
ifelse ( pa > 0.05 & pa <= 0.1 , paste0 ( " ." , pa ) , pa ) )
apv <- pa [1 : length ( coef ( ma ) ) ]
aco <- round ( coef ( ma ) , dec )
suppressMessages ( aci <- round ( confint ( ma ) , dec ) )
alo <- aci [ , 1 ]
aup <- aci [ , 2 ]
amean_ci <- paste0 ( aco , " (" , alo , " to " , aup , " )" )
2019-11-12 14:12:27 +01:00
mean_est <- amean_ci [ [1 ] ]
2019-11-11 17:16:25 +01:00
nq <- c ( )
2019-12-13 11:58:21 +01:00
nall <- length ( ! is.na ( dat [ , 1 ] ) )
2019-11-11 17:16:25 +01:00
if ( n.by.adj == TRUE ) {
dat2 <- ma $ model [ , -1 ]
2019-12-13 11:58:21 +01:00
# nalt<-nrow(dat2)
2019-11-11 17:16:25 +01:00
for ( i in 1 : ncol ( dat2 ) ) {
if ( is.factor ( dat2 [ , i ] ) ) {
vec <- dat2 [ , i ]
ns <- names ( dat2 ) [i ]
for ( r in 1 : length ( levels ( vec ) ) ) {
vr <- levels ( vec ) [r ]
2019-12-13 11:58:21 +01:00
n <- length ( vec [vec == vr & ! is.na ( vec ) ] )
rt <- paste0 ( n , " (" , round ( n / nall * 100 , 0 ) , " %)" )
nq <- rbind ( nq , cbind ( paste0 ( ns , levels ( vec ) [r ] ) , rt ) )
2019-11-11 17:16:25 +01:00
} }
if ( ! is.factor ( dat2 [ , i ] ) ) {
num <- dat2 [ , i ]
n <- as.numeric ( length ( num [ ! is.na ( num ) ] ) )
2019-12-13 11:58:21 +01:00
rt <- paste0 ( n , " (" , round ( n / nall * 100 , 0 ) , " %)" )
nq <- rbind ( nq , cbind ( names ( dat2 ) [i ] , rt ) )
} }
}
2019-11-11 17:16:25 +01:00
else {
dat2 <- dat [ ! is.na ( dat [ , 1 ] ) , ] [ , -1 ]
2019-12-13 11:58:21 +01:00
for ( i in 1 : ncol ( dat2 ) ) {
if ( is.factor ( dat2 [ , i ] ) ) {
vec <- dat2 [ , i ]
ns <- names ( dat2 ) [i ]
for ( r in 1 : length ( levels ( vec ) ) ) {
vr <- levels ( vec ) [r ]
n <- length ( vec [vec == vr & ! is.na ( vec ) ] )
rt <- paste0 ( n , " (" , round ( n / nall * 100 , 0 ) , " %)" )
nq <- rbind ( nq , cbind ( paste0 ( ns , levels ( vec ) [r ] ) , rt ) )
}
}
if ( ! is.factor ( dat2 [ , i ] ) ) {
num <- dat2 [ , i ]
n <- length ( num [ ! is.na ( num ) ] )
rt <- paste0 ( n , " (" , round ( n / nall * 100 , 0 ) , " %)" )
nq <- rbind ( nq , cbind ( names ( dat2 ) [i ] , rt ) )
}
}
}
2019-11-11 17:16:25 +01:00
rnames <- c ( )
2019-11-13 10:43:25 +01:00
for ( i in 1 : ncol ( dat2 ) ) {
if ( is.factor ( dat2 [ , i ] ) ) {
rnames <- c ( rnames , names ( dat2 ) [i ] , paste0 ( names ( dat2 ) [i ] , levels ( dat2 [ , i ] ) ) )
2019-11-11 17:16:25 +01:00
}
2019-11-13 10:43:25 +01:00
if ( ! is.factor ( dat2 [ , i ] ) ) {
rnames <- c ( rnames , paste0 ( names ( dat2 ) [i ] , " .all" ) , names ( dat2 ) [i ] )
2019-11-11 17:16:25 +01:00
}
}
res <- cbind ( amean_ci , apv )
rest <- data.frame ( names = row.names ( res ) , res , stringsAsFactors = F )
2019-12-13 11:58:21 +01:00
numb <- data.frame ( names = nq [ , 1 ] , N = nq [ , 2 ] , stringsAsFactors = F )
2019-11-11 17:16:25 +01:00
namt <- data.frame ( names = rnames , stringsAsFactors = F )
coll <- left_join ( left_join ( namt , numb , by = " names" ) , rest , by = " names" )
header <- data.frame ( matrix ( " Adjusted" , ncol = ncol ( coll ) ) , stringsAsFactors = F )
names ( header ) <- names ( coll )
df <- data.frame ( rbind ( header , coll ) , stringsAsFactors = F )
names ( dfcr ) [1 ] <- c ( " names" )
suppressWarnings ( re <- left_join ( df , dfcr , by = " names" ) )
2019-11-13 11:11:26 +01:00
rona <- c ( )
for ( i in 1 : length ( ads ) ) {
if ( is.factor ( ads [ , i ] ) ) {
2019-11-13 11:24:20 +01:00
rona <- c ( rona , names ( ads [i ] ) , levels ( ads [ , i ] ) ) }
2019-12-13 11:58:21 +01:00
if ( ! is.factor ( ads [ , i ] ) ) {
rona <- c ( rona , names ( ads [i ] ) , " Per unit increase" )
2019-11-13 11:24:20 +01:00
}
2019-12-13 11:58:21 +01:00
}
2019-11-13 11:11:26 +01:00
2019-11-12 14:12:27 +01:00
if ( p.val == TRUE ) {
2019-11-13 11:11:26 +01:00
ref <- data.frame ( c ( NA , rona ) , re [ , 2 ] , re [ , 5 ] , re [ , 6 ] , re [ , 3 ] , re [ , 4 ] )
2019-11-11 17:16:25 +01:00
2019-12-13 11:58:21 +01:00
names ( ref ) <- c ( " Variable" , paste0 ( " N=" , nall ) , " Difference (95 % CI)" , " p-value" , " Mutually adjusted difference (95 % CI)" , " A p-value" )
2019-11-12 14:12:27 +01:00
}
else {
2019-11-13 11:11:26 +01:00
ref <- data.frame ( c ( NA , rona ) , re [ , 2 ] , re [ , 5 ] , re [ , 3 ] )
2019-11-12 14:12:27 +01:00
2019-12-13 11:58:21 +01:00
names ( ref ) <- c ( " Variable" , paste0 ( " N=" , nall ) , " Difference (95 % CI)" , " Mutually adjusted difference (95 % CI)" )
2019-11-12 14:12:27 +01:00
}
2019-11-11 17:16:25 +01:00
2019-12-13 11:58:21 +01:00
ls <- list ( tbl = ref , miss , nall , nrow ( d ) , mean_est )
2019-11-12 14:12:27 +01:00
names ( ls ) <- c ( " Printable table" , " Deleted due to missingness in adjusted analysis" , " Number of outcome observations" , " Length of dataframe" , " Estimated true mean (95 % CI) in adjusted analysis" )
2019-11-11 17:16:25 +01:00
}
2018-10-12 11:26:20 +02:00
return ( ls )
2018-10-11 16:31:46 +02:00
}