2018-10-03 10:32:10 +02:00
#' A repeated logistic regression function
#'
#' @description For bivariate analyses. Fits one logistic regression per
#'   tested variable, each time adding that variable to the fixed set of
#'   variables in `vars`, and collects the estimate and p-value of the added
#'   variable only. The confint() function is rather slow, so asking for ORs
#'   with CI (`ci = TRUE`) can take a long time when testing many predictors.
#' @param meas Effect measure (outcome). Name of a factor column in `data`.
#' @param vars Variables kept in every model. Input as c() of column names,
#'   use dput().
#' @param string Variables to test, one model per variable. Input as c() of
#'   column names, use dput().
#' @param ci Flag to get results as OR with 95% confidence interval instead
#'   of the raw coefficient.
#' @param data Data frame to pull variables from.
#' @return A data frame with one row per coefficient of each tested variable
#'   and four columns: the predictor label (for factor and character
#'   variables the non-reference level is appended to the name), the estimate
#'   (coefficient rounded to 3 decimals, or OR with CI rounded to 2 decimals
#'   when `ci = TRUE`), `pa` (p-value rounded to 3 decimals, marked when
#'   p <= 0.05 or 0.05 < p <= 0.1) and `t` (screening flag based on
#'   p <= 0.1).
#' @keywords logistic regression
#' @export
#' @examples
#' l<-50
#' y<-factor(rep(c("a","b"),l))
#' x<-rnorm(length(y), mean=50, sd=10)
#' v1<-factor(rep(c("r","s"),length(y)/2))
#' v2<-sample(1:100, length(y), replace=FALSE)
#' v3<-as.numeric(1:length(y))
#' d<-data.frame(y,x,v1,v2,v3)
#' preds<-c("v1","v2","x")
#' rep_glm(meas="y",vars="v3",string=preds,ci=FALSE,data=d)
rep_glm <- function(meas, vars, string, ci = FALSE, data) {
  d <- as.data.frame(data)

  # Validate the outcome before any model is fitted.
  y <- d[[meas]]
  if (!is.factor(y)) {
    stop(" Some kind of error message would be nice, but y should be a factor!")
  }

  # drop = FALSE keeps the column names even when a single column is selected.
  x <- d[, string, drop = FALSE]
  v <- d[, vars, drop = FALSE]
  dt <- cbind(y, v)

  # Number of coefficients in the baseline model; everything after these
  # positions in an extended model belongs to the tested variable.
  m1 <- length(coef(glm(y ~ ., family = binomial(), data = dt)))
  base_idx <- seq_len(m1)

  # Fit baseline + i-th tested variable and return a character matrix with
  # the columns label / estimate / p-value for the added coefficients.
  fit_one <- function(i) {
    x1 <- x[[i]]
    dat <- cbind(dt, x[[i]])
    m <- glm(y ~ ., family = binomial(), data = dat)

    est <- coef(m)[-base_idx]

    # Look p-values up by coefficient name: summary() omits aliased
    # coefficients, which would shift a purely positional index.
    coef_tab <- coef(summary(m))
    pv <- round(unname(coef_tab[match(names(est), rownames(coef_tab)), 4]), 3)

    if (ci) {
      # Profile the likelihood once and reuse both bounds.
      bounds <- suppressMessages(confint(m))
      bounds <- round(exp(bounds[-base_idx, , drop = FALSE]), 2)
      value <- paste0(
        round(exp(est), 2), " (", bounds[, 1], " to ", bounds[, 2], " )"
      )
    } else {
      value <- as.character(round(est, 3))
    }

    if (is.factor(x1) || is.character(x1)) {
      # factor() drops unused levels, as glm() does; the first level is the
      # reference category and has no coefficient of its own.
      pred <- paste(names(x)[i], levels(factor(x1))[-1], sep = " _")
    } else {
      pred <- names(x)[i]
    }

    cbind(pred, value, pv)
  }

  tab <- do.call(rbind, lapply(seq_along(x), fit_one))
  if (is.null(tab)) {
    tab <- matrix(character(0), ncol = 3)
  }

  # All thresholds are applied to the numeric p-values; only afterwards are
  # they turned into display strings.
  pv <- as.numeric(tab[, 3])
  known <- !is.na(pv)

  flag <- rep(" drop", length(pv))
  flag[known & pv <= 0.1] <- " include"
  flag[!known] <- NA

  pa <- as.character(pv)
  pa[known & pv < 0.001] <- " <0.001"
  signif_05 <- known & pv <= 0.05
  signif_10 <- known & pv > 0.05 & pv <= 0.1
  pa[signif_05] <- paste0(" *", pa[signif_05])
  pa[signif_10] <- paste0(" .", pa[signif_10])

  res <- as.data.frame(tab[, 1:2, drop = FALSE])
  names(res) <- c(" pred", if (ci) " or_ci" else " b")

  data.frame(res, pa, t = flag)
}
2018-10-04 10:35:05 +02:00
2018-10-04 21:06:22 +02:00