daDoctoR/R/rep_glm.R

100 lines
2.7 KiB
R
Raw Normal View History

2018-10-03 10:32:10 +02:00
#' A repeated logistic regression function
2018-10-02 21:07:43 +02:00
#'
2018-10-04 09:23:14 +02:00
#' @description For bivariate analyses. The confint() function is rather slow, causing the whole function to hang when including many predictors and calculating the ORs with CI.
2018-10-04 10:10:35 +02:00
#' @param meas Effect measure (outcome). Input as c() of column names, use dput().
2018-10-04 09:23:14 +02:00
#' @param vars variables in model. Input as c() of column names, use dput().
#' @param string variables to test. Input as c() of column names, use dput().
2018-10-04 10:10:35 +02:00
#' @param ci flag to get results as OR with 95% confidence interval.
#' @param data data frame to pull variables from.
2018-10-02 21:07:43 +02:00
#' @keywords logistic regression
2018-10-04 12:07:23 +02:00
#' @export
2018-10-02 21:07:43 +02:00
#' @examples
2018-10-04 10:35:05 +02:00
#' l<-5
#' y<-factor(rep(c("a","b"),l))
#' x<-rnorm(length(y), mean=50, sd=10)
#' v1<-factor(rep(c("r","s"),length(y)/2))
#' v2<-sample(1:100, length(y), replace=FALSE)
#' v3<-as.numeric(1:length(y))
#' d<-data.frame(y,x,v1,v2,v3)
#' preds<-dput(names(d)[3:ncol(d)])
#' rep_glm(meas="y",vars="x",string=preds,ci=FALSE,data=d)
2018-10-02 21:07:43 +02:00
2018-10-04 10:10:35 +02:00
rep_glm <- function(meas, vars, string, ci = FALSE, data) {
  # Repeated logistic regression.
  #
  # For every column named in `string`, fits the binomial model
  #   meas ~ vars + <that column>
  # and reports the coefficient(s) of the added column only.
  #
  # meas   : name of the outcome column in `data`; the column must be a factor.
  # vars   : names of the columns that are kept in every model.
  # string : names of the columns that are tested one at a time.
  # ci     : if TRUE the estimate is reported as "OR (lower to upper)" using
  #          exp(coef) and exp(confint()); otherwise the raw coefficient is
  #          reported, rounded to 3 decimals.
  # data   : data frame holding all columns.
  #
  # Returns a data frame with one row per added coefficient and the columns
  #   pred           : predictor name (factor predictors: "<name>_<level>")
  #   or_ci (or b)   : estimate as text
  #   pa             : p-value rounded to 3 decimals, "<0.001" when it rounds
  #                    to 0, prefixed "*" when <= 0.05 and "." when <= 0.1
  #   t              : "include" when p <= 0.1, otherwise "drop"

  d <- as.data.frame(data)
  y <- d[, meas]

  # Validate before any model is fitted.
  if (!is.factor(y)) {
    stop("Some kind of error message would be nice, but y should be a factor!")
  }

  # drop = FALSE keeps the column names when only one name is supplied, so the
  # labels in `pred` stay correct for a single candidate predictor.
  x <- d[, string, drop = FALSE]
  dt <- data.frame(y = y, d[, vars, drop = FALSE])

  # Number of coefficients in the base model (intercept + vars). The first m1
  # coefficients of every extended model belong to the base terms and are
  # removed from the output.
  m1 <- length(stats::coef(stats::glm(y ~ ., family = stats::binomial(),
                                      data = dt)))
  base_idx <- seq_len(m1)

  want_ci <- isTRUE(as.logical(ci))
  est_name <- if (want_ci) "or_ci" else "b"

  rows <- lapply(seq_along(x), function(i) {
    x1 <- x[[i]]
    dat <- cbind(dt, .pred = x1)
    m <- stats::glm(y ~ ., family = stats::binomial(), data = dat)

    est <- stats::coef(m)[-base_idx]

    # Column 4 of the summary coefficient table holds the p-values. Rows are
    # matched by coefficient name because the table omits aliased (NA)
    # coefficients that coef() still lists.
    ptab <- stats::coef(summary(m))
    pv <- round(unname(ptab[match(names(est), rownames(ptab)), 4]), 3)

    if (want_ci) {
      # confint() is the slow step, so it is evaluated once per model.
      limits <- suppressMessages(exp(stats::confint(m)))
      limits <- round(limits[-base_idx, , drop = FALSE], 2)
      estimate <- paste0(round(exp(est), 2),
                         " (", limits[, 1], " to ", limits[, 2], ")")
    } else {
      estimate <- as.character(round(est, 3))
    }

    if (is.factor(x1)) {
      # glm() drops unused levels, so the labels are built from used levels.
      pred <- paste(names(x)[i], levels(droplevels(x1))[-1], sep = "_")
    } else {
      pred <- names(x)[i]
    }

    data.frame(pred = rep_len(pred, length(estimate)),
               est = estimate,
               pv = pv,
               stringsAsFactors = FALSE)
  })

  if (length(rows) > 0L) {
    res <- do.call(rbind, rows)
  } else {
    res <- data.frame(pred = character(0), est = character(0),
                      pv = numeric(0), stringsAsFactors = FALSE)
  }

  # All thresholds are applied to the numeric p-values; the text label is
  # only used for display.
  pv <- res$pv
  label <- ifelse(pv < 0.001, "<0.001", as.character(pv))
  t <- as.character(ifelse(pv <= 0.1, "include", "drop"))
  pa <- as.character(
    ifelse(pv <= 0.05, paste0("*", label),
           ifelse(pv <= 0.1, paste0(".", label), label))
  )

  r <- data.frame(res$pred, res$est, pa, t, stringsAsFactors = FALSE)
  names(r) <- c("pred", est_name, "pa", "t")
  r
}
2018-10-04 10:35:05 +02:00