daDoctoR/R/cie_test.R

75 lines
2.1 KiB
R
Raw Normal View History

2018-10-02 21:07:43 +02:00
#' A repeated regression function for change-in-estimate analysis
#'
2018-10-04 11:59:17 +02:00
#' For bivariate analyses. From "Modeling and variable selection in epidemiologic analysis." - S. Greenland, 1989.
2018-10-04 12:50:26 +02:00
#' @param meas Effect meassure. Input as c() of columnnames, use dput().
#' @param vars variables in model. Input as c() of columnnames, use dput().
#' @param string variables to test. Input as c() of columnnames, use dput().
#' @param data data frame to pull variables from.
#' @param logistic flag to set logistic (TRUE) or linear (FALSE,standard) analysis.
#' @param cut cut value for gating if including or dropping the tested variable. As suggested bu S. Greenland (1989).
2018-10-02 21:07:43 +02:00
#' @keywords change-in-estimate
2018-10-04 12:07:23 +02:00
#' @export
2018-10-02 21:07:43 +02:00
#' @examples
2018-10-04 12:39:26 +02:00
#' cie_test()
2018-10-02 21:07:43 +02:00
2018-10-04 11:59:17 +02:00
cie_test<-function(meas,vars,string,data,logistic=FALSE,cut=0.1){
2018-10-02 21:07:43 +02:00
require(broom)
2018-10-02 21:07:43 +02:00
d<-data
2018-10-04 11:59:17 +02:00
x<-data.frame(d[,c(string)])
v<-data.frame(d[,c(vars)])
names(v)<-c(vars)
y<-d[,c(meas)]
dt<-cbind(y,v)
2018-10-04 11:59:17 +02:00
c<-as.numeric(cut)
2018-10-04 11:59:17 +02:00
if(logistic==FALSE){
2018-10-04 11:59:17 +02:00
if (is.factor(y)){stop("Logistic is flagged as FALSE, but the provided meassure is formatted as a factor!")}
2018-10-04 11:59:17 +02:00
e<-as.numeric(round(coef(lm(y~.,data = dt)),3))[1]
2018-10-02 21:07:43 +02:00
df<-data.frame(pred="base",b=e)
2018-10-02 21:07:43 +02:00
for(i in 1:ncol(x)){
2018-10-04 11:59:17 +02:00
dat<-cbind(dt,x[,i])
m<-lm(y~.,data=dat)
2018-10-02 21:07:43 +02:00
b<-as.numeric(round(coef(m),3))[1]
2018-10-04 11:59:17 +02:00
pred<-paste(names(x)[i])
2018-10-04 11:59:17 +02:00
df<-rbind(df,cbind(pred,b)) }
2018-10-04 11:59:17 +02:00
di<-as.vector(abs(e-as.numeric(df[-1,2]))/e)
dif<-c(NA,di)
t<-c(NA,ifelse(di>=c,"include","drop"))
r<-cbind(df,dif,t) }
2018-10-02 21:07:43 +02:00
if(logistic==TRUE){
2018-10-04 11:59:17 +02:00
if (!is.factor(y)){stop("Logistic is flagged as TRUE, but the provided meassure is NOT formatted as a factor!")}
2018-10-04 11:59:17 +02:00
e<-as.numeric(round(exp(coef(glm(y~.,family=binomial(),data=dt))),3))[1]
2018-10-02 21:07:43 +02:00
df<-data.frame(pred="base",b=e)
2018-10-02 21:07:43 +02:00
for(i in 1:ncol(x)){
2018-10-04 11:59:17 +02:00
dat<-cbind(dt,x[,i])
m<-glm(y~.,family=binomial(),data=dat)
2018-10-02 21:07:43 +02:00
b<-as.numeric(round(exp(coef(m)),3))[1]
2018-10-04 11:59:17 +02:00
pred<-paste(names(x)[i])
2018-10-04 11:59:17 +02:00
df<-rbind(df,cbind(pred,b)) }
2018-10-02 21:07:43 +02:00
2018-10-04 11:59:17 +02:00
di<-as.vector(abs(e-as.numeric(df[-1,2]))/e)
dif<-c(NA,di)
t<-c(NA,ifelse(di>=c,"include","drop"))
r<-cbind(df,dif,t)
}
return(r)
2018-10-02 21:07:43 +02:00
}