variable selection ond vector export for rep_lm

2025-04-21 20:32:34 +02:00 · 2021-06-14 11:33:57 +02:00 · 2021-06-14 11:33:57 +02:00 · 77930fce38
commit 77930fce38
parent 46db760722
4 changed files with 27 additions and 10 deletions
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 Package: daDoctoR
 Title: Functions For Health Research
-Version: 0.21.9
+Version: 0.21.10
 Year: 2021
 Author: Andreas Gammelgaard Damsbo <agdamsbo@pm.me>
 Maintainer: Andreas Gammelgaard Damsbo <agdamsbo@pm.me>
--- a/R/rep_lm.R
+++ b/R/rep_lm.R
@ -1,6 +1,7 @@
 #' A repeated linear regression function
 #'
 #' For bivariate analyses, to determine which variables to include in adjusted model.
+#' Output is a list with two elements: data frame with test results and vector of variable names (from 'string') to include determined by set cutoff ('cut.p').
 #' @param meas Effect meassure. Input as c() of columnnames, use dput().
 #' @param vars variables in model. Input as c() of columnnames, use dput().
 #' @param string variables to test. Input as c() of columnnames, use dput().
@ -10,7 +11,7 @@
 #' @keywords linear regression
 #' @export

-rep_lm<-function(meas,vars=NULL,string,ci=FALSE,data,fixed.var=FALSE){
+rep_lm<-function(meas,vars=NULL,string,ci=FALSE,data,fixed.var=FALSE,cut.p=0.1){

  require(broom)
  y<-data[,c(meas)]
@ -44,7 +45,7 @@ rep_lm<-function(meas,vars=NULL,string,ci=FALSE,data,fixed.var=FALSE){
        x1<-x[,i]

        if (is.factor(x1)){
-          pred<-paste(names(x)[i],levels(x1)[-1],sep = "_")
+          pred<-paste(names(x)[i],levels(x1)[-1],sep = ".")
        }

        else {pred<-names(x)[i]}
@ -68,14 +69,14 @@ rep_lm<-function(meas,vars=NULL,string,ci=FALSE,data,fixed.var=FALSE){
        x1<-x[,i]

        if (is.factor(x1)){
-          pred<-paste(names(x)[i],levels(x1)[-1],sep = "_")
+          pred<-paste(names(x)[i],levels(x1)[-1],sep = ".")
        }
        else {pred<-names(x)[i]}
        df<-rbind(df,cbind(pred,b,pv))
      }}

    pa<-as.numeric(df[,3])
-    t <- ifelse(pa<=0.1,"include","drop")
+    t <- ifelse(pa<=cut.p,"include","drop")
    pa<-ifelse(pa<0.001,"<0.001",pa)
    pa <- ifelse(pa<=0.05|pa=="<0.001",paste0("*",pa),
                 ifelse(pa>0.05&pa<=0.1,paste0(".",pa),pa))
@ -112,7 +113,7 @@ rep_lm<-function(meas,vars=NULL,string,ci=FALSE,data,fixed.var=FALSE){
        x1<-x[,i]

        if (is.factor(x1)){
-          pred<-paste(names(x)[i],levels(x1)[-1],sep = "_")}
+          pred<-paste(names(x)[i],levels(x1)[-1],sep = ".")}

        else {pred<-names(x)[i]}

@ -132,19 +133,25 @@ rep_lm<-function(meas,vars=NULL,string,ci=FALSE,data,fixed.var=FALSE){
        x1<-x[,i]

        if (is.factor(x1)){
-          pred<-paste(names(x)[i],levels(x1)[-1],sep = "_")
+          pred<-paste(names(x)[i],levels(x1)[-1],sep = ".")
        }
        else {pred<-names(x)[i]}
        df<-rbind(df,cbind(pred,b,pv))
      }}

    pa<-as.numeric(df[,3])
-    t <- ifelse(pa<=0.1,"include","drop")
+    t <- ifelse(pa<=cut.p,"include","drop")
    pa<-ifelse(pa<0.001,"<0.001",pa)
    pa <- ifelse(pa<=0.05|pa=="<0.001",paste0("*",pa),
                 ifelse(pa>0.05&pa<=0.1,paste0(".",pa),pa))

    r<-data.frame(df[,1:2],pa,t)[-1,]
  }
-  return(r)
+
+  p<-r$pred[r$t=="include"]
+  s<-c()
+  for (i in 1:length(p)){
+    s<-c(s,unlist(strsplit(p[i], "[.]"))[1])
+  }
+  return(list(tests=r,to_include=unique(s)))
 }
--- a/README.md
+++ b/README.md
@ -15,6 +15,7 @@ I have used these functions in different ways in my work on publishing rasearch
 I'm currently working on improving the code to be more universal
 - Include sample data to use with special functions
 - Include coding examples with all functions
+- Add functionality for variable selection to otger 'rep_' functions, like 'rep_lm'.

 ## New functions
 - Test for confounders and/or effect modifiers
--- a/man/rep_lm.Rd
+++ b/man/rep_lm.Rd
@ -4,7 +4,15 @@
 \alias{rep_lm}
 \title{A repeated linear regression function}
 \usage{
-rep_lm(meas, vars = NULL, string, ci = FALSE, data, fixed.var = FALSE)
+rep_lm(
+  meas,
+  vars = NULL,
+  string,
+  ci = FALSE,
+  data,
+  fixed.var = FALSE,
+  cut.p = 0.1
+)
 }
 \arguments{
 \item{meas}{Effect meassure. Input as c() of columnnames, use dput().}
@ -21,6 +29,7 @@ rep_lm(meas, vars = NULL, string, ci = FALSE, data, fixed.var = FALSE)
 }
 \description{
 For bivariate analyses, to determine which variables to include in adjusted model.
+Output is a list with two elements: data frame with test results and vector of variable names (from 'string') to include determined by set cutoff ('cut.p').
 }
 \keyword{linear}
 \keyword{regression}