Add files via upload

2025-02-18 17:32:36 +01:00 · 2018-10-02 21:07:43 +02:00 · 2018-10-02 21:07:43 +02:00 · b2e02094b6
commit b2e02094b6
parent 8f819d3089
30 changed files with 1210 additions and 0 deletions
--- a/11
+++ b/11
@ -0,0 +1,11 @@
 Package: daDoctoR
 Title: Research toolbox
 Version: 1.0.0
 Authors@R: c(person("Andreas", "Gammelgaard Damsbo",, email = "agdamsbo@protonmail.com", role = c("aut","cre", "cph")))
 Description: A collection of functions to ease my work. I'll try to keep it updated for others to use as well.
 Depends: R (>= 3.4.4)
 Imports: broom
 License: GPL (>= 2)
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 6.1.0.9000
--- a/16
+++ b/16
@ -0,0 +1,16 @@
 # Generated by roxygen2: do not edit by hand
 export(age_calc)
 export(cie_test)
 export(col_fact)
 export(col_num)
 export(cpr_check)
 export(cpr_sex)
 export(date_convert)
 export(dob_extract_cpr)
 export(hwe_allele)
 export(hwe_geno)
 export(hwe_sum)
 export(rep_biv)
 export(rep_glm)
 export(rep_lm)
--- a/R/age_calc_function.R
+++ b/R/age_calc_function.R
@ -0,0 +1,76 @@
 #' Calculating age from date of birth
 #'
 #' For age calculations.
 #' @param dob Date of birth.
 #' @param enddate Date to calculate age at.
 #' @keywords age
 #' @export
 #' @examples
 #' age_calc()
 age_calc<-function (dob, enddate = Sys.Date(), units = "years", precise = TRUE) 
 ## Build upon the work of Jason P. Becker, as part of tihe eeptools
  {
  if (!inherits(dob, "Date") | !inherits(enddate, "Date")) {
    stop("Both dob and enddate must be Date class objects")
  }
  if (enddate < dob) {
    stop("End date must be a date after date of birth")
  }
  start <- as.POSIXlt(dob)
  end <- as.POSIXlt(enddate)
  if (precise) {
    start_is_leap <- ifelse(start$year%%400 == 0, TRUE, ifelse(start$year%%100 == 
                                                                 0, FALSE, ifelse(start$year%%4 == 0, TRUE, FALSE)))
    end_is_leap <- ifelse(end$year%%400 == 0, TRUE, ifelse(end$year%%100 == 
                                                             0, FALSE, ifelse(end$year%%4 == 0, TRUE, FALSE)))
  }
  if (units == "days") {
    result <- difftime(end, start, units = "days")
  }
  else if (units == "months") {
    months <- sapply(mapply(seq, as.POSIXct(start), as.POSIXct(end), 
                            by = "months", SIMPLIFY = FALSE), length) - 1
    if (precise) {
      month_length_end <- ifelse(end$mon == 1 & end_is_leap, 
                                 29, ifelse(end$mon == 1, 28, ifelse(end$mon %in% 
                                                                       c(3, 5, 8, 10), 30, 31)))
      month_length_prior <- ifelse((end$mon - 1) == 1 & 
                                     start_is_leap, 29, ifelse((end$mon - 1) == 1, 
                                                               28, ifelse((end$mon - 1) %in% c(3, 5, 8, 10), 
                                                                          30, 31)))
      month_frac <- ifelse(end$mday > start$mday, (end$mday - 
                                                     start$mday)/month_length_end, ifelse(end$mday < 
                                                                                            start$mday, (month_length_prior - start$mday)/month_length_prior + 
                                                                                            end$mday/month_length_end, 0))
      result <- months + month_frac
    }
    else {
      result <- months
    }
  }
  else if (units == "years") {
    years <- sapply(mapply(seq, as.POSIXct(start), as.POSIXct(end), 
                           by = "years", SIMPLIFY = FALSE), length) - 1
    if (precise) {
      start_length <- ifelse(start_is_leap, 366, 365)
      end_length <- ifelse(end_is_leap, 366, 365)
      start_day <- ifelse(start_is_leap & start$yday >= 
                            60, start$yday - 1, start$yday)
      end_day <- ifelse(end_is_leap & end$yday >= 60, end$yday - 
                          1, end$yday)
      year_frac <- ifelse(start_day < end_day, (end_day - 
                                                  start_day)/end_length, ifelse(start_day > end_day, 
                                                                                (start_length - start_day)/start_length + end_day/end_length, 
                                                                                0))
      result <- years + year_frac
    }
    else {
      result <- years
    }
  }
  else {
    stop("Unrecognized units. Please choose years, months, or days.")
  }
  return(result)
 }
--- a/R/cie_test.R
+++ b/R/cie_test.R
@ -0,0 +1,165 @@
 #' A repeated regression function for change-in-estimate analysis
 #'
 #' For bivariate analyses.
 #' @param y Effect meassure.
 #' @param v1 Main variable in model
 #' @param string String of columnnames from dataframe to include. Use dput().
 #' @keywords change-in-estimate
 #' @export
 #' @examples
 #' cie_test()
 cie_test<-function(y,v1,string,data,logistic=FALSE,cut=0.1,v2=NULL,v3=NULL){
 ## Calculating variables, that should be included for a change in estimate analysis. 
 ## v1-3 are possible locked variables, y is the outcome vector. 
 ## String defines variables to test, and is provided as vector of variable names. Use dput().
 ## From "Modeling and variable selection in epidemiologic analysis." - S. Greenland, 1989.
  require(broom)
  d<-data
  x<-select(d,one_of(c(string)))
  if(logistic==FALSE){
 if (is.factor(y)){stop("Some kind of error message would be nice, but y should not be a factor!")}
 if (is.null(v2)&is.null(v3)){     
  e<-as.numeric(round(coef(lm(y~v1)),3))[1]
       df<-data.frame(pred="base",b=e)
  for(i in 1:ncol(x)){
     m<-lm(y~v1+x[,i])
     b<-as.numeric(round(coef(m),3))[1]
     v<-x[,i]
   pred<-paste(names(x)[i])
     df<-rbind(df,cbind(pred,b))
  }
       t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
 df<-cbind(df,t)  
       }
       if (!is.null(v2)&is.null(v3)){   
           e<-as.numeric(round(coef(lm(y~v1+v2)),3))[1]
       df<-data.frame(pred="base",b=e)
  for(i in 1:ncol(x)){
     m<-lm(y~v1+v2+x[,i])
     b<-as.numeric(round(coef(m),3))[1]
     v<-x[,i]
       pred<-paste(names(x)[i])
     df<-rbind(df,cbind(pred,b))
  }
       t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
 df<-cbind(df,t)  
       }
       if (!is.null(v2)&!is.null(v3)){   
           e<-as.numeric(round(coef(lm(y~v1+v2+v3)),3))[1]
       df<-data.frame(pred="base",b=e)
  for(i in 1:ncol(x)){
     m<-lm(y~v1+v2+v3+x[,i])
     b<-as.numeric(round(coef(m),3))[1]
     v<-x[,i]
       pred<-paste(names(x)[i])
     df<-rbind(df,cbind(pred,b))
  }
       t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
 df<-cbind(df,t)  
 }}
 if(logistic==TRUE){
 if (!is.factor(y)){stop("Some kind of error message would be nice, but y should be a factor!")}
 if (is.null(v2)&is.null(v3)){     
  e<-as.numeric(round(exp(coef(glm(y~v1,family=binomial()))),3))[1]
       df<-data.frame(pred="base",b=e)
  for(i in 1:ncol(x)){
     m<-glm(y~v1+x[,i],family=binomial())
     b<-as.numeric(round(exp(coef(m)),3))[1]
     v<-x[,i]
   pred<-paste(names(x)[i])
     df<-rbind(df,cbind(pred,b))
  }
       t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
 df<-cbind(df,t)  
       }
       if (!is.null(v2)&is.null(v3)){   
           e<-as.numeric(round(exp(coef(glm(y~v1+v2,family=binomial()))),3))[1]
       df<-data.frame(pred="base",b=e)
  for(i in 1:ncol(x)){
     m<-glm(y~v1+v2+x[,i],family=binomial())
     b<-as.numeric(round(exp(coef(m)),3))[1]
     v<-x[,i]
       pred<-paste(names(x)[i])
     df<-rbind(df,cbind(pred,b))
  }
       t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
 df<-cbind(df,t)  
       }
       if (!is.null(v2)&!is.null(v3)){   
           e<-as.numeric(round(exp(coef(glm(y~v1+v2+v3,family=binomial()))),3))[1]
       df<-data.frame(pred="base",b=e)
  for(i in 1:ncol(x)){
     m<-glm(y~v1+v2+v3+x[,i],family=binomial())
     b<-as.numeric(round(exp(coef(m)),3))[1]
     v<-x[,i]
       pred<-paste(names(x)[i])
     df<-rbind(df,cbind(pred,b))
  }
       t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
 df<-cbind(df,t)  
 }}
    return(df)
 }
--- a/R/col_fact.R
+++ b/R/col_fact.R
@ -0,0 +1,40 @@
 #' Formatting multiple columns as factor
 #'
 #' Depending on dply's contains()-function.
 #' @param string Columnnames containg strings.
 #' @param data Dataframe
 #' @param levels Levels for all selected columns
 #' @param labels labels for all selected columns
 #' @keywords factor
 #' @export
 #' @examples
 #' col_fact()
 col_fact<-function(string,data,levels=NULL,labels=NULL){
 ## Defining factors for columns containing string (can be vector of multiple strings), based on dplyr.
 ## Factoring several columns with same levels or labels, these can be provided.
  require(dplyr)
  d<-data
  s<-string
  n<-c()
  for(i in 1:length(s)){
  n<-c(n,names(select(d,contains(s[i]))))
  }
  if (!is.null(levels)){
    for(i in 1:length(n)) {
    d[,n[i]]<-factor(d[,n[i]],levels=levels)}}
    if (!is.null(labels)){ 
      for(i in 1:length(n)) {
        d[,n[i]]<-factor(d[,n[i]],labels=labels)
  }}
      else 
        for(i in 1:length(n)) {
          d[,n[i]]<-factor(d[,n[i]])}
  return(d)
 }
--- a/R/col_num.R
+++ b/R/col_num.R
@ -0,0 +1,27 @@
 #' Formatting multiple columns as numeric
 #'
 #' Depending on dply's contains()-function.
 #' @param string Columnnames containg strings.
 #' @param data Dataframe
 #' @keywords numeric
 #' @export
 #' @examples
 #' col_num()
 col_num<-function(string,data){
 ## Defining factors for columns containing string (can be vector of multiple strings), based on dplyr
  require(dplyr)
  d<-data
  s<-string
  n<-c()
  for(i in 1:length(s)){
  n<-c(n,names(select(d,contains(s[i]))))
  }
  for(i in 1:length(n)) {
    d[,n[i]]<-as.numeric(d[,n[i]])
  }
  return(d)
 }
--- a/R/cpr_check_function.R
+++ b/R/cpr_check_function.R
@ -0,0 +1,26 @@
 #' CPR check
 #'
 #' Checking validity of cpr number
 #' @param x cpr as "ddmmyy-xxxx".
 #' @keywords cpr
 #' @export
 #' @examples
 #' cpr_check()
 cpr_check<-function(x){
  #Check validity of CPR number, format ddmmyy-xxxx
  p1<-as.integer(substr(x,1,1))
  p2<-as.integer(substr(x,2,2))
  p3<-as.integer(substr(x,3,3))
  p4<-as.integer(substr(x,4,4))
  p5<-as.integer(substr(x,5,5))
  p6<-as.integer(substr(x,6,6))
  p7<-as.integer(substr(x,8,8))
  p8<-as.integer(substr(x,9,9))
  p9<-as.integer(substr(x,10,10))
  p10<-as.integer(substr(x,11,11))
  result<-ifelse((p1*4+p2*3+p3*2+p4*7+p5*6+p6*5+p7*4+p8*3+p9*2+p10) %% 11 == 0,"valid","invalid")
  return(result)
 }
--- a/R/cpr_sex_function.R
+++ b/R/cpr_sex_function.R
@ -0,0 +1,16 @@
 #' Determine sex from CPR
 #'
 #' Format "ddmmyy-xxxx"
 #' @param x cpr as "ddmmyy-xxxx".
 #' @keywords cpr
 #' @export
 #' @examples
 #' cpr_sex()
 cpr_sex<-function(x){ 
  ##Input as vector of DK cpr numbers, format "ddmmyy-xxxx", returns sex according to cpr
  last<-as.integer(substr(x, start = 11, stop = 11))
  sex<-ifelse(last %% 2 == 0, "female", "male")
  return(sex)
 }
--- a/R/date_convert_function.R
+++ b/R/date_convert_function.R
@ -0,0 +1,15 @@
 #' Converting data format
 #'
 #' Input format as dd/mm/yyyy, output is standard yyyy-mm-dd
 #' @param x data as as dd/mm/yyyy.
 #' @keywords date
 #' @export
 #' @examples
 #' date_convert()
 date_convert<-function(x)
  ## Input format as dd/mm/yyyy, output is standard yyyy-mm-dd  
  {
      result<-as.Date(x, format="%d/%m/%Y")
 print(result)  
 }
--- a/R/dob_extract_cpr_function.R
+++ b/R/dob_extract_cpr_function.R
@ -0,0 +1,62 @@
 #' Extracting date of birth from CPR
 #'
 #' For easy calculation.
 #' @param cpr cpr-numbers in format ddmmyy-xxxx.
 #' @keywords cpr
 #' @export
 #' @examples
 #' dob_extract_cpr()
 dob_extract_cpr<-function(cpr)
 ## Input as cpr-numbers in format ddmmyy-xxxx
 ## Build upon data from this document: https://cpr.dk/media/167692/personnummeret%20i%20cpr.pdf
 ## example vector: fsd<-c("010190-2000", "010115-4000", "010189-6000","010189-3000","010150-6000","010150-4000")
  {
  if (any(substr(cpr,7,7)%in%c(0:9))){stop("Input format should be ddmmyy-xxxx")} # test if input is ddmmyyxxxx
  else {
  dobs<-c()
  a00<-as.numeric(c(0:99))
  a36<-as.numeric(c(0:36))
  a57<-as.numeric(c(0:57))
  b00<-as.numeric(c(0,1,2,3))
  b36<-as.numeric(c(4,9))
  b57<-as.numeric(c(5,6,7,8))
  for (x in cpr)
  {
  p56<-as.numeric(substr(x,5,6))
  p8<-as.numeric(substr(x,8,8))
  birth<-as.Date(substr(x,1,6),format="%d%m%y")
  if (((p56%in%a00)&&(p8%in%b00))) 
    {
    dob<-as.Date(format(birth, format="19%y%m%d"), format="%Y%m%d")
    }
  else if (((p56%in%a36)&&(p8%in%b36))) 
    {
    dob<-as.Date(format(birth, format="20%y%m%d"), format="%Y%m%d")
    }
  else if ((!(p56%in%a36)&&(p8%in%b36))) 
    {
    dob<-as.Date(format(birth, format="19%y%m%d"), format="%Y%m%d")
    }
  else if (((p56%in%a57)&&(p8%in%b57))) 
    {
    dob<-as.Date(format(birth, format="20%y%m%d"), format="%Y%m%d")
    }
  else if ((!(p56%in%a57)&&(p8%in%b57))) 
    {
    dob<-as.Date(format(birth, format="18%y%m%d"), format="%Y%m%d")
    }
  else {print("Input contains data in wrong format") # test if position 5,6 or 8 contains letters as is the case for temporary cpr-numbers
    }
  dobs<-append(dobs,dob)
  }
  return(dobs)
 }
 }
--- a/R/hwe_allele.R
+++ b/R/hwe_allele.R
@ -0,0 +1,129 @@
 #' HWE calculation by alleles
 #'
 #' For easy calculation.
 #' @param x Allele 1.
 #' @param y Allele 2.
 #' @keywords hardy-weinberg-equllibrium
 #' @export
 #' @examples
 #' hwe_allele()
 hwe_allele<-function(x,y)
 {
 ## Witten by Andreas Gammelgaard Damsbo, agdamsbo@pm.me, based on a non-working
 ## applet at from http://www.husdyr.kvl.dk/htm/kc/popgen/genetik/applets/kitest.htm
  all<-pmax(length(levels(factor(x))),length(levels(factor(y))))
  if(all==2){
    df=1
    ## Biallellic system, df=1
    al1<-factor(x,labels=c("p","q"))
    al2<-factor(y,labels=c("p","q"))
    snp<-paste(al1,al2,sep = "_")
    snp[snp=="q_p"]<-"p_q"
    snp_f<-factor(snp,levels=c("p_p", "p_q", "q_q"))
    a<-length(snp)*2
    b<-length(snp)
    p<-(length(al1[al1=="p"])+length(al2[al2=="p"]))/a
    q<-(length(al1[al1=="q"])+length(al2[al2=="q"]))/a
    al_dist<-round(rbind(cbind(p*a,q*a),cbind(p*100,q*100)),1)
    al_names<-levels(factor(x))
    p_p<-round(p^2*b,3)
    p_q<-round(2*p*q*b,3)
    q_q<-round(q^2*b,3)
    hwe<-data.frame(obs=summary(snp_f),exp=rbind(p_p, p_q, q_q))
    hwe$dev<-hwe$obs-hwe$exp
    hwe$chi<-hwe$dev^2/hwe$exp
    snp_obs<-round(rbind(summary(snp_f),summary(snp_f)/length(snp_f)*100),1)
    snp_exp<-round(rbind(hwe$exp,hwe$exp/b*100),1)
    gen_names<-c(
      paste(levels(factor(x))[1],levels(factor(x))[1],sep="_"),
      paste(levels(factor(x))[1],levels(factor(x))[2],sep="_"),
      paste(levels(factor(x))[2],levels(factor(x))[2],sep="_"))
    chi<-sum(hwe$chi,na.rm = TRUE)
    p_v<-pchisq(chi, df=df, lower.tail=FALSE)
  }
  else if(all==3){
    df=3
    ## Triallellic system, df=3
    al1<-factor(x,labels=c("p","q","r"))
    al2<-factor(y,labels=c("p","q","r"))
    snp<-paste(al1,al2,sep = "_")
    snp[snp=="r_p"]<-"p_r"
    snp[snp=="r_q"]<-"q_r"
    snp[snp=="q_p"]<-"p_q"
    snp_f<-factor(snp,levels=c("p_p", "p_q", "q_q","p_r","q_r", "r_r"))
    a<-length(snp)*2
    b<-length(snp)
    p<-(length(al1[al1=="p"])+length(al2[al2=="p"]))/a
    q<-(length(al1[al1=="q"])+length(al2[al2=="q"]))/a
    r<-(length(al1[al1=="r"])+length(al2[al2=="r"]))/a
    al_dist<-round(rbind(cbind(p*a,q*a,r*a),cbind(p*100,q*100,r*100)),1)
    al_names<-levels(factor(x))
    p_p<-round(p^2*b,3)
    p_q<-round(2*p*q*b,3)
    q_q<-round(q^2*b,3)
    p_r<-round(2*r*p*b,3)
    q_r<-round(2*q*r*b,3)
    r_r<-round(r^2*b,3)
    hwe<-data.frame(obs=summary(snp_f),exp=rbind(p_p, p_q, q_q, p_r, q_r, r_r))
    hwe$dev<-hwe$obs-hwe$exp
    hwe$chi<-hwe$dev^2/hwe$exp
    snp_obs<-round(rbind(summary(snp_f),summary(snp_f)/length(snp_f)*100),1)
    snp_exp<-round(rbind(hwe$exp,hwe$exp/b*100),1)
    gen_names<-c(
      paste(levels(factor(x))[1],levels(factor(x))[1],sep="_"),
      paste(levels(factor(x))[1],levels(factor(x))[2],sep="_"),
      paste(levels(factor(x))[2],levels(factor(x))[2],sep="_"),
      paste(levels(factor(x))[1],levels(factor(x))[3],sep="_"),
      paste(levels(factor(x))[2],levels(factor(x))[3],sep="_"),
      paste(levels(factor(x))[3],levels(factor(x))[3],sep="_"))
    chi<-sum(hwe$chi,na.rm = TRUE)
    p_v<-pchisq(chi, df=df, lower.tail=FALSE)
  }
  else if (!any(all==c(2,3))){stop("This formula only works for bi- or triallellic systems")}
  else {stop("There was an unknown error")}
  colnames(al_dist)<-al_names
  colnames(snp_obs)<-gen_names
  colnames(snp_exp)<-gen_names
  rn<-c("N","%")
  rownames(al_dist)<-rn
  rownames(snp_obs)<-rn
  rownames(snp_exp)<-rn
  int<-ifelse(p_v<=0.05,"The null-hypothesis of difference from the HWE can be confirmed","The null-hypothesis of difference from the HWE can be rejected")
  t1<-"Chi-square test for Hardy-Weinberg equillibrium for a bi- or triallellic system. Read more: http://www.husdyr.kvl.dk/htm/kc/popgen/genetics/2/2.htm"
  list<-list(info=t1,n.total=b,allele.dist=al_dist,observed.dist=snp_obs,expected.dist=snp_exp,chi.value=chi,p.value=p_v,df=df,interpretation=int)
  return(list)
 }
--- a/R/hwe_geno.R
+++ b/R/hwe_geno.R
@ -0,0 +1,103 @@
 #' HWE calculation by genotypes
 #'
 #' For easy calculation.
 #' @param mm First count of genotype.
 #' @keywords hardy-weinberg-equllibrium
 #' @export
 #' @examples
 #' hwe_geno()
 hwe_geno<-function(mm,mn,nn,mo,no,oo,alleles=2)
 {
  ## x is the number of common homozygote, y the heterozygote and z the rare homozygote.
  if (alleles==2){
    ## Biallelic has three degrees of freedom
    df=1
    a<-sum(mm,mn,nn)*2
    b<-sum(mm,mn,nn)
    p<-(2*mm+mn)/a
    q<-(2*nn+mn)/a
    al_dist<-round(rbind(cbind(p*a,q*a),cbind(p*100,q*100)),1)
    al_names<-c("m","n")
    p_p<-round(p^2*b,3)
    p_q<-round(2*p*q*b,3)
    q_q<-round(q^2*b,3)
    obs=rbind(mm,mn,nn)
    exp=rbind(p_p, p_q, q_q)
    hwe<-data.frame(obs,exp)
    hwe$dev<-hwe$obs-hwe$exp
    hwe$chi<-hwe$dev^2/hwe$exp
    snp_obs<-round(rbind(hwe$obs,hwe$obs/sum(hwe$obs)*100),1)
    snp_exp<-round(rbind(hwe$exp,hwe$exp/b*100),1)
    gen_names<-c("mm","mn","nn")
    chi<-sum(hwe$chi,na.rm = TRUE)
    p_v<-pchisq(chi, df=df, lower.tail=FALSE)
  }
  if(alleles==3){
    ## Triallelic has three degrees of freedom
    df=3
    a<-sum(mm,mn,nn,mo,no,oo)*2
    b<-sum(mm,mn,nn,mo,no,oo)
    p<-(2*mm+mn+mo)/a
    q<-(2*nn+mn+no)/a
    r<-(2*oo+no+mo)/a
    al_dist<-round(rbind(cbind(p*a,q*a,r*a),cbind(p*100,q*100,r*100)),1)
    al_names<-c("m","n","o")
    p_p<-round(p^2*b,3)
    p_q<-round(2*p*q*b,3)
    q_q<-round(q^2*b,3)
    p_r<-round(2*r*p*b,3)
    q_r<-round(2*q*r*b,3)
    r_r<-round(r^2*b,3)
    obs=rbind(mm,mn,nn,mo,no,oo)
    exp=rbind(p_p, p_q, q_q, p_r, q_r, r_r)
    hwe<-data.frame(obs,exp)
    hwe$dev<-hwe$obs-hwe$exp
    hwe$chi<-hwe$dev^2/hwe$exp
    snp_obs<-round(rbind(hwe$obs,hwe$obs/sum(hwe$obs)*100),1)
    snp_exp<-round(rbind(hwe$exp,hwe$exp/b*100),1)
    gen_names<-c("mm","mn","nn", "mo", "no", "oo")
    chi<-sum(hwe$chi,na.rm = TRUE)
    p_v<-pchisq(chi, df=df, lower.tail=FALSE)    
  }
  colnames(al_dist)<-al_names
  colnames(snp_obs)<-gen_names
  colnames(snp_exp)<-gen_names
  rownames(al_dist)<-c("N","%")
  rownames(snp_obs)<-c("N","%")
  rownames(snp_exp)<-c("N","%")
  int<-ifelse(p_v<=0.05,"The null-hypothesis of difference from the HWE can be confirmed","The null-hypothesis of difference from the HWE can be rejected")
  t1<-"Chi-square test for Hardy-Weinberg equillibrium for a bi- or triallellic system. Theory: http://www.husdyr.kvl.dk/htm/kc/popgen/genetics/2/2.htm"
  list<-list(info=t1,n.total=b,allele.dist=al_dist,observed.dist=snp_obs,expected.dist=snp_exp,chi.value=chi,p.value=p_v,df=df,interpretation=int)
  return(list)
 }
--- a/R/hwe_sum.R
+++ b/R/hwe_sum.R
@ -0,0 +1,34 @@
 #' Summarising function of HWE calculation
 #'
 #' For easy printing.
 #' @param a1 Allele 1.
 #' @param a2 Allele 2.
 #' @param f factor for grouping.
 #' @keywords hardy-weinberg-equllibrium
 #' @export
 #' @examples
 #' hwe_sum()
 hwe_sum<-function(a1,a2,f){
 ## HWE summarising function, for several groups defined by factor f. Alleles are provided as vectors, a1 and a2.
 source("https://raw.githubusercontent.com/agdamsbo/research/master/hwe_allele.R")
 lst<-list()
 df<-data.frame(cbind(a1,a2))
  for (i in 1:length(ls<-split(df,f))){
  grp<-names(ls)[i]
  obs<-data.frame(hwe_allele(ls[[i]][,1],ls[[i]][,2])[[c("observed.dist")]])
  pval<-round(hwe_allele(ls[[i]][,1],ls[[i]][,2])[[c("p.value")]],3)
  prnt<-paste0(obs[1,]," (",obs[2,],")")
  names(prnt)<-names(obs)
  lst<-list(lst,grp,obs.dist=obs,print=prnt,hwe.pv=pval)
  }
 return(lst)
 }
--- a/R/rep_biv.R
+++ b/R/rep_biv.R
@ -0,0 +1,27 @@
 #' A repeated function 
 #'
 #' For bivariate analyses, for gating by p-value or change-in-estimate.
 #' @param y Effect meassure.
 #' @param v1 Main variable in model
 #' @keywords logistic regression
 #' @export
 #' @examples
 #' rep_biv()
 rep_biv<-function(y,v1,string,data,method="pval",logistic=FALSE,ci=FALSE,cut=0.1,v2=NULL,v3=NULL){
 require(rep_lm)
  require(rep_glm)
  require(cie_test)
 if (method=="pval"&logistic==FALSE){
 rep_lm(y=y,v1=v1,string=string,data=data,ci=ci)
 }
 if (method=="pval"&logistic==TRUE){
 rep_lm(y=y,v1=v1,string=string,data=data,ci=ci)
 }
 if (method=="cie"){
 cie_test(y=y,v1=v1,string=string,data=data,logistic=logistic,cut=cut,v2=v2,v3=v3)
 }
 return(df)
 }
--- a/R/rep_glm.R
+++ b/R/rep_glm.R
@ -0,0 +1,91 @@
 #' A repeated logistic regression function 
 #'
 #' For bivariate analyses.
 #' @param y Effect meassure.
 #' @param v1 Main variable in model
 #' @keywords logistic regression
 #' @export
 #' @examples
 #' rep_glm()
 rep_glm<-function(y,v1,string,ci=FALSE,data,v2=NULL,v3=NULL){
 ## x is data.frame of predictors, y is vector of an aoutcome as a factor
 ## output is returned as coefficient, or if or=TRUE as OR with 95 % CI.
 ## The confint() function is rather slow, causing the whole function to hang when including many predictors and calculating the ORs with CI.
  require(broom)
  d<-data
  x<-select(d,one_of(c(string)))
  m1<-length(coef(glm(y~v1,family = binomial())))
 if (!is.factor(y)){stop("Some kind of error message would be nice, but y should be a factor!")}
    if (ci==TRUE){
             df<-data.frame(matrix(ncol = 4))
  names(df)<-c("pred","or_ci","pv","t")
      for(i in 1:ncol(x)){
     m<-glm(y~v1+x[,i],family = binomial())
     l<-suppressMessages(round(exp(confint(m))[-c(1:m1),1],2))
     u<-suppressMessages(round(exp(confint(m))[-c(1:m1),2],2))
     or<-round(exp(coef(m))[-c(1:m1)],2)
     or_ci<-paste0(or," (",l," to ",u,")")
     pv<-round(tidy(m)$p.value[-c(1:m1)],3)
     pv<-ifelse(pv<0.001,"<0.001",pv)
         t <- ifelse(pv<=0.1|pv=="<0.001","include","drop")
     pv <- ifelse(pv<=0.05|pv=="<0.001",paste0("*",pv),
                  ifelse(pv>0.05&pv<=0.1,paste0(".",pv),pv))
    v<-x[,i]
    if (is.factor(v)){
       pred<-paste(names(x)[i],levels(v)[-1],sep = "_")
    }
       else {pred<-names(x)[i]}
      df<-rbind(df,cbind(pred,or_ci,pv,t))
    }}
  if (ci==FALSE){
       df<-data.frame(matrix(ncol = 4))
  names(df)<-c("pred","b","pv","t")
  for(i in 1:ncol(x)){
     m<-glm(y~v1+x[,i],family = binomial())
     b<-round(coef(m)[-c(1:m1)],3)
     pv<-round(tidy(m)$p.value[-c(1:m1)],3)
     pv<-ifelse(pv<0.001,"<0.001",pv)
    t <- ifelse(pv<=0.1|pv=="<0.001","include","drop")
     pv <- ifelse(pv<=0.05|pv=="<0.001",paste0("*",pv),
                  ifelse(pv>0.05&pv<=0.1,paste0(".",pv),pv))
     v<-x[,i]
     if (is.factor(v)){
       pred<-paste(names(x)[i],levels(v)[-1],sep = "_")
     }
       else {pred<-names(x)[i]}
     df<-rbind(df,cbind(pred,b,pv,t))
  }}
  return(df)
 }
--- a/R/rep_lm.R
+++ b/R/rep_lm.R
@ -0,0 +1,89 @@
 #' A repeated linear regression function 
 #'
 #' For bivariate analyses.
 #' @param y Effect meassure.
 #' @param v1 Main variable in model
 #' @keywords linear regression
 #' @export
 #' @examples
 #' rep_lm()
 rep_lm<-function(y,v1,string,ci=FALSE,data,v2=NULL,v3=NULL){
 ## x is data.frame of predictors, y is vector of an aoutcome as a factor
 ## output is returned as coefficient, or if ci=TRUE as coefficient with 95 % CI.
 ## The confint() function is rather slow, causing the whole function to hang when including many predictors and calculating the ORs with CI.
  require(broom)
  d<-data
  x<-select(d,one_of(c(string)))
  m1<-length(coef(lm(y~v1)))
 if (is.factor(y)){stop("Some kind of error message would be nice, but y should not be a factor!")}
    if (ci==TRUE){
             df<-data.frame(matrix(ncol = 4))
  names(df)<-c("pred","co_ci","pv","t")
      for(i in 1:ncol(x)){
     m<-lm(y~v1+x[,i])
     l<-suppressMessages(round(confint(m)[-c(1:m1),1],2))
     u<-suppressMessages(round(confint(m)[-c(1:m1),2],2))
     co<-round(coef(m)[-c(1:m1)],2)
     co_ci<-paste0(co," (",l," to ",u,")")
     pv<-round(tidy(m)$p.value[-c(1:m1)],3)
     pv<-ifelse(pv<0.001,"<0.001",pv)
        t <- ifelse(pv<=0.1|pv=="<0.001","include","drop")
     pv <- ifelse(pv<=0.05|pv=="<0.001",paste0("*",pv),
                  ifelse(pv>0.05&pv<=0.1,paste0(".",pv),pv))
    v<-x[,i]
    if (is.factor(v)){
       pred<-paste(names(x)[i],levels(v)[-1],sep = "_")
    }
       else {pred<-names(x)[i]}
      df<-rbind(df,cbind(pred,co_ci,pv,t))
    }}
  if (ci==FALSE){
       df<-data.frame(matrix(ncol = 4))
  names(df)<-c("pred","b","pv","t")
  for(i in 1:ncol(x)){
     m<-lm(y~v1+x[,i])
     b<-round(coef(m)[-c(1:m1)],3)
     pv<-round(tidy(m)$p.value[-c(1:m1)],3)
     pv<-ifelse(pv<0.001,"<0.001",pv)
    t <- ifelse(pv<=0.1|pv=="<0.001","include","drop")
     pv <- ifelse(pv<=0.05|pv=="<0.001",paste0("*",pv),
                  ifelse(pv>0.05&pv<=0.1,paste0(".",pv),pv))
     v<-x[,i]
     if (is.factor(v)){
       pred<-paste(names(x)[i],levels(v)[-1],sep = "_")
     }
       else {pred<-names(x)[i]}
     df<-rbind(df,cbind(pred,b,pv,t))
  }}
  return(df)
 }
--- a/man/age_calc.Rd
+++ b/man/age_calc.Rd
@ -0,0 +1,20 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/age_calc_function.R
 \name{age_calc}
 \alias{age_calc}
 \title{Calculating age from date of birth}
 \usage{
 age_calc(dob, enddate = Sys.Date(), units = "years", precise = TRUE)
 }
 \arguments{
 \item{dob}{Date of birth.}
 \item{enddate}{Date to calculate age at.}
 }
 \description{
 For age calculations.
 }
 \examples{
 age_calc()
 }
 \keyword{age}
--- a/man/cie_test.Rd
+++ b/man/cie_test.Rd
@ -0,0 +1,23 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cie_test.R
 \name{cie_test}
 \alias{cie_test}
 \title{A repeated regression function for change-in-estimate analysis}
 \usage{
 cie_test(y, v1, string, data, logistic = FALSE, cut = 0.1, v2 = NULL,
  v3 = NULL)
 }
 \arguments{
 \item{y}{Effect meassure.}
 \item{v1}{Main variable in model}
 \item{string}{String of columnnames from dataframe to include. Use dput().}
 }
 \description{
 For bivariate analyses.
 }
 \examples{
 cie_test()
 }
 \keyword{change-in-estimate}
--- a/man/col_fact.Rd
+++ b/man/col_fact.Rd
@ -0,0 +1,24 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/col_fact.R
 \name{col_fact}
 \alias{col_fact}
 \title{Formatting multiple columns as factor}
 \usage{
 col_fact(string, data, levels = NULL, labels = NULL)
 }
 \arguments{
 \item{string}{Columnnames containg strings.}
 \item{data}{Dataframe}
 \item{levels}{Levels for all selected columns}
 \item{labels}{labels for all selected columns}
 }
 \description{
 Depending on dply's contains()-function.
 }
 \examples{
 col_fact()
 }
 \keyword{factor}
--- a/man/col_num.Rd
+++ b/man/col_num.Rd
@ -0,0 +1,20 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/col_num.R
 \name{col_num}
 \alias{col_num}
 \title{Formatting multiple columns as numeric}
 \usage{
 col_num(string, data)
 }
 \arguments{
 \item{string}{Columnnames containg strings.}
 \item{data}{Dataframe}
 }
 \description{
 Depending on dply's contains()-function.
 }
 \examples{
 col_num()
 }
 \keyword{numeric}
--- a/man/cpr_check.Rd
+++ b/man/cpr_check.Rd
@ -0,0 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cpr_check_function.R
 \name{cpr_check}
 \alias{cpr_check}
 \title{CPR check}
 \usage{
 cpr_check(x)
 }
 \arguments{
 \item{x}{cpr as "ddmmyy-xxxx".}
 }
 \description{
 Checking validity of cpr number
 }
 \examples{
 cpr_check()
 }
 \keyword{cpr}
--- a/man/cpr_sex.Rd
+++ b/man/cpr_sex.Rd
@ -0,0 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cpr_sex_function.R
 \name{cpr_sex}
 \alias{cpr_sex}
 \title{Determine sex from CPR}
 \usage{
 cpr_sex(x)
 }
 \arguments{
 \item{x}{cpr as "ddmmyy-xxxx".}
 }
 \description{
 Format "ddmmyy-xxxx"
 }
 \examples{
 cpr_sex()
 }
 \keyword{cpr}
--- a/man/date_convert.Rd
+++ b/man/date_convert.Rd
@ -0,0 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/date_convert_function.R
 \name{date_convert}
 \alias{date_convert}
 \title{Converting data format}
 \usage{
 date_convert(x)
 }
 \arguments{
 \item{x}{data as as dd/mm/yyyy.}
 }
 \description{
 Input format as dd/mm/yyyy, output is standard yyyy-mm-dd
 }
 \examples{
 date_convert()
 }
 \keyword{date}
--- a/man/dob_extract_cpr.Rd
+++ b/man/dob_extract_cpr.Rd
@ -0,0 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/dob_extract_cpr_function.R
 \name{dob_extract_cpr}
 \alias{dob_extract_cpr}
 \title{Extracting date of birth from CPR}
 \usage{
 dob_extract_cpr(cpr)
 }
 \arguments{
 \item{cpr}{cpr-numbers in format ddmmyy-xxxx.}
 }
 \description{
 For easy calculation.
 }
 \examples{
 dob_extract_cpr()
 }
 \keyword{cpr}
--- a/man/hwe_allele.Rd
+++ b/man/hwe_allele.Rd
@ -0,0 +1,20 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/hwe_allele.R
 \name{hwe_allele}
 \alias{hwe_allele}
 \title{HWE calculation by alleles}
 \usage{
 hwe_allele(x, y)
 }
 \arguments{
 \item{x}{Allele 1.}
 \item{y}{Allele 2.}
 }
 \description{
 For easy calculation.
 }
 \examples{
 hwe_allele()
 }
 \keyword{hardy-weinberg-equllibrium}
--- a/man/hwe_geno.Rd
+++ b/man/hwe_geno.Rd
@ -0,0 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/hwe_geno.R
 \name{hwe_geno}
 \alias{hwe_geno}
 \title{HWE calculation by genotypes}
 \usage{
 hwe_geno(mm, mn, nn, mo, no, oo, alleles = 2)
 }
 \arguments{
 \item{mm}{First count of genotype.}
 }
 \description{
 For easy calculation.
 }
 \examples{
 hwe_geno()
 }
 \keyword{hardy-weinberg-equllibrium}
--- a/man/hwe_sum.Rd
+++ b/man/hwe_sum.Rd
@ -0,0 +1,22 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/hwe_sum.R
 \name{hwe_sum}
 \alias{hwe_sum}
 \title{Summarising function of HWE calculation}
 \usage{
 hwe_sum(a1, a2, f)
 }
 \arguments{
 \item{a1}{Allele 1.}
 \item{a2}{Allele 2.}
 \item{f}{factor for grouping.}
 }
 \description{
 For easy printing.
 }
 \examples{
 hwe_sum()
 }
 \keyword{hardy-weinberg-equllibrium}
--- a/man/rep_biv.Rd
+++ b/man/rep_biv.Rd
@ -0,0 +1,22 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/rep_biv.R
 \name{rep_biv}
 \alias{rep_biv}
 \title{A repeated function}
 \usage{
 rep_biv(y, v1, string, data, method = "pval", logistic = FALSE,
  ci = FALSE, cut = 0.1, v2 = NULL, v3 = NULL)
 }
 \arguments{
 \item{y}{Effect meassure.}
 \item{v1}{Main variable in model}
 }
 \description{
 For bivariate analyses, for gating by p-value or change-in-estimate.
 }
 \examples{
 rep_biv()
 }
 \keyword{logistic}
 \keyword{regression}
--- a/man/rep_glm.Rd
+++ b/man/rep_glm.Rd
@ -0,0 +1,21 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/rep_glm.R
 \name{rep_glm}
 \alias{rep_glm}
 \title{A repeated logistic regression function}
 \usage{
 rep_glm(y, v1, string, ci = FALSE, data, v2 = NULL, v3 = NULL)
 }
 \arguments{
 \item{y}{Effect meassure.}
 \item{v1}{Main variable in model}
 }
 \description{
 For bivariate analyses.
 }
 \examples{
 rep_glm()
 }
 \keyword{logistic}
 \keyword{regression}
--- a/man/rep_lm.Rd
+++ b/man/rep_lm.Rd
@ -0,0 +1,21 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/rep_lm.R
 \name{rep_lm}
 \alias{rep_lm}
 \title{A repeated linear regression function}
 \usage{
 rep_lm(y, v1, string, ci = FALSE, data, v2 = NULL, v3 = NULL)
 }
 \arguments{
 \item{y}{Effect meassure.}
 \item{v1}{Main variable in model}
 }
 \description{
 For bivariate analyses.
 }
 \examples{
 rep_lm()
 }
 \keyword{linear}
 \keyword{regression}