Add files via upload

This commit is contained in:
agdamsbo 2018-10-02 21:07:43 +02:00 committed by GitHub
parent 8f819d3089
commit b2e02094b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 1210 additions and 0 deletions

11
DESCRIPTION Normal file
View File

@ -0,0 +1,11 @@
Package: daDoctoR
Title: Research toolbox
Version: 1.0.0
Authors@R: c(person("Andreas", "Gammelgaard Damsbo",, email = "agdamsbo@protonmail.com", role = c("aut","cre", "cph")))
Description: A collection of functions to ease my work. I'll try to keep it updated for others to use as well.
Depends: R (>= 3.4.4)
Imports: broom
License: GPL (>= 2)
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.0.9000

16
NAMESPACE Normal file
View File

@ -0,0 +1,16 @@
# Generated by roxygen2: do not edit by hand
export(age_calc)
export(cie_test)
export(col_fact)
export(col_num)
export(cpr_check)
export(cpr_sex)
export(date_convert)
export(dob_extract_cpr)
export(hwe_allele)
export(hwe_geno)
export(hwe_sum)
export(rep_biv)
export(rep_glm)
export(rep_lm)

76
R/age_calc_function.R Normal file
View File

@ -0,0 +1,76 @@
#' Calculating age from date of birth
#'
#' For age calculations.
#' @param dob Date of birth.
#' @param enddate Date to calculate age at.
#' @keywords age
#' @export
#' @examples
#' age_calc()
age_calc<-function (dob, enddate = Sys.Date(), units = "years", precise = TRUE)
## Build upon the work of Jason P. Becker, as part of tihe eeptools
{
if (!inherits(dob, "Date") | !inherits(enddate, "Date")) {
stop("Both dob and enddate must be Date class objects")
}
if (enddate < dob) {
stop("End date must be a date after date of birth")
}
start <- as.POSIXlt(dob)
end <- as.POSIXlt(enddate)
if (precise) {
start_is_leap <- ifelse(start$year%%400 == 0, TRUE, ifelse(start$year%%100 ==
0, FALSE, ifelse(start$year%%4 == 0, TRUE, FALSE)))
end_is_leap <- ifelse(end$year%%400 == 0, TRUE, ifelse(end$year%%100 ==
0, FALSE, ifelse(end$year%%4 == 0, TRUE, FALSE)))
}
if (units == "days") {
result <- difftime(end, start, units = "days")
}
else if (units == "months") {
months <- sapply(mapply(seq, as.POSIXct(start), as.POSIXct(end),
by = "months", SIMPLIFY = FALSE), length) - 1
if (precise) {
month_length_end <- ifelse(end$mon == 1 & end_is_leap,
29, ifelse(end$mon == 1, 28, ifelse(end$mon %in%
c(3, 5, 8, 10), 30, 31)))
month_length_prior <- ifelse((end$mon - 1) == 1 &
start_is_leap, 29, ifelse((end$mon - 1) == 1,
28, ifelse((end$mon - 1) %in% c(3, 5, 8, 10),
30, 31)))
month_frac <- ifelse(end$mday > start$mday, (end$mday -
start$mday)/month_length_end, ifelse(end$mday <
start$mday, (month_length_prior - start$mday)/month_length_prior +
end$mday/month_length_end, 0))
result <- months + month_frac
}
else {
result <- months
}
}
else if (units == "years") {
years <- sapply(mapply(seq, as.POSIXct(start), as.POSIXct(end),
by = "years", SIMPLIFY = FALSE), length) - 1
if (precise) {
start_length <- ifelse(start_is_leap, 366, 365)
end_length <- ifelse(end_is_leap, 366, 365)
start_day <- ifelse(start_is_leap & start$yday >=
60, start$yday - 1, start$yday)
end_day <- ifelse(end_is_leap & end$yday >= 60, end$yday -
1, end$yday)
year_frac <- ifelse(start_day < end_day, (end_day -
start_day)/end_length, ifelse(start_day > end_day,
(start_length - start_day)/start_length + end_day/end_length,
0))
result <- years + year_frac
}
else {
result <- years
}
}
else {
stop("Unrecognized units. Please choose years, months, or days.")
}
return(result)
}

165
R/cie_test.R Normal file
View File

@ -0,0 +1,165 @@
#' A repeated regression function for change-in-estimate analysis
#'
#' For bivariate analyses.
#' @param y Effect meassure.
#' @param v1 Main variable in model
#' @param string String of columnnames from dataframe to include. Use dput().
#' @keywords change-in-estimate
#' @export
#' @examples
#' cie_test()
cie_test<-function(y,v1,string,data,logistic=FALSE,cut=0.1,v2=NULL,v3=NULL){
## Calculating variables, that should be included for a change in estimate analysis.
## v1-3 are possible locked variables, y is the outcome vector.
## String defines variables to test, and is provided as vector of variable names. Use dput().
## From "Modeling and variable selection in epidemiologic analysis." - S. Greenland, 1989.
require(broom)
d<-data
x<-select(d,one_of(c(string)))
if(logistic==FALSE){
if (is.factor(y)){stop("Some kind of error message would be nice, but y should not be a factor!")}
if (is.null(v2)&is.null(v3)){
e<-as.numeric(round(coef(lm(y~v1)),3))[1]
df<-data.frame(pred="base",b=e)
for(i in 1:ncol(x)){
m<-lm(y~v1+x[,i])
b<-as.numeric(round(coef(m),3))[1]
v<-x[,i]
pred<-paste(names(x)[i])
df<-rbind(df,cbind(pred,b))
}
t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
df<-cbind(df,t)
}
if (!is.null(v2)&is.null(v3)){
e<-as.numeric(round(coef(lm(y~v1+v2)),3))[1]
df<-data.frame(pred="base",b=e)
for(i in 1:ncol(x)){
m<-lm(y~v1+v2+x[,i])
b<-as.numeric(round(coef(m),3))[1]
v<-x[,i]
pred<-paste(names(x)[i])
df<-rbind(df,cbind(pred,b))
}
t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
df<-cbind(df,t)
}
if (!is.null(v2)&!is.null(v3)){
e<-as.numeric(round(coef(lm(y~v1+v2+v3)),3))[1]
df<-data.frame(pred="base",b=e)
for(i in 1:ncol(x)){
m<-lm(y~v1+v2+v3+x[,i])
b<-as.numeric(round(coef(m),3))[1]
v<-x[,i]
pred<-paste(names(x)[i])
df<-rbind(df,cbind(pred,b))
}
t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
df<-cbind(df,t)
}}
if(logistic==TRUE){
if (!is.factor(y)){stop("Some kind of error message would be nice, but y should be a factor!")}
if (is.null(v2)&is.null(v3)){
e<-as.numeric(round(exp(coef(glm(y~v1,family=binomial()))),3))[1]
df<-data.frame(pred="base",b=e)
for(i in 1:ncol(x)){
m<-glm(y~v1+x[,i],family=binomial())
b<-as.numeric(round(exp(coef(m)),3))[1]
v<-x[,i]
pred<-paste(names(x)[i])
df<-rbind(df,cbind(pred,b))
}
t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
df<-cbind(df,t)
}
if (!is.null(v2)&is.null(v3)){
e<-as.numeric(round(exp(coef(glm(y~v1+v2,family=binomial()))),3))[1]
df<-data.frame(pred="base",b=e)
for(i in 1:ncol(x)){
m<-glm(y~v1+v2+x[,i],family=binomial())
b<-as.numeric(round(exp(coef(m)),3))[1]
v<-x[,i]
pred<-paste(names(x)[i])
df<-rbind(df,cbind(pred,b))
}
t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
df<-cbind(df,t)
}
if (!is.null(v2)&!is.null(v3)){
e<-as.numeric(round(exp(coef(glm(y~v1+v2+v3,family=binomial()))),3))[1]
df<-data.frame(pred="base",b=e)
for(i in 1:ncol(x)){
m<-glm(y~v1+v2+v3+x[,i],family=binomial())
b<-as.numeric(round(exp(coef(m)),3))[1]
v<-x[,i]
pred<-paste(names(x)[i])
df<-rbind(df,cbind(pred,b))
}
t<-c(NA,ifelse(abs(e-as.numeric(df[-1,2]))>=(e*cut),"include","drop"))
df<-cbind(df,t)
}}
return(df)
}

40
R/col_fact.R Normal file
View File

@ -0,0 +1,40 @@
#' Formatting multiple columns as factor
#'
#' Depending on dply's contains()-function.
#' @param string Columnnames containg strings.
#' @param data Dataframe
#' @param levels Levels for all selected columns
#' @param labels labels for all selected columns
#' @keywords factor
#' @export
#' @examples
#' col_fact()
col_fact<-function(string,data,levels=NULL,labels=NULL){
## Defining factors for columns containing string (can be vector of multiple strings), based on dplyr.
## Factoring several columns with same levels or labels, these can be provided.
require(dplyr)
d<-data
s<-string
n<-c()
for(i in 1:length(s)){
n<-c(n,names(select(d,contains(s[i]))))
}
if (!is.null(levels)){
for(i in 1:length(n)) {
d[,n[i]]<-factor(d[,n[i]],levels=levels)}}
if (!is.null(labels)){
for(i in 1:length(n)) {
d[,n[i]]<-factor(d[,n[i]],labels=labels)
}}
else
for(i in 1:length(n)) {
d[,n[i]]<-factor(d[,n[i]])}
return(d)
}

27
R/col_num.R Normal file
View File

@ -0,0 +1,27 @@
#' Formatting multiple columns as numeric
#'
#' Depending on dply's contains()-function.
#' @param string Columnnames containg strings.
#' @param data Dataframe
#' @keywords numeric
#' @export
#' @examples
#' col_num()
col_num<-function(string,data){
## Defining factors for columns containing string (can be vector of multiple strings), based on dplyr
require(dplyr)
d<-data
s<-string
n<-c()
for(i in 1:length(s)){
n<-c(n,names(select(d,contains(s[i]))))
}
for(i in 1:length(n)) {
d[,n[i]]<-as.numeric(d[,n[i]])
}
return(d)
}

26
R/cpr_check_function.R Normal file
View File

@ -0,0 +1,26 @@
#' CPR check
#'
#' Checking validity of cpr number
#' @param x cpr as "ddmmyy-xxxx".
#' @keywords cpr
#' @export
#' @examples
#' cpr_check()
cpr_check<-function(x){
#Check validity of CPR number, format ddmmyy-xxxx
p1<-as.integer(substr(x,1,1))
p2<-as.integer(substr(x,2,2))
p3<-as.integer(substr(x,3,3))
p4<-as.integer(substr(x,4,4))
p5<-as.integer(substr(x,5,5))
p6<-as.integer(substr(x,6,6))
p7<-as.integer(substr(x,8,8))
p8<-as.integer(substr(x,9,9))
p9<-as.integer(substr(x,10,10))
p10<-as.integer(substr(x,11,11))
result<-ifelse((p1*4+p2*3+p3*2+p4*7+p5*6+p6*5+p7*4+p8*3+p9*2+p10) %% 11 == 0,"valid","invalid")
return(result)
}

16
R/cpr_sex_function.R Normal file
View File

@ -0,0 +1,16 @@
#' Determine sex from CPR
#'
#' Format "ddmmyy-xxxx"
#' @param x cpr as "ddmmyy-xxxx".
#' @keywords cpr
#' @export
#' @examples
#' cpr_sex()
cpr_sex<-function(x){
##Input as vector of DK cpr numbers, format "ddmmyy-xxxx", returns sex according to cpr
last<-as.integer(substr(x, start = 11, stop = 11))
sex<-ifelse(last %% 2 == 0, "female", "male")
return(sex)
}

15
R/date_convert_function.R Normal file
View File

@ -0,0 +1,15 @@
#' Converting data format
#'
#' Input format as dd/mm/yyyy, output is standard yyyy-mm-dd
#' @param x data as as dd/mm/yyyy.
#' @keywords date
#' @export
#' @examples
#' date_convert()
date_convert<-function(x)
## Input format as dd/mm/yyyy, output is standard yyyy-mm-dd
{
result<-as.Date(x, format="%d/%m/%Y")
print(result)
}

View File

@ -0,0 +1,62 @@
#' Extracting date of birth from CPR
#'
#' For easy calculation.
#' @param cpr cpr-numbers in format ddmmyy-xxxx.
#' @keywords cpr
#' @export
#' @examples
#' dob_extract_cpr()
dob_extract_cpr<-function(cpr)
## Input as cpr-numbers in format ddmmyy-xxxx
## Build upon data from this document: https://cpr.dk/media/167692/personnummeret%20i%20cpr.pdf
## example vector: fsd<-c("010190-2000", "010115-4000", "010189-6000","010189-3000","010150-6000","010150-4000")
{
if (any(substr(cpr,7,7)%in%c(0:9))){stop("Input format should be ddmmyy-xxxx")} # test if input is ddmmyyxxxx
else {
dobs<-c()
a00<-as.numeric(c(0:99))
a36<-as.numeric(c(0:36))
a57<-as.numeric(c(0:57))
b00<-as.numeric(c(0,1,2,3))
b36<-as.numeric(c(4,9))
b57<-as.numeric(c(5,6,7,8))
for (x in cpr)
{
p56<-as.numeric(substr(x,5,6))
p8<-as.numeric(substr(x,8,8))
birth<-as.Date(substr(x,1,6),format="%d%m%y")
if (((p56%in%a00)&&(p8%in%b00)))
{
dob<-as.Date(format(birth, format="19%y%m%d"), format="%Y%m%d")
}
else if (((p56%in%a36)&&(p8%in%b36)))
{
dob<-as.Date(format(birth, format="20%y%m%d"), format="%Y%m%d")
}
else if ((!(p56%in%a36)&&(p8%in%b36)))
{
dob<-as.Date(format(birth, format="19%y%m%d"), format="%Y%m%d")
}
else if (((p56%in%a57)&&(p8%in%b57)))
{
dob<-as.Date(format(birth, format="20%y%m%d"), format="%Y%m%d")
}
else if ((!(p56%in%a57)&&(p8%in%b57)))
{
dob<-as.Date(format(birth, format="18%y%m%d"), format="%Y%m%d")
}
else {print("Input contains data in wrong format") # test if position 5,6 or 8 contains letters as is the case for temporary cpr-numbers
}
dobs<-append(dobs,dob)
}
return(dobs)
}
}

129
R/hwe_allele.R Normal file
View File

@ -0,0 +1,129 @@
#' HWE calculation by alleles
#'
#' For easy calculation.
#' @param x Allele 1.
#' @param y Allele 2.
#' @keywords hardy-weinberg-equllibrium
#' @export
#' @examples
#' hwe_allele()
hwe_allele<-function(x,y)
{
## Witten by Andreas Gammelgaard Damsbo, agdamsbo@pm.me, based on a non-working
## applet at from http://www.husdyr.kvl.dk/htm/kc/popgen/genetik/applets/kitest.htm
all<-pmax(length(levels(factor(x))),length(levels(factor(y))))
if(all==2){
df=1
## Biallellic system, df=1
al1<-factor(x,labels=c("p","q"))
al2<-factor(y,labels=c("p","q"))
snp<-paste(al1,al2,sep = "_")
snp[snp=="q_p"]<-"p_q"
snp_f<-factor(snp,levels=c("p_p", "p_q", "q_q"))
a<-length(snp)*2
b<-length(snp)
p<-(length(al1[al1=="p"])+length(al2[al2=="p"]))/a
q<-(length(al1[al1=="q"])+length(al2[al2=="q"]))/a
al_dist<-round(rbind(cbind(p*a,q*a),cbind(p*100,q*100)),1)
al_names<-levels(factor(x))
p_p<-round(p^2*b,3)
p_q<-round(2*p*q*b,3)
q_q<-round(q^2*b,3)
hwe<-data.frame(obs=summary(snp_f),exp=rbind(p_p, p_q, q_q))
hwe$dev<-hwe$obs-hwe$exp
hwe$chi<-hwe$dev^2/hwe$exp
snp_obs<-round(rbind(summary(snp_f),summary(snp_f)/length(snp_f)*100),1)
snp_exp<-round(rbind(hwe$exp,hwe$exp/b*100),1)
gen_names<-c(
paste(levels(factor(x))[1],levels(factor(x))[1],sep="_"),
paste(levels(factor(x))[1],levels(factor(x))[2],sep="_"),
paste(levels(factor(x))[2],levels(factor(x))[2],sep="_"))
chi<-sum(hwe$chi,na.rm = TRUE)
p_v<-pchisq(chi, df=df, lower.tail=FALSE)
}
else if(all==3){
df=3
## Triallellic system, df=3
al1<-factor(x,labels=c("p","q","r"))
al2<-factor(y,labels=c("p","q","r"))
snp<-paste(al1,al2,sep = "_")
snp[snp=="r_p"]<-"p_r"
snp[snp=="r_q"]<-"q_r"
snp[snp=="q_p"]<-"p_q"
snp_f<-factor(snp,levels=c("p_p", "p_q", "q_q","p_r","q_r", "r_r"))
a<-length(snp)*2
b<-length(snp)
p<-(length(al1[al1=="p"])+length(al2[al2=="p"]))/a
q<-(length(al1[al1=="q"])+length(al2[al2=="q"]))/a
r<-(length(al1[al1=="r"])+length(al2[al2=="r"]))/a
al_dist<-round(rbind(cbind(p*a,q*a,r*a),cbind(p*100,q*100,r*100)),1)
al_names<-levels(factor(x))
p_p<-round(p^2*b,3)
p_q<-round(2*p*q*b,3)
q_q<-round(q^2*b,3)
p_r<-round(2*r*p*b,3)
q_r<-round(2*q*r*b,3)
r_r<-round(r^2*b,3)
hwe<-data.frame(obs=summary(snp_f),exp=rbind(p_p, p_q, q_q, p_r, q_r, r_r))
hwe$dev<-hwe$obs-hwe$exp
hwe$chi<-hwe$dev^2/hwe$exp
snp_obs<-round(rbind(summary(snp_f),summary(snp_f)/length(snp_f)*100),1)
snp_exp<-round(rbind(hwe$exp,hwe$exp/b*100),1)
gen_names<-c(
paste(levels(factor(x))[1],levels(factor(x))[1],sep="_"),
paste(levels(factor(x))[1],levels(factor(x))[2],sep="_"),
paste(levels(factor(x))[2],levels(factor(x))[2],sep="_"),
paste(levels(factor(x))[1],levels(factor(x))[3],sep="_"),
paste(levels(factor(x))[2],levels(factor(x))[3],sep="_"),
paste(levels(factor(x))[3],levels(factor(x))[3],sep="_"))
chi<-sum(hwe$chi,na.rm = TRUE)
p_v<-pchisq(chi, df=df, lower.tail=FALSE)
}
else if (!any(all==c(2,3))){stop("This formula only works for bi- or triallellic systems")}
else {stop("There was an unknown error")}
colnames(al_dist)<-al_names
colnames(snp_obs)<-gen_names
colnames(snp_exp)<-gen_names
rn<-c("N","%")
rownames(al_dist)<-rn
rownames(snp_obs)<-rn
rownames(snp_exp)<-rn
int<-ifelse(p_v<=0.05,"The null-hypothesis of difference from the HWE can be confirmed","The null-hypothesis of difference from the HWE can be rejected")
t1<-"Chi-square test for Hardy-Weinberg equillibrium for a bi- or triallellic system. Read more: http://www.husdyr.kvl.dk/htm/kc/popgen/genetics/2/2.htm"
list<-list(info=t1,n.total=b,allele.dist=al_dist,observed.dist=snp_obs,expected.dist=snp_exp,chi.value=chi,p.value=p_v,df=df,interpretation=int)
return(list)
}

103
R/hwe_geno.R Normal file
View File

@ -0,0 +1,103 @@
#' HWE calculation by genotypes
#'
#' For easy calculation.
#' @param mm First count of genotype.
#' @keywords hardy-weinberg-equllibrium
#' @export
#' @examples
#' hwe_geno()
hwe_geno<-function(mm,mn,nn,mo,no,oo,alleles=2)
{
## x is the number of common homozygote, y the heterozygote and z the rare homozygote.
if (alleles==2){
## Biallelic has three degrees of freedom
df=1
a<-sum(mm,mn,nn)*2
b<-sum(mm,mn,nn)
p<-(2*mm+mn)/a
q<-(2*nn+mn)/a
al_dist<-round(rbind(cbind(p*a,q*a),cbind(p*100,q*100)),1)
al_names<-c("m","n")
p_p<-round(p^2*b,3)
p_q<-round(2*p*q*b,3)
q_q<-round(q^2*b,3)
obs=rbind(mm,mn,nn)
exp=rbind(p_p, p_q, q_q)
hwe<-data.frame(obs,exp)
hwe$dev<-hwe$obs-hwe$exp
hwe$chi<-hwe$dev^2/hwe$exp
snp_obs<-round(rbind(hwe$obs,hwe$obs/sum(hwe$obs)*100),1)
snp_exp<-round(rbind(hwe$exp,hwe$exp/b*100),1)
gen_names<-c("mm","mn","nn")
chi<-sum(hwe$chi,na.rm = TRUE)
p_v<-pchisq(chi, df=df, lower.tail=FALSE)
}
if(alleles==3){
## Triallelic has three degrees of freedom
df=3
a<-sum(mm,mn,nn,mo,no,oo)*2
b<-sum(mm,mn,nn,mo,no,oo)
p<-(2*mm+mn+mo)/a
q<-(2*nn+mn+no)/a
r<-(2*oo+no+mo)/a
al_dist<-round(rbind(cbind(p*a,q*a,r*a),cbind(p*100,q*100,r*100)),1)
al_names<-c("m","n","o")
p_p<-round(p^2*b,3)
p_q<-round(2*p*q*b,3)
q_q<-round(q^2*b,3)
p_r<-round(2*r*p*b,3)
q_r<-round(2*q*r*b,3)
r_r<-round(r^2*b,3)
obs=rbind(mm,mn,nn,mo,no,oo)
exp=rbind(p_p, p_q, q_q, p_r, q_r, r_r)
hwe<-data.frame(obs,exp)
hwe$dev<-hwe$obs-hwe$exp
hwe$chi<-hwe$dev^2/hwe$exp
snp_obs<-round(rbind(hwe$obs,hwe$obs/sum(hwe$obs)*100),1)
snp_exp<-round(rbind(hwe$exp,hwe$exp/b*100),1)
gen_names<-c("mm","mn","nn", "mo", "no", "oo")
chi<-sum(hwe$chi,na.rm = TRUE)
p_v<-pchisq(chi, df=df, lower.tail=FALSE)
}
colnames(al_dist)<-al_names
colnames(snp_obs)<-gen_names
colnames(snp_exp)<-gen_names
rownames(al_dist)<-c("N","%")
rownames(snp_obs)<-c("N","%")
rownames(snp_exp)<-c("N","%")
int<-ifelse(p_v<=0.05,"The null-hypothesis of difference from the HWE can be confirmed","The null-hypothesis of difference from the HWE can be rejected")
t1<-"Chi-square test for Hardy-Weinberg equillibrium for a bi- or triallellic system. Theory: http://www.husdyr.kvl.dk/htm/kc/popgen/genetics/2/2.htm"
list<-list(info=t1,n.total=b,allele.dist=al_dist,observed.dist=snp_obs,expected.dist=snp_exp,chi.value=chi,p.value=p_v,df=df,interpretation=int)
return(list)
}

34
R/hwe_sum.R Normal file
View File

@ -0,0 +1,34 @@
#' Summarising function of HWE calculation
#'
#' For easy printing.
#' @param a1 Allele 1.
#' @param a2 Allele 2.
#' @param f factor for grouping.
#' @keywords hardy-weinberg-equllibrium
#' @export
#' @examples
#' hwe_sum()
hwe_sum<-function(a1,a2,f){
## HWE summarising function, for several groups defined by factor f. Alleles are provided as vectors, a1 and a2.
source("https://raw.githubusercontent.com/agdamsbo/research/master/hwe_allele.R")
lst<-list()
df<-data.frame(cbind(a1,a2))
for (i in 1:length(ls<-split(df,f))){
grp<-names(ls)[i]
obs<-data.frame(hwe_allele(ls[[i]][,1],ls[[i]][,2])[[c("observed.dist")]])
pval<-round(hwe_allele(ls[[i]][,1],ls[[i]][,2])[[c("p.value")]],3)
prnt<-paste0(obs[1,]," (",obs[2,],")")
names(prnt)<-names(obs)
lst<-list(lst,grp,obs.dist=obs,print=prnt,hwe.pv=pval)
}
return(lst)
}

27
R/rep_biv.R Normal file
View File

@ -0,0 +1,27 @@
#' A repeated function
#'
#' For bivariate analyses, for gating by p-value or change-in-estimate.
#' @param y Effect meassure.
#' @param v1 Main variable in model
#' @keywords logistic regression
#' @export
#' @examples
#' rep_biv()
rep_biv<-function(y,v1,string,data,method="pval",logistic=FALSE,ci=FALSE,cut=0.1,v2=NULL,v3=NULL){
require(rep_lm)
require(rep_glm)
require(cie_test)
if (method=="pval"&logistic==FALSE){
rep_lm(y=y,v1=v1,string=string,data=data,ci=ci)
}
if (method=="pval"&logistic==TRUE){
rep_lm(y=y,v1=v1,string=string,data=data,ci=ci)
}
if (method=="cie"){
cie_test(y=y,v1=v1,string=string,data=data,logistic=logistic,cut=cut,v2=v2,v3=v3)
}
return(df)
}

91
R/rep_glm.R Normal file
View File

@ -0,0 +1,91 @@
#' A repeated logistic regression function
#'
#' For bivariate analyses.
#' @param y Effect meassure.
#' @param v1 Main variable in model
#' @keywords logistic regression
#' @export
#' @examples
#' rep_glm()
rep_glm<-function(y,v1,string,ci=FALSE,data,v2=NULL,v3=NULL){
## x is data.frame of predictors, y is vector of an aoutcome as a factor
## output is returned as coefficient, or if or=TRUE as OR with 95 % CI.
## The confint() function is rather slow, causing the whole function to hang when including many predictors and calculating the ORs with CI.
require(broom)
d<-data
x<-select(d,one_of(c(string)))
m1<-length(coef(glm(y~v1,family = binomial())))
if (!is.factor(y)){stop("Some kind of error message would be nice, but y should be a factor!")}
if (ci==TRUE){
df<-data.frame(matrix(ncol = 4))
names(df)<-c("pred","or_ci","pv","t")
for(i in 1:ncol(x)){
m<-glm(y~v1+x[,i],family = binomial())
l<-suppressMessages(round(exp(confint(m))[-c(1:m1),1],2))
u<-suppressMessages(round(exp(confint(m))[-c(1:m1),2],2))
or<-round(exp(coef(m))[-c(1:m1)],2)
or_ci<-paste0(or," (",l," to ",u,")")
pv<-round(tidy(m)$p.value[-c(1:m1)],3)
pv<-ifelse(pv<0.001,"<0.001",pv)
t <- ifelse(pv<=0.1|pv=="<0.001","include","drop")
pv <- ifelse(pv<=0.05|pv=="<0.001",paste0("*",pv),
ifelse(pv>0.05&pv<=0.1,paste0(".",pv),pv))
v<-x[,i]
if (is.factor(v)){
pred<-paste(names(x)[i],levels(v)[-1],sep = "_")
}
else {pred<-names(x)[i]}
df<-rbind(df,cbind(pred,or_ci,pv,t))
}}
if (ci==FALSE){
df<-data.frame(matrix(ncol = 4))
names(df)<-c("pred","b","pv","t")
for(i in 1:ncol(x)){
m<-glm(y~v1+x[,i],family = binomial())
b<-round(coef(m)[-c(1:m1)],3)
pv<-round(tidy(m)$p.value[-c(1:m1)],3)
pv<-ifelse(pv<0.001,"<0.001",pv)
t <- ifelse(pv<=0.1|pv=="<0.001","include","drop")
pv <- ifelse(pv<=0.05|pv=="<0.001",paste0("*",pv),
ifelse(pv>0.05&pv<=0.1,paste0(".",pv),pv))
v<-x[,i]
if (is.factor(v)){
pred<-paste(names(x)[i],levels(v)[-1],sep = "_")
}
else {pred<-names(x)[i]}
df<-rbind(df,cbind(pred,b,pv,t))
}}
return(df)
}

89
R/rep_lm.R Normal file
View File

@ -0,0 +1,89 @@
#' A repeated linear regression function
#'
#' For bivariate analyses.
#' @param y Effect meassure.
#' @param v1 Main variable in model
#' @keywords linear regression
#' @export
#' @examples
#' rep_lm()
rep_lm<-function(y,v1,string,ci=FALSE,data,v2=NULL,v3=NULL){
## x is data.frame of predictors, y is vector of an aoutcome as a factor
## output is returned as coefficient, or if ci=TRUE as coefficient with 95 % CI.
## The confint() function is rather slow, causing the whole function to hang when including many predictors and calculating the ORs with CI.
require(broom)
d<-data
x<-select(d,one_of(c(string)))
m1<-length(coef(lm(y~v1)))
if (is.factor(y)){stop("Some kind of error message would be nice, but y should not be a factor!")}
if (ci==TRUE){
df<-data.frame(matrix(ncol = 4))
names(df)<-c("pred","co_ci","pv","t")
for(i in 1:ncol(x)){
m<-lm(y~v1+x[,i])
l<-suppressMessages(round(confint(m)[-c(1:m1),1],2))
u<-suppressMessages(round(confint(m)[-c(1:m1),2],2))
co<-round(coef(m)[-c(1:m1)],2)
co_ci<-paste0(co," (",l," to ",u,")")
pv<-round(tidy(m)$p.value[-c(1:m1)],3)
pv<-ifelse(pv<0.001,"<0.001",pv)
t <- ifelse(pv<=0.1|pv=="<0.001","include","drop")
pv <- ifelse(pv<=0.05|pv=="<0.001",paste0("*",pv),
ifelse(pv>0.05&pv<=0.1,paste0(".",pv),pv))
v<-x[,i]
if (is.factor(v)){
pred<-paste(names(x)[i],levels(v)[-1],sep = "_")
}
else {pred<-names(x)[i]}
df<-rbind(df,cbind(pred,co_ci,pv,t))
}}
if (ci==FALSE){
df<-data.frame(matrix(ncol = 4))
names(df)<-c("pred","b","pv","t")
for(i in 1:ncol(x)){
m<-lm(y~v1+x[,i])
b<-round(coef(m)[-c(1:m1)],3)
pv<-round(tidy(m)$p.value[-c(1:m1)],3)
pv<-ifelse(pv<0.001,"<0.001",pv)
t <- ifelse(pv<=0.1|pv=="<0.001","include","drop")
pv <- ifelse(pv<=0.05|pv=="<0.001",paste0("*",pv),
ifelse(pv>0.05&pv<=0.1,paste0(".",pv),pv))
v<-x[,i]
if (is.factor(v)){
pred<-paste(names(x)[i],levels(v)[-1],sep = "_")
}
else {pred<-names(x)[i]}
df<-rbind(df,cbind(pred,b,pv,t))
}}
return(df)
}

20
man/age_calc.Rd Normal file
View File

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/age_calc_function.R
\name{age_calc}
\alias{age_calc}
\title{Calculating age from date of birth}
\usage{
age_calc(dob, enddate = Sys.Date(), units = "years", precise = TRUE)
}
\arguments{
\item{dob}{Date of birth.}
\item{enddate}{Date to calculate age at.}
}
\description{
For age calculations.
}
\examples{
age_calc()
}
\keyword{age}

23
man/cie_test.Rd Normal file
View File

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cie_test.R
\name{cie_test}
\alias{cie_test}
\title{A repeated regression function for change-in-estimate analysis}
\usage{
cie_test(y, v1, string, data, logistic = FALSE, cut = 0.1, v2 = NULL,
v3 = NULL)
}
\arguments{
\item{y}{Effect meassure.}
\item{v1}{Main variable in model}
\item{string}{String of columnnames from dataframe to include. Use dput().}
}
\description{
For bivariate analyses.
}
\examples{
cie_test()
}
\keyword{change-in-estimate}

24
man/col_fact.Rd Normal file
View File

@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/col_fact.R
\name{col_fact}
\alias{col_fact}
\title{Formatting multiple columns as factor}
\usage{
col_fact(string, data, levels = NULL, labels = NULL)
}
\arguments{
\item{string}{Columnnames containg strings.}
\item{data}{Dataframe}
\item{levels}{Levels for all selected columns}
\item{labels}{labels for all selected columns}
}
\description{
Depending on dply's contains()-function.
}
\examples{
col_fact()
}
\keyword{factor}

20
man/col_num.Rd Normal file
View File

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/col_num.R
\name{col_num}
\alias{col_num}
\title{Formatting multiple columns as numeric}
\usage{
col_num(string, data)
}
\arguments{
\item{string}{Columnnames containg strings.}
\item{data}{Dataframe}
}
\description{
Depending on dply's contains()-function.
}
\examples{
col_num()
}
\keyword{numeric}

18
man/cpr_check.Rd Normal file
View File

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cpr_check_function.R
\name{cpr_check}
\alias{cpr_check}
\title{CPR check}
\usage{
cpr_check(x)
}
\arguments{
\item{x}{cpr as "ddmmyy-xxxx".}
}
\description{
Checking validity of cpr number
}
\examples{
cpr_check()
}
\keyword{cpr}

18
man/cpr_sex.Rd Normal file
View File

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cpr_sex_function.R
\name{cpr_sex}
\alias{cpr_sex}
\title{Determine sex from CPR}
\usage{
cpr_sex(x)
}
\arguments{
\item{x}{cpr as "ddmmyy-xxxx".}
}
\description{
Format "ddmmyy-xxxx"
}
\examples{
cpr_sex()
}
\keyword{cpr}

18
man/date_convert.Rd Normal file
View File

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/date_convert_function.R
\name{date_convert}
\alias{date_convert}
\title{Converting data format}
\usage{
date_convert(x)
}
\arguments{
\item{x}{data as as dd/mm/yyyy.}
}
\description{
Input format as dd/mm/yyyy, output is standard yyyy-mm-dd
}
\examples{
date_convert()
}
\keyword{date}

18
man/dob_extract_cpr.Rd Normal file
View File

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dob_extract_cpr_function.R
\name{dob_extract_cpr}
\alias{dob_extract_cpr}
\title{Extracting date of birth from CPR}
\usage{
dob_extract_cpr(cpr)
}
\arguments{
\item{cpr}{cpr-numbers in format ddmmyy-xxxx.}
}
\description{
For easy calculation.
}
\examples{
dob_extract_cpr()
}
\keyword{cpr}

20
man/hwe_allele.Rd Normal file
View File

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hwe_allele.R
\name{hwe_allele}
\alias{hwe_allele}
\title{HWE calculation by alleles}
\usage{
hwe_allele(x, y)
}
\arguments{
\item{x}{Allele 1.}
\item{y}{Allele 2.}
}
\description{
For easy calculation.
}
\examples{
hwe_allele()
}
\keyword{hardy-weinberg-equllibrium}

18
man/hwe_geno.Rd Normal file
View File

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hwe_geno.R
\name{hwe_geno}
\alias{hwe_geno}
\title{HWE calculation by genotypes}
\usage{
hwe_geno(mm, mn, nn, mo, no, oo, alleles = 2)
}
\arguments{
\item{mm}{First count of genotype.}
}
\description{
For easy calculation.
}
\examples{
hwe_geno()
}
\keyword{hardy-weinberg-equllibrium}

22
man/hwe_sum.Rd Normal file
View File

@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hwe_sum.R
\name{hwe_sum}
\alias{hwe_sum}
\title{Summarising function of HWE calculation}
\usage{
hwe_sum(a1, a2, f)
}
\arguments{
\item{a1}{Allele 1.}
\item{a2}{Allele 2.}
\item{f}{factor for grouping.}
}
\description{
For easy printing.
}
\examples{
hwe_sum()
}
\keyword{hardy-weinberg-equllibrium}

22
man/rep_biv.Rd Normal file
View File

@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rep_biv.R
\name{rep_biv}
\alias{rep_biv}
\title{A repeated function}
\usage{
rep_biv(y, v1, string, data, method = "pval", logistic = FALSE,
ci = FALSE, cut = 0.1, v2 = NULL, v3 = NULL)
}
\arguments{
\item{y}{Effect meassure.}
\item{v1}{Main variable in model}
}
\description{
For bivariate analyses, for gating by p-value or change-in-estimate.
}
\examples{
rep_biv()
}
\keyword{logistic}
\keyword{regression}

21
man/rep_glm.Rd Normal file
View File

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rep_glm.R
\name{rep_glm}
\alias{rep_glm}
\title{A repeated logistic regression function}
\usage{
rep_glm(y, v1, string, ci = FALSE, data, v2 = NULL, v3 = NULL)
}
\arguments{
\item{y}{Effect meassure.}
\item{v1}{Main variable in model}
}
\description{
For bivariate analyses.
}
\examples{
rep_glm()
}
\keyword{logistic}
\keyword{regression}

21
man/rep_lm.Rd Normal file
View File

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rep_lm.R
\name{rep_lm}
\alias{rep_lm}
\title{A repeated linear regression function}
\usage{
rep_lm(y, v1, string, ci = FALSE, data, v2 = NULL, v3 = NULL)
}
\arguments{
\item{y}{Effect meassure.}
\item{v1}{Main variable in model}
}
\description{
For bivariate analyses.
}
\examples{
rep_lm()
}
\keyword{linear}
\keyword{regression}