stRoke/R/cpr_tools.R

130 lines
4.0 KiB
R
Raw Normal View History

2022-09-22 14:20:46 +02:00
#' CPR check
#'
#' Checking validity of cpr number. Vectorised.
#' @param cpr cpr-numbers as ddmmyy[-.]xxxx or ddmmyyxxxx. Also mixed formatting. Vector or data frame column.
#' @keywords cpr
#'
2022-09-22 19:53:15 +02:00
#' @return Logical vector of cpr validity
2022-09-22 14:20:46 +02:00
#' @export
#'
#' @examples
#' fsd<-c("2310450637", "010115-4000", "0101896000","010189-3000","300450-1030","010150-4021")
#' cpr_check(fsd)
#' all(cpr_check(fsd))
cpr_check<-function(cpr){
# Check validity of CPR number, format ddmmyy-xxxx
2022-09-22 19:53:15 +02:00
# Build upon data from this document: https://cpr.dk/media/12066/personnummeret-i-cpr.pdf
## OBS according to new description, not all valid CPR numbers apply to this modulus 11 rule.
message(
"OBS: according to new description, not all valid CPR numbers apply to this modulus 11 rule.
Please refer to: https://cpr.dk/media/12066/personnummeret-i-cpr.pdf")
2022-09-22 14:20:46 +02:00
2022-09-22 19:53:15 +02:00
str_length <- nchar(cpr)
# Calculating length of each element in vector
2022-09-22 14:20:46 +02:00
2022-09-22 19:53:15 +02:00
cpr_short <- paste0(substr(cpr,1,6),substr(cpr,str_length-3,str_length))
# Subsetting strings to first 6 and last 4 characters to short format cpr.
2022-09-22 14:20:46 +02:00
2022-09-22 19:53:15 +02:00
cpr_matrix <- matrix(as.numeric(unlist(strsplit(cpr_short,""))),nrow=10)
# Splitting all strings by each character to list, unlisting and creating matrix. Default is by column.
test_vector <- c(4,3,2,7,6,5,4,3,2,1)
# Multiplication vector from https://cpr.dk/media/12066/personnummeret-i-cpr.pdf
colSums(cpr_matrix*test_vector) %% 11 == 0
# Testing if modulus 11 == 0 of sums of matrix * multiplication vector.
2022-09-22 14:20:46 +02:00
}
#' Extracting date of birth from CPR
#'
#' For easy calculation.
#' @param cpr cpr-numbers as ddmmyy[-.]xxxx or ddmmyyxxxx. Also mixed formatting. Vector or data frame column.
#' @keywords cpr
#'
#' @return vector of Date elements. Format yyyy-mm-dd
#' @export
#'
#' @examples
#' cpr_dob("231045-0637")
#' fsd<-c("010190-2000", "010115-4000", "0101896000","010189-3000","300450-1030","010150-4021")
#' cpr_dob(fsd)
cpr_dob<-function(cpr){
## Input as cpr-numbers in format ddmmyy-xxxx
2022-09-22 19:53:15 +02:00
## Build upon data from this document: https://cpr.dk/media/12066/personnummeret-i-cpr.pdf
2022-09-22 14:20:46 +02:00
dobs<-c()
2022-09-22 19:53:15 +02:00
a00<-c(0:99)
a36<-c(0:36)
a57<-c(0:57)
b00<-c(0:3)
b36<-c(4,9)
b57<-c(5:8)
str_length <- nchar(cpr)
# Calculating length of each element in vector
cpr_short <- paste0(substr(cpr,1,6),substr(cpr,str_length-3,str_length))
# Subsetting strings to first 6 and last 4 characters to short format cpr.
year <- as.numeric(substr(cpr_short,5,6))
ddmmyy <- as.Date(substr(cpr_short,1,6),format="%d%m%y")
2022-09-22 14:20:46 +02:00
for (i in seq_along(cpr)){
2022-09-22 19:53:15 +02:00
p56 <- year[i]
2022-09-22 14:20:46 +02:00
2022-09-22 19:53:15 +02:00
p8 <- substr(cpr_short[i],7,7)
# p8 is position 8 from the traditional cpr ddmmyy-xxxx, pos 7 in short version.
2022-09-22 14:20:46 +02:00
2022-09-22 19:53:15 +02:00
birth <- ddmmyy[i]
2022-09-22 14:20:46 +02:00
if (((p56%in%a00)&&(p8%in%b00)))
{
dob<-as.Date(format(birth, format="19%y%m%d"), format="%Y%m%d")
}
else if (((p56%in%a36)&&(p8%in%b36)))
{
dob<-as.Date(format(birth, format="20%y%m%d"), format="%Y%m%d")
}
else if ((!(p56%in%a36)&&(p8%in%b36)))
{
dob<-as.Date(format(birth, format="19%y%m%d"), format="%Y%m%d")
}
else if (((p56%in%a57)&&(p8%in%b57)))
{
dob<-as.Date(format(birth, format="20%y%m%d"), format="%Y%m%d")
}
else if ((!(p56%in%a57)&&(p8%in%b57)))
{
dob<-as.Date(format(birth, format="18%y%m%d"), format="%Y%m%d")
}
2022-09-22 19:53:15 +02:00
else {print("Input contains data in wrong format")
2022-09-22 14:20:46 +02:00
}
dobs[i]<-dob
}
as.Date(dobs, origin = "1970-01-01")
}
2022-09-22 15:45:40 +02:00
#' Determine female sex from CPR
#'
#' Just checking if last number of a string is equal or not.
#' @param cpr cpr-numbers as ddmmyy[-.]xxxx or ddmmyyxxxx. Also mixed formatting. Vector or data frame column.
#' @keywords cpr
#'
#' @return Logical vector
#' @export
#' @examples
#' cpr_female("231045-0637")
cpr_female<-function(cpr){
##Input as vector of DK cpr numbers, format "ddmmyy-xxxx", returns sex according to cpr
x <- nchar(as.character(cpr)) # Formats as character to avoid confusions
as.logical(as.integer(substr(cpr, start = x, stop = x)) %% 2)
}