styling and new function is.repeated_longitudinal()

This commit is contained in:
Andreas Gammelgaard Damsbo 2024-02-06 14:02:55 +01:00
parent 3e356004d4
commit e1509188ff
4 changed files with 188 additions and 139 deletions

View File

@ -139,6 +139,8 @@ REDCap_split <- function(records,
) )
) )
if ("redcap_repeat_instrument" %in% vars_in_data) { if ("redcap_repeat_instrument" %in% vars_in_data) {
# Variables to be at the beginning of each repeating instrument # Variables to be at the beginning of each repeating instrument
repeat_instrument_fields <- grep("^redcap_repeat.*", repeat_instrument_fields <- grep("^redcap_repeat.*",
@ -197,6 +199,5 @@ REDCap_split <- function(records,
} }
out out
} }

228
R/utils.r
View File

@ -1,5 +1,3 @@
#' focused_metadata #' focused_metadata
#' @description Extracts limited metadata for variables in a dataset #' @description Extracts limited metadata for variables in a dataset
#' @param metadata A dataframe containing metadata #' @param metadata A dataframe containing metadata
@ -8,7 +6,6 @@
#' @export #' @export
#' #'
focused_metadata <- function(metadata, vars_in_data) { focused_metadata <- function(metadata, vars_in_data) {
if (any(c("tbl_df", "tbl") %in% class(metadata))) { if (any(c("tbl_df", "tbl") %in% class(metadata))) {
metadata <- data.frame(metadata) metadata <- data.frame(metadata)
} }
@ -17,9 +14,11 @@ focused_metadata <- function(metadata, vars_in_data) {
field_type <- grepl(".*[Ff]ield[._][Tt]ype$", names(metadata)) field_type <- grepl(".*[Ff]ield[._][Tt]ype$", names(metadata))
fields <- fields <-
metadata[!metadata[, field_type] %in% c("descriptive", "checkbox") & metadata[
!metadata[, field_type] %in% c("descriptive", "checkbox") &
metadata[, field_name] %in% vars_in_data, metadata[, field_name] %in% vars_in_data,
field_name] field_name
]
# Process checkbox fields # Process checkbox fields
if (any(metadata[, field_type] == "checkbox")) { if (any(metadata[, field_type] == "checkbox")) {
@ -29,18 +28,21 @@ focused_metadata <- function(metadata, vars_in_data) {
# Processing # Processing
checkbox_basenames <- checkbox_basenames <-
metadata[metadata[, field_type] == "checkbox" & metadata[
metadata[, field_type] == "checkbox" &
metadata[, field_name] %in% vars_check, metadata[, field_name] %in% vars_check,
field_name] field_name
]
fields <- c(fields, checkbox_basenames) fields <- c(fields, checkbox_basenames)
} }
# Process instrument status fields # Process instrument status fields
form_names <- form_names <-
unique(metadata[, grepl(".*[Ff]orm[._][Nn]ame$", unique(metadata[, grepl(
names(metadata))][metadata[, field_name] ".*[Ff]orm[._][Nn]ame$",
names(metadata)
)][metadata[, field_name]
%in% fields]) %in% fields])
form_complete_fields <- paste0(form_names, "_complete") form_complete_fields <- paste0(form_names, "_complete")
@ -54,33 +56,34 @@ focused_metadata <- function(metadata, vars_in_data) {
timestamp_fields <- timestamps timestamp_fields <- timestamps
fields <- c(fields, timestamp_fields) fields <- c(fields, timestamp_fields)
} }
# Process ".*\\.factor" fields supplied by REDCap's export data R script # Process ".*\\.factor" fields supplied by REDCap's export data R script
if (any(grepl("\\.factor$", vars_in_data))) { if (any(grepl("\\.factor$", vars_in_data))) {
factor_fields <- factor_fields <-
do.call("rbind", do.call(
"rbind",
apply(fields, apply(fields,
1, 1,
function(x, y) { function(x, y) {
field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y) field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y)
if (any(field_indices)) if (any(field_indices)) {
data.frame( data.frame(
field_name = y[field_indices], field_name = y[field_indices],
form_name = x[2], form_name = x[2],
stringsAsFactors = FALSE, stringsAsFactors = FALSE,
row.names = NULL row.names = NULL
) )
}
}, },
y = vars_in_data)) y = vars_in_data
)
)
fields <- c(fields, factor_fields[, 1]) fields <- c(fields, factor_fields[, 1])
} }
metadata[metadata[, field_name] %in% fields, ] metadata[metadata[, field_name] %in% fields, ]
} }
#' clean_redcap_name #' clean_redcap_name
@ -94,13 +97,18 @@ focused_metadata <- function(metadata, vars_in_data) {
#' @return vector or data frame, same format as input #' @return vector or data frame, same format as input
#' @export #' @export
#' #'
clean_redcap_name <- function(x){ clean_redcap_name <- function(x) {
gsub(
gsub(" ", "_", " ", "_",
gsub("[' ']$","", gsub(
gsub("[^a-z0-9' '_]", "", "[' ']$", "",
gsub(
"[^a-z0-9' '_]", "",
tolower(x) tolower(x)
)))} )
)
)
}
#' Sanitize list of data frames #' Sanitize list of data frames
@ -116,15 +124,18 @@ clean_redcap_name <- function(x){
#' #'
sanitize_split <- function(l, sanitize_split <- function(l,
generic.names = c( generic.names = c(
"record_id",
"redcap_event_name", "redcap_event_name",
"redcap_repeat_instrument", "redcap_repeat_instrument",
"redcap_repeat_instance" "redcap_repeat_instance"
)) { )) {
generic.names <- c(get_id_name(l),
generic.names,
paste0(names(l), "_complete"))
lapply(l, function(i) { lapply(l, function(i) {
if (ncol(i) > 2) { if (ncol(i) > 2) {
s <- data.frame(i[, !colnames(i) %in% generic.names]) s <- data.frame(i[, !colnames(i) %in% generic.names])
i[!apply(is.na(s), MARGIN = 1, FUN = all),] i[!apply(is.na(s), MARGIN = 1, FUN = all), ]
} else { } else {
i i
} }
@ -132,6 +143,19 @@ sanitize_split <- function(l,
} }
#' Get the id name
#'
#' @param data data frame or list
#'
#' @return character vector
get_id_name <- function(data) {
if ("list" %in% class(data)) {
do.call(c, lapply(data, names))[[1]]
} else {
names(data)[[1]]
}
}
#' Match fields to forms #' Match fields to forms
#' #'
#' @param metadata A data frame containing field names and form names #' @param metadata A data frame containing field names and form names
@ -143,20 +167,23 @@ sanitize_split <- function(l,
#' #'
#' #'
match_fields_to_form <- function(metadata, vars_in_data) { match_fields_to_form <- function(metadata, vars_in_data) {
metadata <- data.frame(metadata) metadata <- data.frame(metadata)
field_form_name <- grepl(".*([Ff]ield|[Ff]orm)[._][Nn]ame$",names(metadata)) field_form_name <- grepl(".*([Ff]ield|[Ff]orm)[._][Nn]ame$", names(metadata))
field_type <- grepl(".*[Ff]ield[._][Tt]ype$",names(metadata)) field_type <- grepl(".*[Ff]ield[._][Tt]ype$", names(metadata))
fields <- metadata[!metadata[,field_type] %in% c("descriptive", "checkbox"), fields <- metadata[
field_form_name] !metadata[, field_type] %in% c("descriptive", "checkbox"),
field_form_name
]
names(fields) <- c("field_name", "form_name") names(fields) <- c("field_name", "form_name")
# Process instrument status fields # Process instrument status fields
form_names <- unique(metadata[,grepl(".*[Ff]orm[._][Nn]ame$", form_names <- unique(metadata[, grepl(
names(metadata))]) ".*[Ff]orm[._][Nn]ame$",
names(metadata)
)])
form_complete_fields <- data.frame( form_complete_fields <- data.frame(
field_name = paste0(form_names, "_complete"), field_name = paste0(form_names, "_complete"),
form_name = form_names, form_name = form_names,
@ -176,57 +203,65 @@ match_fields_to_form <- function(metadata, vars_in_data) {
) )
fields <- rbind(fields, timestamp_fields) fields <- rbind(fields, timestamp_fields)
} }
# Process checkbox fields # Process checkbox fields
if (any(metadata[,field_type] == "checkbox")) { if (any(metadata[, field_type] == "checkbox")) {
checkbox_basenames <- metadata[metadata[,field_type] == "checkbox", checkbox_basenames <- metadata[
field_form_name] metadata[, field_type] == "checkbox",
field_form_name
]
checkbox_fields <- checkbox_fields <-
do.call("rbind", do.call(
"rbind",
apply(checkbox_basenames, apply(checkbox_basenames,
1, 1,
function(x, y) function(x, y) {
data.frame( data.frame(
field_name = field_name =
y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"), y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"),
y, perl = TRUE)], y,
perl = TRUE
)],
form_name = x[2], form_name = x[2],
stringsAsFactors = FALSE, stringsAsFactors = FALSE,
row.names = NULL row.names = NULL
), )
y = vars_in_data)) },
y = vars_in_data
)
)
fields <- rbind(fields, checkbox_fields) fields <- rbind(fields, checkbox_fields)
} }
# Process ".*\\.factor" fields supplied by REDCap's export data R script # Process ".*\\.factor" fields supplied by REDCap's export data R script
if (any(grepl("\\.factor$", vars_in_data))) { if (any(grepl("\\.factor$", vars_in_data))) {
factor_fields <- factor_fields <-
do.call("rbind", do.call(
"rbind",
apply(fields, apply(fields,
1, 1,
function(x, y) { function(x, y) {
field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y) field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y)
if (any(field_indices)) if (any(field_indices)) {
data.frame( data.frame(
field_name = y[field_indices], field_name = y[field_indices],
form_name = x[2], form_name = x[2],
stringsAsFactors = FALSE, stringsAsFactors = FALSE,
row.names = NULL row.names = NULL
) )
}
}, },
y = vars_in_data)) y = vars_in_data
)
)
fields <- rbind(fields, factor_fields) fields <- rbind(fields, factor_fields)
} }
fields fields
} }
#' Split a data frame into separate tables for each form #' Split a data frame into separate tables for each form
@ -256,10 +291,12 @@ match_fields_to_form <- function(metadata, vars_in_data) {
#' #'
#' # Create the fields #' # Create the fields
#' fields <- matrix( #' fields <- matrix(
#' c("form_a_name", "form_a", #' c(
#' "form_a_name", "form_a",
#' "form_a_age", "form_a", #' "form_a_age", "form_a",
#' "form_b_name", "form_b", #' "form_b_name", "form_b",
#' "form_b_gender", "form_b"), #' "form_b_gender", "form_b"
#' ),
#' ncol = 2, byrow = TRUE #' ncol = 2, byrow = TRUE
#' ) #' )
#' #'
@ -269,14 +306,17 @@ split_non_repeating_forms <-
function(table, universal_fields, fields) { function(table, universal_fields, fields) {
forms <- unique(fields[[2]]) forms <- unique(fields[[2]])
x <- lapply(forms, x <- lapply(
function (x) { forms,
table[names(table) %in% union(universal_fields, function(x) {
fields[fields[, 2] == x, 1])] table[names(table) %in% union(
}) universal_fields,
fields[fields[, 2] == x, 1]
)]
}
)
structure(x, names = forms) structure(x, names = forms)
} }
@ -295,7 +335,7 @@ split_non_repeating_forms <-
#' #'
#' @examples #' @examples
#' test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now") #' test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")
#' strsplitx(test,"[0-9]",type="around") #' strsplitx(test, "[0-9]", type = "around")
strsplitx <- function(x, strsplitx <- function(x,
split, split,
type = "classic", type = "classic",
@ -306,26 +346,33 @@ strsplitx <- function(x,
out <- base::strsplit(x = x, split = split, perl = perl, ...) out <- base::strsplit(x = x, split = split, perl = perl, ...)
} else if (type == "before") { } else if (type == "before") {
# split before the delimiter and keep it # split before the delimiter and keep it
out <- base::strsplit(x = x, out <- base::strsplit(
x = x,
split = paste0("(?<=.)(?=", split, ")"), split = paste0("(?<=.)(?=", split, ")"),
perl = TRUE, perl = TRUE,
...) ...
)
} else if (type == "after") { } else if (type == "after") {
# split after the delimiter and keep it # split after the delimiter and keep it
out <- base::strsplit(x = x, out <- base::strsplit(
x = x,
split = paste0("(?<=", split, ")"), split = paste0("(?<=", split, ")"),
perl = TRUE, perl = TRUE,
...) ...
)
} else if (type == "around") { } else if (type == "around") {
# split around the defined delimiter # split around the defined delimiter
out <- base::strsplit(gsub("~~", "~", # Removes double ~ out <- base::strsplit(gsub(
gsub("^~", "", # Removes leading ~ "~~", "~", # Removes double ~
gsub(
"^~", "", # Removes leading ~
gsub( gsub(
# Splits and inserts ~ at all delimiters # Splits and inserts ~ at all delimiters
paste0("(", split, ")"), "~\\1~", x paste0("(", split, ")"), "~\\1~", x
))), "~") )
)
), "~")
} else { } else {
# wrong type input # wrong type input
stop("type must be 'classic', 'after', 'before' or 'around'!") stop("type must be 'classic', 'after', 'before' or 'around'!")
@ -345,37 +392,36 @@ strsplitx <- function(x,
#' @export #' @export
#' #'
#' @examples #' @examples
#' d2w(c(2:8,21)) #' d2w(c(2:8, 21))
#' d2w(data.frame(2:7,3:8,1),lang="da",neutrum=TRUE) #' d2w(data.frame(2:7, 3:8, 1), lang = "da", neutrum = TRUE)
#' #'
#' ## If everything=T, also larger numbers are reduced. #' ## If everything=T, also larger numbers are reduced.
#' ## Elements in the list are same length as input #' ## Elements in the list are same length as input
#' d2w(list(2:8,c(2,6,4,23),2), everything=TRUE) #' d2w(list(2:8, c(2, 6, 4, 23), 2), everything = TRUE)
#' #'
d2w <- function(x, lang = "en", neutrum=FALSE, everything=FALSE) { d2w <- function(x, lang = "en", neutrum = FALSE, everything = FALSE) {
# In Danish the written 1 depends on the counted word # In Danish the written 1 depends on the counted word
if (neutrum) nt <- "t" else nt <- "n" if (neutrum) nt <- "t" else nt <- "n"
# A sapply() call with nested lapply() to handle vectors, data.frames and lists # A sapply() call with nested lapply() to handle vectors, data.frames and lists
convert <- function(x, lang, neutrum) { convert <- function(x, lang, neutrum) {
zero_nine = data.frame( zero_nine <- data.frame(
num = 0:9, num = 0:9,
en = c( en = c(
'zero', "zero",
'one', "one",
'two', "two",
'three', "three",
'four', "four",
'five', "five",
'six', "six",
'seven', "seven",
'eight', "eight",
'nine' "nine"
), ),
da = c( da = c(
"nul", "nul",
paste0("e",nt), paste0("e", nt),
"to", "to",
"tre", "tre",
"fire", "fire",
@ -401,15 +447,14 @@ d2w <- function(x, lang = "en", neutrum=FALSE, everything=FALSE) {
# Also converts numbers >9 to single digits and writes out # Also converts numbers >9 to single digits and writes out
# Uses strsplitx() # Uses strsplitx()
if (everything) { if (everything) {
out <- sapply(x,function(y){ out <- sapply(x, function(y) {
do.call(c,lapply(y,function(z){ do.call(c, lapply(y, function(z) {
v <- strsplitx(z,"[0-9]",type="around") v <- strsplitx(z, "[0-9]", type = "around")
Reduce(paste,sapply(v,convert,lang = lang, neutrum = neutrum)) Reduce(paste, sapply(v, convert, lang = lang, neutrum = neutrum))
})) }))
}) })
} else { } else {
out <- sapply(x,convert,lang = lang, neutrum = neutrum) out <- sapply(x, convert, lang = lang, neutrum = neutrum)
} }
if (is.data.frame(x)) out <- data.frame(out) if (is.data.frame(x)) out <- data.frame(out)
@ -426,19 +471,20 @@ d2w <- function(x, lang = "en", neutrum=FALSE, everything=FALSE) {
#' @return logical #' @return logical
#' @examples #' @examples
#' is.repeated_longitudinal(c("record_id", "age", "record_id", "gender")) #' is.repeated_longitudinal(c("record_id", "age", "record_id", "gender"))
#' #' is.repeated_longitudinal(redcapcast_data)
#' is.repeated_longitudinal(list(redcapcast_data))
is.repeated_longitudinal <- function(data, generics = c( is.repeated_longitudinal <- function(data, generics = c(
"redcap_event_name", "redcap_event_name",
"redcap_repeat_instrument", "redcap_repeat_instrument",
"redcap_repeat_instance" "redcap_repeat_instance"
)) { )) {
if (is.list(data)) { if ("list" %in% class(data)) {
names <- data |> names <- data |>
lapply(names) |> lapply(names) |>
purrr::list_c() purrr::list_c()
} else if (is.data.frame(data)) { } else if ("data.frame" %in% class(data)) {
names <- names(data) names <- names(data)
} else if (is.vector(data)) { } else if ("character" %in% class(data)) {
names <- data names <- data
} }
any(generics %in% names) any(generics %in% names)

View File

@ -6,7 +6,7 @@
\usage{ \usage{
sanitize_split( sanitize_split(
l, l,
generic.names = c("record_id", "redcap_event_name", "redcap_repeat_instrument", generic.names = c("redcap_event_name", "redcap_repeat_instrument",
"redcap_repeat_instance") "redcap_repeat_instance")
) )
} }

View File

@ -36,10 +36,12 @@ universal_fields <- c("id")
# Create the fields # Create the fields
fields <- matrix( fields <- matrix(
c("form_a_name", "form_a", c(
"form_a_name", "form_a",
"form_a_age", "form_a", "form_a_age", "form_a",
"form_b_name", "form_b", "form_b_name", "form_b",
"form_b_gender", "form_b"), "form_b_gender", "form_b"
),
ncol = 2, byrow = TRUE ncol = 2, byrow = TRUE
) )