From 21c2dc044481ead480a039ac217a5aff9658b3a7 Mon Sep 17 00:00:00 2001 From: Andreas Gammelgaard Damsbo Date: Tue, 26 Nov 2024 14:46:11 +0100 Subject: [PATCH] New function to export redcap data with labels --- R/easy_redcap.R | 164 ++++++++++++++++++++++++++++++++++++ man/REDCapCAST-package.Rd | 2 +- man/all_na.Rd | 2 +- man/as_factor.Rd | 3 +- man/clean_field_label.Rd | 22 +++++ man/read_redcap_labelled.Rd | 38 +++++++++ 6 files changed, 228 insertions(+), 3 deletions(-) create mode 100644 man/clean_field_label.Rd create mode 100644 man/read_redcap_labelled.Rd diff --git a/R/easy_redcap.R b/R/easy_redcap.R index b3fd326..5fc340b 100644 --- a/R/easy_redcap.R +++ b/R/easy_redcap.R @@ -40,3 +40,167 @@ easy_redcap <- function(project.name, widen.data = TRUE, uri, ...) { out } + + +#' REDCap read function to preserve field labels and all factor levels +#' +#' @description +#' This works very much as `read_redcap_tables()` and might end up there +#' +#' +#' @param uri REDCap database API uri +#' @param token API token +#' @param records records to download +#' @param fields fields to download +#' @param events events to download +#' @param forms forms to download +#' @param split_forms Whether to split "repeating" or "all" forms, default is +#' "all". +#' +#' @return data.frame or list +#' @export +#' +read_redcap_labelled <- function(uri, + token, + records = NULL, + fields = NULL, + events = NULL, + forms = NULL, + split_forms = "all") { + m <- + REDCapR::redcap_metadata_read(redcap_uri = uri, token = token)[["data"]] + + # Tests + if (!is.null(fields)) { + fields_test <- fields %in% c(m$field_name, paste0(unique(m$form_name), "_complete")) + + if (any(!fields_test)) { + print(paste0( + "The following field names are invalid: ", + paste(fields[!fields_test], collapse = ", "), "." + )) + stop("Not all supplied field names are valid") + } + } + + + if (!is.null(forms)) { + forms_test <- forms %in% unique(m$form_name) + + if (any(!forms_test)) { + print(paste0( + "The following form names are invalid: ", + paste(forms[!forms_test], collapse = ", "), "." + )) + stop("Not all supplied form names are valid") + } + } + + if (!is.null(events)) { + arm_event_inst <- REDCapR::redcap_event_instruments( + redcap_uri = uri, + token = token + ) + + event_test <- events %in% unique(arm_event_inst$data$unique_event_name) + + if (any(!event_test)) { + print(paste0( + "The following event names are invalid: ", + paste(events[!event_test], collapse = ", "), "." + )) + stop("Not all supplied event names are valid") + } + } + + # Getting dataset + d <- REDCapR::redcap_read( + redcap_uri = uri, + token = token, + fields = fields, + events = events, + forms = forms, + records = records, + raw_or_label = "raw" + )[["data"]] + + # Applying labels + d <- purrr::imap(d, \(.x, .i){ + if (.i %in% m$field_name) { + # Does not handle checkboxes + out <- set_attr(.x, + label = clean_field_label(m$field_label[m$field_name == .i]), + attr = "label" + ) + out + } else { + .x + } + }) |> dplyr::bind_cols() + + d <- purrr::imap(d, \(.x, .i){ + if (any(c("radio", "dropdown") %in% m$field_type[m$field_name == .i])) { + format_redcap_factor(.x, m$select_choices_or_calculations[m$field_name == .i]) + } else { + .x + } + }) |> dplyr::bind_cols() + + # Process repeat instrument naming + # Removes any extra characters other than a-z, 0-9 and "_", to mimic raw + # instrument names. + if ("redcap_repeat_instrument" %in% names(d)) { + d$redcap_repeat_instrument <- clean_redcap_name(d$redcap_repeat_instrument) + } + + # Processing metadata to reflect focused dataset + m <- focused_metadata(m, names(d)) + + # Splitting + out <- REDCap_split(d, + m, + forms = split_forms, + primary_table_name = "" + ) + + sanitize_split(out) +} + + +#' Very simple function to remove rich text formatting from field label +#' and save the first paragraph ('

...

'). +#' +#' @param data field label +#' +#' @return character vector +#' @export +#' +#' @examples +#' clean_field_label("

Fazekas score

") +clean_field_label <- function(data) { + out <- data |> + lapply(\(.x){ + unlist(strsplit(.x, " + lapply(\(.x){ + splt <- unlist(strsplit(.x, ">")) + splt[length(splt)] + }) + Reduce(c, out) +} + + +format_redcap_factor <- function(data, meta) { + lvls <- strsplit(meta, " | ", fixed = TRUE) |> + unlist() |> + lapply(\(.x){ + splt <- unlist(strsplit(.x, ", ")) + stats::setNames(splt[1], nm = paste(splt[-1], collapse = ", ")) + }) |> + (\(.x){ + Reduce(c, .x) + })() + set_attr(data, label = lvls, attr = "labels") |> + set_attr(data, label = "labelled", attr = "class") |> + as_factor() +} diff --git a/man/REDCapCAST-package.Rd b/man/REDCapCAST-package.Rd index c30c764..32f0b06 100644 --- a/man/REDCapCAST-package.Rd +++ b/man/REDCapCAST-package.Rd @@ -4,7 +4,7 @@ \name{REDCapCAST-package} \alias{REDCapCAST} \alias{REDCapCAST-package} -\title{REDCapCAST: REDCap Castellated Data Handling and Metadata Casting} +\title{REDCapCAST: REDCap Metadata Casting and Castellated Data Handling} \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} diff --git a/man/all_na.Rd b/man/all_na.Rd index 9092e71..aa81327 100644 --- a/man/all_na.Rd +++ b/man/all_na.Rd @@ -16,5 +16,5 @@ logical Check if vector is all NA } \examples{ -rep(NA,4) |> all_na() +rep(NA, 4) |> all_na() } diff --git a/man/as_factor.Rd b/man/as_factor.Rd index bf4e302..c6ebe85 100644 --- a/man/as_factor.Rd +++ b/man/as_factor.Rd @@ -65,7 +65,8 @@ c(1, 4, 3, "A", 7, 8, 1) |> as_factor() structure(c(1, 2, 3, 2, 10, 9), labels = c(Unknown = 9, Refused = 10) ) |> - as_factor() |> dput() + as_factor() |> + dput() structure(c(1, 2, 3, 2, 10, 9), labels = c(Unknown = 9, Refused = 10), diff --git a/man/clean_field_label.Rd b/man/clean_field_label.Rd new file mode 100644 index 0000000..64e92d5 --- /dev/null +++ b/man/clean_field_label.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/easy_redcap.R +\name{clean_field_label} +\alias{clean_field_label} +\title{Very simple function to remove rich text formatting from field label +and save the first paragraph ('

...

').} +\usage{ +clean_field_label(data) +} +\arguments{ +\item{data}{field label} +} +\value{ +character vector +} +\description{ +Very simple function to remove rich text formatting from field label +and save the first paragraph ('

...

'). +} +\examples{ +clean_field_label("

Fazekas score

") +} diff --git a/man/read_redcap_labelled.Rd b/man/read_redcap_labelled.Rd new file mode 100644 index 0000000..572bda3 --- /dev/null +++ b/man/read_redcap_labelled.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/easy_redcap.R +\name{read_redcap_labelled} +\alias{read_redcap_labelled} +\title{REDCap read function to preserve field labels and all factor levels} +\usage{ +read_redcap_labelled( + uri, + token, + records = NULL, + fields = NULL, + events = NULL, + forms = NULL, + split_forms = "all" +) +} +\arguments{ +\item{uri}{REDCap database API uri} + +\item{token}{API token} + +\item{records}{records to download} + +\item{fields}{fields to download} + +\item{events}{events to download} + +\item{forms}{forms to download} + +\item{split_forms}{Whether to split "repeating" or "all" forms, default is +"all".} +} +\value{ +data.frame or list +} +\description{ +This works very much as `read_redcap_tables()` and might end up there +}