From 2ba46e8e7aed6572baf2d04cb9bd8119aaad757e Mon Sep 17 00:00:00 2001 From: Andreas Gammelgaard Damsbo Date: Wed, 27 Nov 2024 09:51:51 +0100 Subject: [PATCH] added option to export "both" raw and label by labelling raw data to preserve as much information as possible --- R/easy_redcap.R | 165 +----------------------------------- R/read_redcap_tables.R | 106 ++++++++++++++++++++++- man/as_factor.Rd | 13 +++ man/clean_field_label.Rd | 2 +- man/ds2dd.Rd | 4 +- man/fct_drop.Rd | 18 ++++ man/is.labelled.Rd | 28 ++++++ man/read_redcap_labelled.Rd | 38 --------- man/read_redcap_tables.Rd | 9 +- man/redcap_meta_default.Rd | 20 ----- vignettes/Shiny-app.Rmd | 4 +- 11 files changed, 177 insertions(+), 230 deletions(-) create mode 100644 man/fct_drop.Rd create mode 100644 man/is.labelled.Rd delete mode 100644 man/read_redcap_labelled.Rd delete mode 100644 man/redcap_meta_default.Rd diff --git a/R/easy_redcap.R b/R/easy_redcap.R index 5fc340b..18c9c12 100644 --- a/R/easy_redcap.R +++ b/R/easy_redcap.R @@ -31,6 +31,7 @@ easy_redcap <- function(project.name, widen.data = TRUE, uri, ...) { out <- read_redcap_tables( uri = uri, token = key, + raw_or_label = "both", ... ) @@ -40,167 +41,3 @@ easy_redcap <- function(project.name, widen.data = TRUE, uri, ...) { out } - - -#' REDCap read function to preserve field labels and all factor levels -#' -#' @description -#' This works very much as `read_redcap_tables()` and might end up there -#' -#' -#' @param uri REDCap database API uri -#' @param token API token -#' @param records records to download -#' @param fields fields to download -#' @param events events to download -#' @param forms forms to download -#' @param split_forms Whether to split "repeating" or "all" forms, default is -#' "all". -#' -#' @return data.frame or list -#' @export -#' -read_redcap_labelled <- function(uri, - token, - records = NULL, - fields = NULL, - events = NULL, - forms = NULL, - split_forms = "all") { - m <- - REDCapR::redcap_metadata_read(redcap_uri = uri, token = token)[["data"]] - - # Tests - if (!is.null(fields)) { - fields_test <- fields %in% c(m$field_name, paste0(unique(m$form_name), "_complete")) - - if (any(!fields_test)) { - print(paste0( - "The following field names are invalid: ", - paste(fields[!fields_test], collapse = ", "), "." - )) - stop("Not all supplied field names are valid") - } - } - - - if (!is.null(forms)) { - forms_test <- forms %in% unique(m$form_name) - - if (any(!forms_test)) { - print(paste0( - "The following form names are invalid: ", - paste(forms[!forms_test], collapse = ", "), "." - )) - stop("Not all supplied form names are valid") - } - } - - if (!is.null(events)) { - arm_event_inst <- REDCapR::redcap_event_instruments( - redcap_uri = uri, - token = token - ) - - event_test <- events %in% unique(arm_event_inst$data$unique_event_name) - - if (any(!event_test)) { - print(paste0( - "The following event names are invalid: ", - paste(events[!event_test], collapse = ", "), "." - )) - stop("Not all supplied event names are valid") - } - } - - # Getting dataset - d <- REDCapR::redcap_read( - redcap_uri = uri, - token = token, - fields = fields, - events = events, - forms = forms, - records = records, - raw_or_label = "raw" - )[["data"]] - - # Applying labels - d <- purrr::imap(d, \(.x, .i){ - if (.i %in% m$field_name) { - # Does not handle checkboxes - out <- set_attr(.x, - label = clean_field_label(m$field_label[m$field_name == .i]), - attr = "label" - ) - out - } else { - .x - } - }) |> dplyr::bind_cols() - - d <- purrr::imap(d, \(.x, .i){ - if (any(c("radio", "dropdown") %in% m$field_type[m$field_name == .i])) { - format_redcap_factor(.x, m$select_choices_or_calculations[m$field_name == .i]) - } else { - .x - } - }) |> dplyr::bind_cols() - - # Process repeat instrument naming - # Removes any extra characters other than a-z, 0-9 and "_", to mimic raw - # instrument names. - if ("redcap_repeat_instrument" %in% names(d)) { - d$redcap_repeat_instrument <- clean_redcap_name(d$redcap_repeat_instrument) - } - - # Processing metadata to reflect focused dataset - m <- focused_metadata(m, names(d)) - - # Splitting - out <- REDCap_split(d, - m, - forms = split_forms, - primary_table_name = "" - ) - - sanitize_split(out) -} - - -#' Very simple function to remove rich text formatting from field label -#' and save the first paragraph ('

...

'). -#' -#' @param data field label -#' -#' @return character vector -#' @export -#' -#' @examples -#' clean_field_label("

Fazekas score

") -clean_field_label <- function(data) { - out <- data |> - lapply(\(.x){ - unlist(strsplit(.x, " - lapply(\(.x){ - splt <- unlist(strsplit(.x, ">")) - splt[length(splt)] - }) - Reduce(c, out) -} - - -format_redcap_factor <- function(data, meta) { - lvls <- strsplit(meta, " | ", fixed = TRUE) |> - unlist() |> - lapply(\(.x){ - splt <- unlist(strsplit(.x, ", ")) - stats::setNames(splt[1], nm = paste(splt[-1], collapse = ", ")) - }) |> - (\(.x){ - Reduce(c, .x) - })() - set_attr(data, label = lvls, attr = "labels") |> - set_attr(data, label = "labelled", attr = "class") |> - as_factor() -} diff --git a/R/read_redcap_tables.R b/R/read_redcap_tables.R index c05785a..5861296 100644 --- a/R/read_redcap_tables.R +++ b/R/read_redcap_tables.R @@ -11,7 +11,15 @@ #' @param fields fields to download #' @param events events to download #' @param forms forms to download -#' @param raw_or_label raw or label tags +#' @param raw_or_label raw or label tags. Can be +#' +#' * "raw": Standard [REDCapR] method to get raw values. +#' * "label": Standard [REDCapR] method to get label values. +#' * "both": Get raw values with REDCap labels applied as labels. Use +#' [as_factor()] to format factors with original labels and use the +#' [gtsummary] package to easily get beautiful tables with original labels +#' from REDCap. Use [fct_drop()] to drop empty levels. +#' #' @param split_forms Whether to split "repeating" or "all" forms, default is #' all. #' @@ -70,6 +78,12 @@ read_redcap_tables <- function(uri, } } + if (raw_or_label=="both"){ + rorl <- "raw" + } else { + rorl <- raw_or_label + } + # Getting dataset d <- REDCapR::redcap_read( redcap_uri = uri, @@ -78,9 +92,16 @@ read_redcap_tables <- function(uri, events = events, forms = forms, records = records, - raw_or_label = raw_or_label + raw_or_label = rorl )[["data"]] + if (raw_or_label=="both"){ + d <- apply_field_label(data=d,meta=m) + + d <- apply_factor_labels(data=d,meta=m) + } + + # Process repeat instrument naming # Removes any extra characters other than a-z, 0-9 and "_", to mimic raw # instrument names. @@ -101,3 +122,84 @@ read_redcap_tables <- function(uri, sanitize_split(out) } + + +#' Very simple function to remove rich text formatting from field label +#' and save the first paragraph ('

...

'). +#' +#' @param data field label +#' +#' @return character vector +#' @export +#' +#' @examples +#' clean_field_label("

Fazekas score

") +clean_field_label <- function(data) { + out <- data |> + lapply(\(.x){ + unlist(strsplit(.x, " + lapply(\(.x){ + splt <- unlist(strsplit(.x, ">")) + splt[length(splt)] + }) + Reduce(c, out) +} + + +format_redcap_factor <- function(data, meta) { + lvls <- strsplit(meta, " | ", fixed = TRUE) |> + unlist() |> + lapply(\(.x){ + splt <- unlist(strsplit(.x, ", ")) + stats::setNames(splt[1], nm = paste(splt[-1], collapse = ", ")) + }) |> + (\(.x){ + Reduce(c, .x) + })() + set_attr(data, label = lvls, attr = "labels") |> + set_attr(data, label = "redcapcast_labelled", attr = "class") +} + + + +#' Apply REDCap filed labels to data frame +#' +#' @param data REDCap exported data set +#' @param meta REDCap data dictionary +#' +#' @return data.frame +#' @export +#' +apply_field_label <- function(data,meta){ + purrr::imap(data, \(.x, .i){ + if (.i %in% meta$field_name) { + # Does not handle checkboxes + out <- set_attr(.x, + label = clean_field_label(meta$field_label[meta$field_name == .i]), + attr = "label" + ) + out + } else { + .x + } + }) |> dplyr::bind_cols() +} + +#' Preserve all factor levels from REDCap data dictionary in data export +#' +#' @param data REDCap exported data set +#' @param meta REDCap data dictionary +#' +#' @return data.frame +#' @export +#' +apply_factor_labels <- function(data,meta){ + purrr::imap(data, \(.x, .i){ + if (any(c("radio", "dropdown") %in% meta$field_type[meta$field_name == .i])) { + format_redcap_factor(.x, meta$select_choices_or_calculations[meta$field_name == .i]) + } else { + .x + } + }) |> dplyr::bind_cols() +} diff --git a/man/as_factor.Rd b/man/as_factor.Rd index c6ebe85..259aaaf 100644 --- a/man/as_factor.Rd +++ b/man/as_factor.Rd @@ -8,6 +8,8 @@ \alias{as_factor.character} \alias{as_factor.haven_labelled} \alias{as_factor.labelled} +\alias{as_factor.redcapcast_labelled} +\alias{as_factor.data.frame} \title{Convert labelled vectors to factors while preserving attributes} \usage{ as_factor(x, ...) @@ -33,6 +35,15 @@ as_factor(x, ...) ordered = FALSE, ... ) + +\method{as_factor}{redcapcast_labelled}( + x, + levels = c("default", "labels", "values", "both"), + ordered = FALSE, + ... +) + +\method{as_factor}{data.frame}(x, ..., only_labelled = TRUE) } \arguments{ \item{x}{Object to coerce to a factor.} @@ -49,6 +60,8 @@ as_factor(x, ...) \item{ordered}{If `TRUE` create an ordered (ordinal) factor, if `FALSE` (the default) create a regular (nominal) factor.} + +\item{only_labelled}{Only apply to labelled columns?} } \description{ This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending diff --git a/man/clean_field_label.Rd b/man/clean_field_label.Rd index 64e92d5..ada7145 100644 --- a/man/clean_field_label.Rd +++ b/man/clean_field_label.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/easy_redcap.R +% Please edit documentation in R/read_redcap_tables.R \name{clean_field_label} \alias{clean_field_label} \title{Very simple function to remove rich text formatting from field label diff --git a/man/ds2dd.Rd b/man/ds2dd.Rd index ea64cce..83ab6b0 100644 --- a/man/ds2dd.Rd +++ b/man/ds2dd.Rd @@ -11,7 +11,7 @@ ds2dd( field.type = "text", field.label = NULL, include.column.names = FALSE, - metadata = REDCapCAST::redcap_meta_default() + metadata = names(REDCapCAST::redcapcast_meta) ) } \arguments{ @@ -34,7 +34,7 @@ names.} column names for original data set for upload.} \item{metadata}{Metadata column names. Default is the included -REDCapCAST::redcap_meta_default.} +names(REDCapCAST::redcapcast_meta).} } \value{ data.frame or list of data.frame and vector diff --git a/man/fct_drop.Rd b/man/fct_drop.Rd new file mode 100644 index 0000000..3e418d7 --- /dev/null +++ b/man/fct_drop.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fct_drop.R +\name{fct_drop} +\alias{fct_drop} +\alias{fct_drop.data.frame} +\title{Drop unused levels preserving label data} +\usage{ +fct_drop.data.frame(x, ...) +} +\arguments{ +\item{x}{Factor to drop unused levels} + +\item{...}{Other arguments passed down to method.} +} +\description{ +This extends [forcats::fct_drop()] to natively work across a data.frame and +replace [base::droplevels()]. +} diff --git a/man/is.labelled.Rd b/man/is.labelled.Rd new file mode 100644 index 0000000..9a5233a --- /dev/null +++ b/man/is.labelled.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as_factor.R +\name{is.labelled} +\alias{is.labelled} +\title{Tests for multiple label classes} +\usage{ +is.labelled( + x, + classes = c("redcapcast_labelled", "haven_labelled", "labelled") +) +} +\arguments{ +\item{x}{data} + +\item{classes}{classes to test} +} +\value{ +logical +} +\description{ +Tests for multiple label classes +} +\examples{ +structure(c(1, 2, 3, 2, 10, 9), + labels = c(Unknown = 9, Refused = 10), + class = "haven_labelled" +) |> is.labelled() +} diff --git a/man/read_redcap_labelled.Rd b/man/read_redcap_labelled.Rd deleted file mode 100644 index 572bda3..0000000 --- a/man/read_redcap_labelled.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/easy_redcap.R -\name{read_redcap_labelled} -\alias{read_redcap_labelled} -\title{REDCap read function to preserve field labels and all factor levels} -\usage{ -read_redcap_labelled( - uri, - token, - records = NULL, - fields = NULL, - events = NULL, - forms = NULL, - split_forms = "all" -) -} -\arguments{ -\item{uri}{REDCap database API uri} - -\item{token}{API token} - -\item{records}{records to download} - -\item{fields}{fields to download} - -\item{events}{events to download} - -\item{forms}{forms to download} - -\item{split_forms}{Whether to split "repeating" or "all" forms, default is -"all".} -} -\value{ -data.frame or list -} -\description{ -This works very much as `read_redcap_tables()` and might end up there -} diff --git a/man/read_redcap_tables.Rd b/man/read_redcap_tables.Rd index 96bbee4..66ea146 100644 --- a/man/read_redcap_tables.Rd +++ b/man/read_redcap_tables.Rd @@ -28,7 +28,14 @@ read_redcap_tables( \item{forms}{forms to download} -\item{raw_or_label}{raw or label tags} +\item{raw_or_label}{raw or label tags. Can be + + * "raw": Standard [REDCapR] method to get raw values. + * "label": Standard [REDCapR] method to get label values. + * "both": Get raw values with REDCap labels applied as labels. Use + [as_factor()] to format factors with original labels and use the + [gtsummary] package to easily get beautiful tables with original labels + from REDCap. Use [fct_drop()] to drop empty levels.} \item{split_forms}{Whether to split "repeating" or "all" forms, default is all.} diff --git a/man/redcap_meta_default.Rd b/man/redcap_meta_default.Rd deleted file mode 100644 index 300e82e..0000000 --- a/man/redcap_meta_default.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ds2dd_detailed.R -\name{redcap_meta_default} -\alias{redcap_meta_default} -\title{Default column names of a REDCap data dictionary} -\usage{ -redcap_meta_default(...) -} -\arguments{ -\item{...}{ignored for now} -} -\value{ -character vector -} -\description{ -Default column names of a REDCap data dictionary -} -\examples{ -dput(redcap_meta_default()) -} diff --git a/vignettes/Shiny-app.Rmd b/vignettes/Shiny-app.Rmd index 3443fbe..911ec7e 100644 --- a/vignettes/Shiny-app.Rmd +++ b/vignettes/Shiny-app.Rmd @@ -36,14 +36,14 @@ str(ds) ```{r} ds|> - ds2dd_detailed()|> + ds2dd_detailed(metadata = names(REDCapCAST::redcapcast_meta))|> purrr::pluck("data") |> str() ``` ```{r} ds|> - ds2dd_detailed()|> + ds2dd_detailed(metadata = names(REDCapCAST::redcapcast_meta))|> purrr::pluck("meta") |> head(10) ```