support labelled data

2025-04-22 14:22:33 +02:00 · 2024-11-28 14:32:03 +01:00 · 2024-11-28 14:32:03 +01:00 · 2aa268f747
commit 2aa268f747
parent 5926c12da6
13 changed files with 188 additions and 65 deletions
--- a/R/redcap_wider.R
+++ b/R/redcap_wider.R
@ -4,14 +4,20 @@ utils::globalVariables(c(
  "inst.glue"
 ))
-#' @title Redcap Wider
+#' Transforms list of REDCap data.frames to a single wide data.frame
-#' @description Converts a list of REDCap data frames from long to wide format.
+#'
-#' Handles longitudinal projects, but not yet repeated instruments.
+#' @description Converts a list of REDCap data.frames from long to wide format.
-#' @param data A list of data frames.
+#' In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
-#' @param event.glue A dplyr::glue string for repeated events naming
+#' on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
-#' @param inst.glue A dplyr::glue string for repeated instruments naming
+#' split by \link[REDCapCAST]{REDCap_split}.
-#' @return The list of data frames in wide format.
+#'
 #' @param data A list of data frames
 #' @param event.glue A \link[glue]{glue} string for repeated events naming
 #' @param inst.glue A \link[glue]{glue} string for repeated instruments naming
 #'
 #' @return data.frame in wide format
 #' @export
 #'
 #' @importFrom tidyr pivot_wider
 #' @importFrom tidyselect all_of
 #' @importFrom purrr reduce
@ -77,6 +83,7 @@ redcap_wider <-
  function(data,
           event.glue = "{.value}_{redcap_event_name}",
           inst.glue = "{.value}_{redcap_repeat_instance}") {
    # browser()
    if (!is_repeated_longitudinal(data)) {
      if (is.list(data)) {
        if (length(data) == 1) {
@ -91,6 +98,7 @@ redcap_wider <-
      id.name <- do.call(c, lapply(data, names))[[1]]
      l <- lapply(data, function(i) {
        # browser()
        rep_inst <- "redcap_repeat_instrument" %in% names(i)
        if (rep_inst) {
@ -111,7 +119,15 @@ redcap_wider <-
            )
            s[!colnames(s) %in% c("redcap_repeat_instrument")]
          })
-          i <- Reduce(dplyr::bind_rows, k)
+
          # Labels are removed and restored after bind_rows as class "labelled"
          # is not supported
          i <- remove_labelled(k) |>
            dplyr::bind_rows()
          all_labels <- save_labels(data)
          i <- restore_labels(i, all_labels)
        }
        event <- "redcap_event_name" %in% names(i)
@ -141,8 +157,51 @@ redcap_wider <-
        }
      })
-      out <- data.frame(Reduce(f = dplyr::full_join, x = l))
+      # out <- Reduce(f = dplyr::full_join, x = l)
      out <- purrr::reduce(.x = l, .f = dplyr::full_join)
    }
    out
  }
 # Applies list of attributes to data.frame
 restore_labels <- function(data, labels) {
  stopifnot(is.list(labels))
  stopifnot(is.data.frame(data))
  for (ndx in names(labels)) {
    data <- purrr::imap(data, \(.y, .j){
      if (startsWith(.j, ndx)) {
        set_attr(.y, labels[[ndx]])
      } else {
        .y
      }
    }) |> dplyr::bind_cols()
  }
  return(data)
 }
 # Extract unique variable attributes from list of data.frames
 save_labels <- function(data) {
  stopifnot(is.list(data))
  out <- list()
  for (j in seq_along(data)) {
    out <- c(out, lapply(data[[j]], get_attr))
  }
  out[!duplicated(names(out))]
 }
 # Removes class attributes of class "labelled" or "haven_labelled"
 remove_labelled <- function(data){
  stopifnot(is.list(data))
  lapply(data, \(.x) {
    lapply(.x, \(.y) {
      if (REDCapCAST::is.labelled(.y)) {
        set_attr(.y, label = NULL, attr = "class")
      } else {
        .y
      }
    }) |>
      dplyr::bind_cols()
  })
 }
--- a/man/REDCap_split.Rd
+++ b/man/REDCap_split.Rd
@ -21,8 +21,7 @@ call.}
 JSON from an API call.}
 \item{primary_table_name}{Name given to the list element for the primary
-output table (as described in \emph{README.md}). Ignored if
+output table. Ignored if \code{forms = 'all'}.}
 \code{forms = 'all'}.}
 \item{forms}{Indicate whether to create separate tables for repeating
 instruments only or for all forms.}
@ -66,7 +65,7 @@ metadata <- postForm(
 )
 # Convert exported JSON strings into a list of data.frames
-REDCapRITS::REDCap_split(records, metadata)
+REDCapCAST::REDCap_split(records, metadata)
 # Using a raw data export -------------------------------------------------
@ -79,7 +78,7 @@ metadata <- read.csv(
 )
 # Split the tables
-REDCapRITS::REDCap_split(records, metadata)
+REDCapCAST::REDCap_split(records, metadata)
 # In conjunction with the R export script ---------------------------------
@ -96,10 +95,10 @@ source("ExampleProject_R_2018-06-03_1700.r")
 metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")
 # Split the tables
-REDCapRITS::REDCap_split(data, metadata)
+REDCapCAST::REDCap_split(data, metadata)
 setwd(old)
 }
 }
 \author{
-Paul W. Egeler, M.S., GStat
+Paul W. Egeler
 }
--- a/man/as_factor.Rd
+++ b/man/as_factor.Rd
@ -8,7 +8,6 @@
 \alias{as_factor.character}
 \alias{as_factor.haven_labelled}
 \alias{as_factor.labelled}
 \alias{as_factor.redcapcast_labelled}
 \alias{as_factor.data.frame}
 \title{Convert labelled vectors to factors while preserving attributes}
 \usage{
@ -36,13 +35,6 @@ as_factor(x, ...)
  ...
 )
 \method{as_factor}{redcapcast_labelled}(
  x,
  levels = c("default", "labels", "values", "both"),
  ordered = FALSE,
  ...
 )
 \method{as_factor}{data.frame}(x, ..., only_labelled = TRUE)
 }
 \arguments{
@ -64,7 +56,7 @@ as_factor(x, ...)
 \item{only_labelled}{Only apply to labelled columns?}
 }
 \description{
-This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending
+This extends \link[forcats]{as_factor} as well as \link[haven]{as_factor}, by appending
 original attributes except for "class" after converting to factor to avoid
 ta loss in case of rich formatted and labelled data.
 }
--- a/man/easy_redcap.Rd
+++ b/man/easy_redcap.Rd
@ -8,13 +8,13 @@ easy_redcap(project.name, widen.data = TRUE, uri, ...)
 }
 \arguments{
 \item{project.name}{The name of the current project (for key storage with
-`keyring::key_set()`, using the default keyring)}
+\link[keyring]{key_set}, using the default keyring)}
 \item{widen.data}{argument to widen the exported data}
 \item{uri}{REDCap database API uri}
-\item{...}{arguments passed on to `REDCapCAST::read_redcap_tables()`}
+\item{...}{arguments passed on to \link[REDCapCAST]{read_redcap_tables}.}
 }
 \value{
 data.frame or list depending on widen.data
@ -22,3 +22,8 @@ data.frame or list depending on widen.data
 \description{
 Secure API key storage and data acquisition in one
 }
 \examples{
 \dontrun{
 easy_redcap("My_new_project",fields=c("record_id","age","hypertension"))
 }
 }
--- a/man/fct2num.Rd
+++ b/man/fct2num.Rd
@ -34,15 +34,9 @@ structure(c(1, 2, 3, 2, 10, 9),
  as_factor() |>
  fct2num()
-# Outlier with labels, but no class of origin, handled like numeric vector
+structure(c(1, 2, 3, 2, 10, 9),
-# structure(c(1, 2, 3, 2, 10, 9),
+  labels = c(Unknown = 9, Refused = 10)
-#   labels = c(Unknown = 9, Refused = 10)
+) |>
-# ) |>
+  as_factor() |>
-#   as_factor() |>
+  fct2num()
 #   fct2num()
 v <- sample(6:19, 20, TRUE) |> factor()
 dput(v)
 named_levels(v)
 fct2num(v)
 }
--- a/man/format_redcap_factor.Rd
+++ b/man/format_redcap_factor.Rd
@ -0,0 +1,23 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/read_redcap_tables.R
 \name{format_redcap_factor}
 \alias{format_redcap_factor}
 \title{Converts REDCap choices to factor levels and stores in labels attribute}
 \usage{
 format_redcap_factor(data, meta)
 }
 \arguments{
 \item{data}{vector}
 \item{meta}{vector of REDCap choices}
 }
 \value{
 vector of class "labelled" with a "labels" attribute
 }
 \description{
 Applying \link[REDCapCAST]{as_factor} to the data.frame or variable, will
 coerce to a factor.
 }
 \examples{
 format_redcap_factor(sample(1:3,20,TRUE),"1, First. | 2, second | 3, THIRD")
 }
--- a/man/get_api_key.Rd
+++ b/man/get_api_key.Rd
@ -4,14 +4,18 @@
 \alias{get_api_key}
 \title{Retrieve project API key if stored, if not, set and retrieve}
 \usage{
-get_api_key(key.name)
+get_api_key(key.name, ...)
 }
 \arguments{
 \item{key.name}{character vector of key name}
 \item{...}{passed to \link[keyring]{key_set}}
 }
 \value{
 character vector
 }
 \description{
-Retrieve project API key if stored, if not, set and retrieve
+Attempting to make secure API key storage so simple, that no other way makes
 sense. Wrapping \link[keyring]{key_get} and \link[keyring]{key_set} using the
 \link[keyring]{key_list} to check if key is in storage already.
 }
--- a/man/is.labelled.Rd
+++ b/man/is.labelled.Rd
@ -4,10 +4,7 @@
 \alias{is.labelled}
 \title{Tests for multiple label classes}
 \usage{
-is.labelled(
+is.labelled(x, classes = c("haven_labelled", "labelled"))
  x,
  classes = c("redcapcast_labelled", "haven_labelled", "labelled")
 )
 }
 \arguments{
 \item{x}{data}
--- a/man/named_levels.Rd
+++ b/man/named_levels.Rd
@ -4,7 +4,13 @@
 \alias{named_levels}
 \title{Get named vector of factor levels and values}
 \usage{
-named_levels(data, label = "labels", na.label = NULL, na.value = 99)
+named_levels(
  data,
  label = "labels",
  na.label = NULL,
  na.value = 99,
  sort.numeric = TRUE
 )
 }
 \arguments{
 \item{data}{factor}
@ -15,6 +21,8 @@ named_levels(data, label = "labels", na.label = NULL, na.value = 99)
 \item{na.value}{new value for NA strings. Ignored if na.label is NULL.
 Default is 99.}
 \item{sort.numeric}{sort factor levels if levels are numeric. Default is TRUE}
 }
 \value{
 named vector
@ -23,12 +31,16 @@ named vector
 Get named vector of factor levels and values
 }
 \examples{
 \dontrun{
 structure(c(1, 2, 3, 2, 10, 9),
  labels = c(Unknown = 9, Refused = 10),
  class = "haven_labelled"
 ) |>
  as_factor() |>
  named_levels()
-}
+structure(c(1, 2, 3, 2, 10, 9),
  labels = c(Unknown = 9, Refused = 10),
  class = "labelled"
 ) |>
  as_factor() |>
  named_levels()
 }
--- a/man/possibly_numeric.Rd
+++ b/man/possibly_numeric.Rd
@ -0,0 +1,23 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/as_factor.R
 \name{possibly_numeric}
 \alias{possibly_numeric}
 \title{Tests if vector can be interpreted as numeric without introducing NAs by
 coercion}
 \usage{
 possibly_numeric(data)
 }
 \arguments{
 \item{data}{vector}
 }
 \value{
 logical
 }
 \description{
 Tests if vector can be interpreted as numeric without introducing NAs by
 coercion
 }
 \examples{
 c("1","5") |> possibly_numeric()
 c("1","5","e") |> possibly_numeric()
 }
--- a/man/read_redcap_tables.Rd
+++ b/man/read_redcap_tables.Rd
@ -11,8 +11,9 @@ read_redcap_tables(
  fields = NULL,
  events = NULL,
  forms = NULL,
-  raw_or_label = "label",
+  raw_or_label = c("raw", "label", "both"),
-  split_forms = "all"
+  split_forms = "all",
  ...
 )
 }
 \arguments{
@ -28,27 +29,32 @@ read_redcap_tables(
 \item{forms}{forms to download}
-\item{raw_or_label}{raw or label tags. Can be
+\item{raw_or_label}{raw or label tags. Can be "raw", "label" or "both".
-  * "raw": Standard [REDCapR] method to get raw values.
+  * "raw": Standard \link[REDCapR]{redcap_read} method to get raw values.
-  * "label": Standard [REDCapR] method to get label values.
+  * "label": Standard \link[REDCapR]{redcap_read} method to get label values.
  * "both": Get raw values with REDCap labels applied as labels. Use
-  [as_factor()] to format factors with original labels and use the
+  \link[REDCapCAST]{as_factor} to format factors with original labels and use
-  [gtsummary] package to easily get beautiful tables with original labels
+  the  `gtsummary` package functions like \link[gtsummary]{tbl_summary} to
-  from REDCap. Use [fct_drop()] to drop empty levels.}
+  easily get beautiful tables with original labels from REDCap. Use
  \link[REDCapCAST]{fct_drop} to drop empty levels.}
 \item{split_forms}{Whether to split "repeating" or "all" forms, default is
 all.}
 \item{...}{passed on to \link[REDCapR]{redcap_read}}
 }
 \value{
 list of instruments
 }
 \description{
-Implementation of REDCap_split with a focused data acquisition approach using
+Implementation of passed on to \link[REDCapCAST]{REDCap_split} with a focused
-REDCapR::redcap_read and only downloading specified fields, forms and/or
+data acquisition approach using passed on to \link[REDCapR]{redcap_read} and
-events using the built-in focused_metadata including some clean-up.
+only downloading specified fields, forms and/or events using the built-in
 focused_metadata including some clean-up.
 Works with classical and longitudinal projects with or without repeating
 instruments.
 Will preserve metadata in the data.frames as labels.
 }
 \examples{
 # Examples will be provided later
--- a/man/redcap_wider.Rd
+++ b/man/redcap_wider.Rd
@ -2,7 +2,7 @@
 % Please edit documentation in R/redcap_wider.R
 \name{redcap_wider}
 \alias{redcap_wider}
-\title{Redcap Wider}
+\title{Transforms list of REDCap data.frames to a single wide data.frame}
 \usage{
 redcap_wider(
  data,
@ -11,18 +11,20 @@ redcap_wider(
 )
 }
 \arguments{
-\item{data}{A list of data frames.}
+\item{data}{A list of data frames}
-\item{event.glue}{A dplyr::glue string for repeated events naming}
+\item{event.glue}{A \link[glue]{glue} string for repeated events naming}
-\item{inst.glue}{A dplyr::glue string for repeated instruments naming}
+\item{inst.glue}{A \link[glue]{glue} string for repeated instruments naming}
 }
 \value{
-The list of data frames in wide format.
+data.frame in wide format
 }
 \description{
-Converts a list of REDCap data frames from long to wide format.
+Converts a list of REDCap data.frames from long to wide format.
-Handles longitudinal projects, but not yet repeated instruments.
+In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
 on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
 split by \link[REDCapCAST]{REDCap_split}.
 }
 \examples{
 # Longitudinal
--- a/man/sanitize_split.Rd
+++ b/man/sanitize_split.Rd
@ -7,13 +7,20 @@
 sanitize_split(
  l,
  generic.names = c("redcap_event_name", "redcap_repeat_instrument",
-    "redcap_repeat_instance")
+    "redcap_repeat_instance"),
  drop.complete = TRUE,
  drop.empty = TRUE
 )
 }
 \arguments{
 \item{l}{A list of data frames.}
 \item{generic.names}{A vector of generic names to be excluded.}
 \item{drop.complete}{logical to remove genric REDCap variables indicating
 instrument completion. Default is TRUE.}
 \item{drop.empty}{logical to remove variables with only NAs Default is TRUE.}
 }
 \value{
 A list of data frames with generic names excluded.