support labelled data

2025-04-22 06:12:34 +02:00 · 2024-11-28 14:32:03 +01:00 · 2024-11-28 14:32:03 +01:00 · 2aa268f747
commit 2aa268f747
parent 5926c12da6
13 changed files with 188 additions and 65 deletions
--- a/R/redcap_wider.R
+++ b/R/redcap_wider.R
@ -4,14 +4,20 @@ utils::globalVariables(c(
  "inst.glue"
 ))

-#' @title Redcap Wider
-#' @description Converts a list of REDCap data frames from long to wide format.
-#' Handles longitudinal projects, but not yet repeated instruments.
-#' @param data A list of data frames.
-#' @param event.glue A dplyr::glue string for repeated events naming
-#' @param inst.glue A dplyr::glue string for repeated instruments naming
-#' @return The list of data frames in wide format.
+#' Transforms list of REDCap data.frames to a single wide data.frame
+#'
+#' @description Converts a list of REDCap data.frames from long to wide format.
+#' In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
+#' on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
+#' split by \link[REDCapCAST]{REDCap_split}.
+#'
+#' @param data A list of data frames
+#' @param event.glue A \link[glue]{glue} string for repeated events naming
+#' @param inst.glue A \link[glue]{glue} string for repeated instruments naming
+#'
+#' @return data.frame in wide format
 #' @export
+#'
 #' @importFrom tidyr pivot_wider
 #' @importFrom tidyselect all_of
 #' @importFrom purrr reduce
@ -77,6 +83,7 @@ redcap_wider <-
  function(data,
           event.glue = "{.value}_{redcap_event_name}",
           inst.glue = "{.value}_{redcap_repeat_instance}") {
+    # browser()
    if (!is_repeated_longitudinal(data)) {
      if (is.list(data)) {
        if (length(data) == 1) {
@ -91,6 +98,7 @@ redcap_wider <-
      id.name <- do.call(c, lapply(data, names))[[1]]

      l <- lapply(data, function(i) {
+        # browser()
        rep_inst <- "redcap_repeat_instrument" %in% names(i)

        if (rep_inst) {
@ -111,7 +119,15 @@ redcap_wider <-
            )
            s[!colnames(s) %in% c("redcap_repeat_instrument")]
          })
-          i <- Reduce(dplyr::bind_rows, k)
+
+          # Labels are removed and restored after bind_rows as class "labelled"
+          # is not supported
+          i <- remove_labelled(k) |>
+            dplyr::bind_rows()
+
+          all_labels <- save_labels(data)
+
+          i <- restore_labels(i, all_labels)
        }

        event <- "redcap_event_name" %in% names(i)
@ -141,8 +157,51 @@ redcap_wider <-
        }
      })

-      out <- data.frame(Reduce(f = dplyr::full_join, x = l))
+      # out <- Reduce(f = dplyr::full_join, x = l)
+      out <- purrr::reduce(.x = l, .f = dplyr::full_join)
    }

    out
  }
+
+# Applies list of attributes to data.frame
+restore_labels <- function(data, labels) {
+  stopifnot(is.list(labels))
+  stopifnot(is.data.frame(data))
+  for (ndx in names(labels)) {
+    data <- purrr::imap(data, \(.y, .j){
+      if (startsWith(.j, ndx)) {
+        set_attr(.y, labels[[ndx]])
+      } else {
+        .y
+      }
+    }) |> dplyr::bind_cols()
+  }
+  return(data)
+}
+
+# Extract unique variable attributes from list of data.frames
+save_labels <- function(data) {
+  stopifnot(is.list(data))
+  out <- list()
+  for (j in seq_along(data)) {
+    out <- c(out, lapply(data[[j]], get_attr))
+  }
+
+  out[!duplicated(names(out))]
+}
+
+# Removes class attributes of class "labelled" or "haven_labelled"
+remove_labelled <- function(data){
+  stopifnot(is.list(data))
+  lapply(data, \(.x) {
+    lapply(.x, \(.y) {
+      if (REDCapCAST::is.labelled(.y)) {
+        set_attr(.y, label = NULL, attr = "class")
+      } else {
+        .y
+      }
+    }) |>
+      dplyr::bind_cols()
+  })
+}
--- a/man/REDCap_split.Rd
+++ b/man/REDCap_split.Rd
@ -21,8 +21,7 @@ call.}
 JSON from an API call.}

 \item{primary_table_name}{Name given to the list element for the primary
-output table (as described in \emph{README.md}). Ignored if
-\code{forms = 'all'}.}
+output table. Ignored if \code{forms = 'all'}.}

 \item{forms}{Indicate whether to create separate tables for repeating
 instruments only or for all forms.}
@ -66,7 +65,7 @@ metadata <- postForm(
 )

 # Convert exported JSON strings into a list of data.frames
-REDCapRITS::REDCap_split(records, metadata)
+REDCapCAST::REDCap_split(records, metadata)

 # Using a raw data export -------------------------------------------------

@ -79,7 +78,7 @@ metadata <- read.csv(
 )

 # Split the tables
-REDCapRITS::REDCap_split(records, metadata)
+REDCapCAST::REDCap_split(records, metadata)

 # In conjunction with the R export script ---------------------------------

@ -96,10 +95,10 @@ source("ExampleProject_R_2018-06-03_1700.r")
 metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")

 # Split the tables
-REDCapRITS::REDCap_split(data, metadata)
+REDCapCAST::REDCap_split(data, metadata)
 setwd(old)
 }
 }
 \author{
-Paul W. Egeler, M.S., GStat
+Paul W. Egeler
 }
--- a/man/as_factor.Rd
+++ b/man/as_factor.Rd
@ -8,7 +8,6 @@
 \alias{as_factor.character}
 \alias{as_factor.haven_labelled}
 \alias{as_factor.labelled}
-\alias{as_factor.redcapcast_labelled}
 \alias{as_factor.data.frame}
 \title{Convert labelled vectors to factors while preserving attributes}
 \usage{
@ -36,13 +35,6 @@ as_factor(x, ...)
  ...
 )

-\method{as_factor}{redcapcast_labelled}(
-  x,
-  levels = c("default", "labels", "values", "both"),
-  ordered = FALSE,
-  ...
-)
-
 \method{as_factor}{data.frame}(x, ..., only_labelled = TRUE)
 }
 \arguments{
@ -64,7 +56,7 @@ as_factor(x, ...)
 \item{only_labelled}{Only apply to labelled columns?}
 }
 \description{
-This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending
+This extends \link[forcats]{as_factor} as well as \link[haven]{as_factor}, by appending
 original attributes except for "class" after converting to factor to avoid
 ta loss in case of rich formatted and labelled data.
 }
--- a/man/easy_redcap.Rd
+++ b/man/easy_redcap.Rd
@ -8,13 +8,13 @@ easy_redcap(project.name, widen.data = TRUE, uri, ...)
 }
 \arguments{
 \item{project.name}{The name of the current project (for key storage with
-`keyring::key_set()`, using the default keyring)}
+\link[keyring]{key_set}, using the default keyring)}

 \item{widen.data}{argument to widen the exported data}

 \item{uri}{REDCap database API uri}

-\item{...}{arguments passed on to `REDCapCAST::read_redcap_tables()`}
+\item{...}{arguments passed on to \link[REDCapCAST]{read_redcap_tables}.}
 }
 \value{
 data.frame or list depending on widen.data
@ -22,3 +22,8 @@ data.frame or list depending on widen.data
 \description{
 Secure API key storage and data acquisition in one
 }
+\examples{
+\dontrun{
+easy_redcap("My_new_project",fields=c("record_id","age","hypertension"))
+}
+}
--- a/man/fct2num.Rd
+++ b/man/fct2num.Rd
@ -34,15 +34,9 @@ structure(c(1, 2, 3, 2, 10, 9),
  as_factor() |>
  fct2num()

-# Outlier with labels, but no class of origin, handled like numeric vector
-# structure(c(1, 2, 3, 2, 10, 9),
-#   labels = c(Unknown = 9, Refused = 10)
-# ) |>
-#   as_factor() |>
-#   fct2num()
-
-v <- sample(6:19, 20, TRUE) |> factor()
-dput(v)
-named_levels(v)
-fct2num(v)
+structure(c(1, 2, 3, 2, 10, 9),
+  labels = c(Unknown = 9, Refused = 10)
+) |>
+  as_factor() |>
+  fct2num()
 }
--- a/man/format_redcap_factor.Rd
+++ b/man/format_redcap_factor.Rd
@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read_redcap_tables.R
+\name{format_redcap_factor}
+\alias{format_redcap_factor}
+\title{Converts REDCap choices to factor levels and stores in labels attribute}
+\usage{
+format_redcap_factor(data, meta)
+}
+\arguments{
+\item{data}{vector}
+
+\item{meta}{vector of REDCap choices}
+}
+\value{
+vector of class "labelled" with a "labels" attribute
+}
+\description{
+Applying \link[REDCapCAST]{as_factor} to the data.frame or variable, will
+coerce to a factor.
+}
+\examples{
+format_redcap_factor(sample(1:3,20,TRUE),"1, First. | 2, second | 3, THIRD")
+}
--- a/man/get_api_key.Rd
+++ b/man/get_api_key.Rd
@ -4,14 +4,18 @@
 \alias{get_api_key}
 \title{Retrieve project API key if stored, if not, set and retrieve}
 \usage{
-get_api_key(key.name)
+get_api_key(key.name, ...)
 }
 \arguments{
 \item{key.name}{character vector of key name}
+
+\item{...}{passed to \link[keyring]{key_set}}
 }
 \value{
 character vector
 }
 \description{
-Retrieve project API key if stored, if not, set and retrieve
+Attempting to make secure API key storage so simple, that no other way makes
+sense. Wrapping \link[keyring]{key_get} and \link[keyring]{key_set} using the
+\link[keyring]{key_list} to check if key is in storage already.
 }
--- a/man/is.labelled.Rd
+++ b/man/is.labelled.Rd
@ -4,10 +4,7 @@
 \alias{is.labelled}
 \title{Tests for multiple label classes}
 \usage{
-is.labelled(
-  x,
-  classes = c("redcapcast_labelled", "haven_labelled", "labelled")
-)
+is.labelled(x, classes = c("haven_labelled", "labelled"))
 }
 \arguments{
 \item{x}{data}
--- a/man/named_levels.Rd
+++ b/man/named_levels.Rd
@ -4,7 +4,13 @@
 \alias{named_levels}
 \title{Get named vector of factor levels and values}
 \usage{
-named_levels(data, label = "labels", na.label = NULL, na.value = 99)
+named_levels(
+  data,
+  label = "labels",
+  na.label = NULL,
+  na.value = 99,
+  sort.numeric = TRUE
+)
 }
 \arguments{
 \item{data}{factor}
@ -15,6 +21,8 @@ named_levels(data, label = "labels", na.label = NULL, na.value = 99)

 \item{na.value}{new value for NA strings. Ignored if na.label is NULL.
 Default is 99.}
+
+\item{sort.numeric}{sort factor levels if levels are numeric. Default is TRUE}
 }
 \value{
 named vector
@ -23,12 +31,16 @@ named vector
 Get named vector of factor levels and values
 }
 \examples{
-\dontrun{
 structure(c(1, 2, 3, 2, 10, 9),
  labels = c(Unknown = 9, Refused = 10),
  class = "haven_labelled"
 ) |>
  as_factor() |>
  named_levels()
-}
+structure(c(1, 2, 3, 2, 10, 9),
+  labels = c(Unknown = 9, Refused = 10),
+  class = "labelled"
+) |>
+  as_factor() |>
+  named_levels()
 }
--- a/man/possibly_numeric.Rd
+++ b/man/possibly_numeric.Rd
@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/as_factor.R
+\name{possibly_numeric}
+\alias{possibly_numeric}
+\title{Tests if vector can be interpreted as numeric without introducing NAs by
+coercion}
+\usage{
+possibly_numeric(data)
+}
+\arguments{
+\item{data}{vector}
+}
+\value{
+logical
+}
+\description{
+Tests if vector can be interpreted as numeric without introducing NAs by
+coercion
+}
+\examples{
+c("1","5") |> possibly_numeric()
+c("1","5","e") |> possibly_numeric()
+}
--- a/man/read_redcap_tables.Rd
+++ b/man/read_redcap_tables.Rd
@ -11,8 +11,9 @@ read_redcap_tables(
  fields = NULL,
  events = NULL,
  forms = NULL,
-  raw_or_label = "label",
-  split_forms = "all"
+  raw_or_label = c("raw", "label", "both"),
+  split_forms = "all",
+  ...
 )
 }
 \arguments{
@ -28,27 +29,32 @@ read_redcap_tables(

 \item{forms}{forms to download}

-\item{raw_or_label}{raw or label tags. Can be
+\item{raw_or_label}{raw or label tags. Can be "raw", "label" or "both".

-  * "raw": Standard [REDCapR] method to get raw values.
-  * "label": Standard [REDCapR] method to get label values.
+  * "raw": Standard \link[REDCapR]{redcap_read} method to get raw values.
+  * "label": Standard \link[REDCapR]{redcap_read} method to get label values.
  * "both": Get raw values with REDCap labels applied as labels. Use
-  [as_factor()] to format factors with original labels and use the
-  [gtsummary] package to easily get beautiful tables with original labels
-  from REDCap. Use [fct_drop()] to drop empty levels.}
+  \link[REDCapCAST]{as_factor} to format factors with original labels and use
+  the  `gtsummary` package functions like \link[gtsummary]{tbl_summary} to
+  easily get beautiful tables with original labels from REDCap. Use
+  \link[REDCapCAST]{fct_drop} to drop empty levels.}

 \item{split_forms}{Whether to split "repeating" or "all" forms, default is
 all.}
+
+\item{...}{passed on to \link[REDCapR]{redcap_read}}
 }
 \value{
 list of instruments
 }
 \description{
-Implementation of REDCap_split with a focused data acquisition approach using
-REDCapR::redcap_read and only downloading specified fields, forms and/or
-events using the built-in focused_metadata including some clean-up.
+Implementation of passed on to \link[REDCapCAST]{REDCap_split} with a focused
+data acquisition approach using passed on to \link[REDCapR]{redcap_read} and
+only downloading specified fields, forms and/or events using the built-in
+focused_metadata including some clean-up.
 Works with classical and longitudinal projects with or without repeating
 instruments.
+Will preserve metadata in the data.frames as labels.
 }
 \examples{
 # Examples will be provided later
--- a/man/redcap_wider.Rd
+++ b/man/redcap_wider.Rd
@ -2,7 +2,7 @@
 % Please edit documentation in R/redcap_wider.R
 \name{redcap_wider}
 \alias{redcap_wider}
-\title{Redcap Wider}
+\title{Transforms list of REDCap data.frames to a single wide data.frame}
 \usage{
 redcap_wider(
  data,
@ -11,18 +11,20 @@ redcap_wider(
 )
 }
 \arguments{
-\item{data}{A list of data frames.}
+\item{data}{A list of data frames}

-\item{event.glue}{A dplyr::glue string for repeated events naming}
+\item{event.glue}{A \link[glue]{glue} string for repeated events naming}

-\item{inst.glue}{A dplyr::glue string for repeated instruments naming}
+\item{inst.glue}{A \link[glue]{glue} string for repeated instruments naming}
 }
 \value{
-The list of data frames in wide format.
+data.frame in wide format
 }
 \description{
-Converts a list of REDCap data frames from long to wide format.
-Handles longitudinal projects, but not yet repeated instruments.
+Converts a list of REDCap data.frames from long to wide format.
+In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
+on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
+split by \link[REDCapCAST]{REDCap_split}.
 }
 \examples{
 # Longitudinal
--- a/man/sanitize_split.Rd
+++ b/man/sanitize_split.Rd
@ -7,13 +7,20 @@
 sanitize_split(
  l,
  generic.names = c("redcap_event_name", "redcap_repeat_instrument",
-    "redcap_repeat_instance")
+    "redcap_repeat_instance"),
+  drop.complete = TRUE,
+  drop.empty = TRUE
 )
 }
 \arguments{
 \item{l}{A list of data frames.}

 \item{generic.names}{A vector of generic names to be excluded.}
+
+\item{drop.complete}{logical to remove genric REDCap variables indicating
+instrument completion. Default is TRUE.}
+
+\item{drop.empty}{logical to remove variables with only NAs Default is TRUE.}
 }
 \value{
 A list of data frames with generic names excluded.