support labelled data

This commit is contained in:
Andreas Gammelgaard Damsbo 2024-11-28 14:32:03 +01:00
parent 5926c12da6
commit 2aa268f747
No known key found for this signature in database
13 changed files with 188 additions and 65 deletions

View File

@ -4,14 +4,20 @@ utils::globalVariables(c(
"inst.glue" "inst.glue"
)) ))
#' @title Redcap Wider #' Transforms list of REDCap data.frames to a single wide data.frame
#' @description Converts a list of REDCap data frames from long to wide format. #'
#' Handles longitudinal projects, but not yet repeated instruments. #' @description Converts a list of REDCap data.frames from long to wide format.
#' @param data A list of data frames. #' In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
#' @param event.glue A dplyr::glue string for repeated events naming #' on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
#' @param inst.glue A dplyr::glue string for repeated instruments naming #' split by \link[REDCapCAST]{REDCap_split}.
#' @return The list of data frames in wide format. #'
#' @param data A list of data frames
#' @param event.glue A \link[glue]{glue} string for repeated events naming
#' @param inst.glue A \link[glue]{glue} string for repeated instruments naming
#'
#' @return data.frame in wide format
#' @export #' @export
#'
#' @importFrom tidyr pivot_wider #' @importFrom tidyr pivot_wider
#' @importFrom tidyselect all_of #' @importFrom tidyselect all_of
#' @importFrom purrr reduce #' @importFrom purrr reduce
@ -77,6 +83,7 @@ redcap_wider <-
function(data, function(data,
event.glue = "{.value}_{redcap_event_name}", event.glue = "{.value}_{redcap_event_name}",
inst.glue = "{.value}_{redcap_repeat_instance}") { inst.glue = "{.value}_{redcap_repeat_instance}") {
# browser()
if (!is_repeated_longitudinal(data)) { if (!is_repeated_longitudinal(data)) {
if (is.list(data)) { if (is.list(data)) {
if (length(data) == 1) { if (length(data) == 1) {
@ -91,6 +98,7 @@ redcap_wider <-
id.name <- do.call(c, lapply(data, names))[[1]] id.name <- do.call(c, lapply(data, names))[[1]]
l <- lapply(data, function(i) { l <- lapply(data, function(i) {
# browser()
rep_inst <- "redcap_repeat_instrument" %in% names(i) rep_inst <- "redcap_repeat_instrument" %in% names(i)
if (rep_inst) { if (rep_inst) {
@ -111,7 +119,15 @@ redcap_wider <-
) )
s[!colnames(s) %in% c("redcap_repeat_instrument")] s[!colnames(s) %in% c("redcap_repeat_instrument")]
}) })
i <- Reduce(dplyr::bind_rows, k)
# Labels are removed and restored after bind_rows as class "labelled"
# is not supported
i <- remove_labelled(k) |>
dplyr::bind_rows()
all_labels <- save_labels(data)
i <- restore_labels(i, all_labels)
} }
event <- "redcap_event_name" %in% names(i) event <- "redcap_event_name" %in% names(i)
@ -141,8 +157,51 @@ redcap_wider <-
} }
}) })
out <- data.frame(Reduce(f = dplyr::full_join, x = l)) # out <- Reduce(f = dplyr::full_join, x = l)
out <- purrr::reduce(.x = l, .f = dplyr::full_join)
} }
out out
} }
# Applies list of attributes to data.frame
restore_labels <- function(data, labels) {
stopifnot(is.list(labels))
stopifnot(is.data.frame(data))
for (ndx in names(labels)) {
data <- purrr::imap(data, \(.y, .j){
if (startsWith(.j, ndx)) {
set_attr(.y, labels[[ndx]])
} else {
.y
}
}) |> dplyr::bind_cols()
}
return(data)
}
# Extract unique variable attributes from list of data.frames
save_labels <- function(data) {
stopifnot(is.list(data))
out <- list()
for (j in seq_along(data)) {
out <- c(out, lapply(data[[j]], get_attr))
}
out[!duplicated(names(out))]
}
# Removes class attributes of class "labelled" or "haven_labelled"
remove_labelled <- function(data){
stopifnot(is.list(data))
lapply(data, \(.x) {
lapply(.x, \(.y) {
if (REDCapCAST::is.labelled(.y)) {
set_attr(.y, label = NULL, attr = "class")
} else {
.y
}
}) |>
dplyr::bind_cols()
})
}

View File

@ -21,8 +21,7 @@ call.}
JSON from an API call.} JSON from an API call.}
\item{primary_table_name}{Name given to the list element for the primary \item{primary_table_name}{Name given to the list element for the primary
output table (as described in \emph{README.md}). Ignored if output table. Ignored if \code{forms = 'all'}.}
\code{forms = 'all'}.}
\item{forms}{Indicate whether to create separate tables for repeating \item{forms}{Indicate whether to create separate tables for repeating
instruments only or for all forms.} instruments only or for all forms.}
@ -66,7 +65,7 @@ metadata <- postForm(
) )
# Convert exported JSON strings into a list of data.frames # Convert exported JSON strings into a list of data.frames
REDCapRITS::REDCap_split(records, metadata) REDCapCAST::REDCap_split(records, metadata)
# Using a raw data export ------------------------------------------------- # Using a raw data export -------------------------------------------------
@ -79,7 +78,7 @@ metadata <- read.csv(
) )
# Split the tables # Split the tables
REDCapRITS::REDCap_split(records, metadata) REDCapCAST::REDCap_split(records, metadata)
# In conjunction with the R export script --------------------------------- # In conjunction with the R export script ---------------------------------
@ -96,10 +95,10 @@ source("ExampleProject_R_2018-06-03_1700.r")
metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv") metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")
# Split the tables # Split the tables
REDCapRITS::REDCap_split(data, metadata) REDCapCAST::REDCap_split(data, metadata)
setwd(old) setwd(old)
} }
} }
\author{ \author{
Paul W. Egeler, M.S., GStat Paul W. Egeler
} }

View File

@ -8,7 +8,6 @@
\alias{as_factor.character} \alias{as_factor.character}
\alias{as_factor.haven_labelled} \alias{as_factor.haven_labelled}
\alias{as_factor.labelled} \alias{as_factor.labelled}
\alias{as_factor.redcapcast_labelled}
\alias{as_factor.data.frame} \alias{as_factor.data.frame}
\title{Convert labelled vectors to factors while preserving attributes} \title{Convert labelled vectors to factors while preserving attributes}
\usage{ \usage{
@ -36,13 +35,6 @@ as_factor(x, ...)
... ...
) )
\method{as_factor}{redcapcast_labelled}(
x,
levels = c("default", "labels", "values", "both"),
ordered = FALSE,
...
)
\method{as_factor}{data.frame}(x, ..., only_labelled = TRUE) \method{as_factor}{data.frame}(x, ..., only_labelled = TRUE)
} }
\arguments{ \arguments{
@ -64,7 +56,7 @@ as_factor(x, ...)
\item{only_labelled}{Only apply to labelled columns?} \item{only_labelled}{Only apply to labelled columns?}
} }
\description{ \description{
This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending This extends \link[forcats]{as_factor} as well as \link[haven]{as_factor}, by appending
original attributes except for "class" after converting to factor to avoid original attributes except for "class" after converting to factor to avoid
ta loss in case of rich formatted and labelled data. ta loss in case of rich formatted and labelled data.
} }

View File

@ -8,13 +8,13 @@ easy_redcap(project.name, widen.data = TRUE, uri, ...)
} }
\arguments{ \arguments{
\item{project.name}{The name of the current project (for key storage with \item{project.name}{The name of the current project (for key storage with
`keyring::key_set()`, using the default keyring)} \link[keyring]{key_set}, using the default keyring)}
\item{widen.data}{argument to widen the exported data} \item{widen.data}{argument to widen the exported data}
\item{uri}{REDCap database API uri} \item{uri}{REDCap database API uri}
\item{...}{arguments passed on to `REDCapCAST::read_redcap_tables()`} \item{...}{arguments passed on to \link[REDCapCAST]{read_redcap_tables}.}
} }
\value{ \value{
data.frame or list depending on widen.data data.frame or list depending on widen.data
@ -22,3 +22,8 @@ data.frame or list depending on widen.data
\description{ \description{
Secure API key storage and data acquisition in one Secure API key storage and data acquisition in one
} }
\examples{
\dontrun{
easy_redcap("My_new_project",fields=c("record_id","age","hypertension"))
}
}

View File

@ -34,15 +34,9 @@ structure(c(1, 2, 3, 2, 10, 9),
as_factor() |> as_factor() |>
fct2num() fct2num()
# Outlier with labels, but no class of origin, handled like numeric vector structure(c(1, 2, 3, 2, 10, 9),
# structure(c(1, 2, 3, 2, 10, 9), labels = c(Unknown = 9, Refused = 10)
# labels = c(Unknown = 9, Refused = 10) ) |>
# ) |> as_factor() |>
# as_factor() |> fct2num()
# fct2num()
v <- sample(6:19, 20, TRUE) |> factor()
dput(v)
named_levels(v)
fct2num(v)
} }

View File

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_redcap_tables.R
\name{format_redcap_factor}
\alias{format_redcap_factor}
\title{Converts REDCap choices to factor levels and stores in labels attribute}
\usage{
format_redcap_factor(data, meta)
}
\arguments{
\item{data}{vector}
\item{meta}{vector of REDCap choices}
}
\value{
vector of class "labelled" with a "labels" attribute
}
\description{
Applying \link[REDCapCAST]{as_factor} to the data.frame or variable, will
coerce to a factor.
}
\examples{
format_redcap_factor(sample(1:3,20,TRUE),"1, First. | 2, second | 3, THIRD")
}

View File

@ -4,14 +4,18 @@
\alias{get_api_key} \alias{get_api_key}
\title{Retrieve project API key if stored, if not, set and retrieve} \title{Retrieve project API key if stored, if not, set and retrieve}
\usage{ \usage{
get_api_key(key.name) get_api_key(key.name, ...)
} }
\arguments{ \arguments{
\item{key.name}{character vector of key name} \item{key.name}{character vector of key name}
\item{...}{passed to \link[keyring]{key_set}}
} }
\value{ \value{
character vector character vector
} }
\description{ \description{
Retrieve project API key if stored, if not, set and retrieve Attempting to make secure API key storage so simple, that no other way makes
sense. Wrapping \link[keyring]{key_get} and \link[keyring]{key_set} using the
\link[keyring]{key_list} to check if key is in storage already.
} }

View File

@ -4,10 +4,7 @@
\alias{is.labelled} \alias{is.labelled}
\title{Tests for multiple label classes} \title{Tests for multiple label classes}
\usage{ \usage{
is.labelled( is.labelled(x, classes = c("haven_labelled", "labelled"))
x,
classes = c("redcapcast_labelled", "haven_labelled", "labelled")
)
} }
\arguments{ \arguments{
\item{x}{data} \item{x}{data}

View File

@ -4,7 +4,13 @@
\alias{named_levels} \alias{named_levels}
\title{Get named vector of factor levels and values} \title{Get named vector of factor levels and values}
\usage{ \usage{
named_levels(data, label = "labels", na.label = NULL, na.value = 99) named_levels(
data,
label = "labels",
na.label = NULL,
na.value = 99,
sort.numeric = TRUE
)
} }
\arguments{ \arguments{
\item{data}{factor} \item{data}{factor}
@ -15,6 +21,8 @@ named_levels(data, label = "labels", na.label = NULL, na.value = 99)
\item{na.value}{new value for NA strings. Ignored if na.label is NULL. \item{na.value}{new value for NA strings. Ignored if na.label is NULL.
Default is 99.} Default is 99.}
\item{sort.numeric}{sort factor levels if levels are numeric. Default is TRUE}
} }
\value{ \value{
named vector named vector
@ -23,12 +31,16 @@ named vector
Get named vector of factor levels and values Get named vector of factor levels and values
} }
\examples{ \examples{
\dontrun{
structure(c(1, 2, 3, 2, 10, 9), structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10), labels = c(Unknown = 9, Refused = 10),
class = "haven_labelled" class = "haven_labelled"
) |> ) |>
as_factor() |> as_factor() |>
named_levels() named_levels()
} structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "labelled"
) |>
as_factor() |>
named_levels()
} }

23
man/possibly_numeric.Rd Normal file
View File

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_factor.R
\name{possibly_numeric}
\alias{possibly_numeric}
\title{Tests if vector can be interpreted as numeric without introducing NAs by
coercion}
\usage{
possibly_numeric(data)
}
\arguments{
\item{data}{vector}
}
\value{
logical
}
\description{
Tests if vector can be interpreted as numeric without introducing NAs by
coercion
}
\examples{
c("1","5") |> possibly_numeric()
c("1","5","e") |> possibly_numeric()
}

View File

@ -11,8 +11,9 @@ read_redcap_tables(
fields = NULL, fields = NULL,
events = NULL, events = NULL,
forms = NULL, forms = NULL,
raw_or_label = "label", raw_or_label = c("raw", "label", "both"),
split_forms = "all" split_forms = "all",
...
) )
} }
\arguments{ \arguments{
@ -28,27 +29,32 @@ read_redcap_tables(
\item{forms}{forms to download} \item{forms}{forms to download}
\item{raw_or_label}{raw or label tags. Can be \item{raw_or_label}{raw or label tags. Can be "raw", "label" or "both".
* "raw": Standard [REDCapR] method to get raw values. * "raw": Standard \link[REDCapR]{redcap_read} method to get raw values.
* "label": Standard [REDCapR] method to get label values. * "label": Standard \link[REDCapR]{redcap_read} method to get label values.
* "both": Get raw values with REDCap labels applied as labels. Use * "both": Get raw values with REDCap labels applied as labels. Use
[as_factor()] to format factors with original labels and use the \link[REDCapCAST]{as_factor} to format factors with original labels and use
[gtsummary] package to easily get beautiful tables with original labels the `gtsummary` package functions like \link[gtsummary]{tbl_summary} to
from REDCap. Use [fct_drop()] to drop empty levels.} easily get beautiful tables with original labels from REDCap. Use
\link[REDCapCAST]{fct_drop} to drop empty levels.}
\item{split_forms}{Whether to split "repeating" or "all" forms, default is \item{split_forms}{Whether to split "repeating" or "all" forms, default is
all.} all.}
\item{...}{passed on to \link[REDCapR]{redcap_read}}
} }
\value{ \value{
list of instruments list of instruments
} }
\description{ \description{
Implementation of REDCap_split with a focused data acquisition approach using Implementation of passed on to \link[REDCapCAST]{REDCap_split} with a focused
REDCapR::redcap_read and only downloading specified fields, forms and/or data acquisition approach using passed on to \link[REDCapR]{redcap_read} and
events using the built-in focused_metadata including some clean-up. only downloading specified fields, forms and/or events using the built-in
focused_metadata including some clean-up.
Works with classical and longitudinal projects with or without repeating Works with classical and longitudinal projects with or without repeating
instruments. instruments.
Will preserve metadata in the data.frames as labels.
} }
\examples{ \examples{
# Examples will be provided later # Examples will be provided later

View File

@ -2,7 +2,7 @@
% Please edit documentation in R/redcap_wider.R % Please edit documentation in R/redcap_wider.R
\name{redcap_wider} \name{redcap_wider}
\alias{redcap_wider} \alias{redcap_wider}
\title{Redcap Wider} \title{Transforms list of REDCap data.frames to a single wide data.frame}
\usage{ \usage{
redcap_wider( redcap_wider(
data, data,
@ -11,18 +11,20 @@ redcap_wider(
) )
} }
\arguments{ \arguments{
\item{data}{A list of data frames.} \item{data}{A list of data frames}
\item{event.glue}{A dplyr::glue string for repeated events naming} \item{event.glue}{A \link[glue]{glue} string for repeated events naming}
\item{inst.glue}{A dplyr::glue string for repeated instruments naming} \item{inst.glue}{A \link[glue]{glue} string for repeated instruments naming}
} }
\value{ \value{
The list of data frames in wide format. data.frame in wide format
} }
\description{ \description{
Converts a list of REDCap data frames from long to wide format. Converts a list of REDCap data.frames from long to wide format.
Handles longitudinal projects, but not yet repeated instruments. In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
split by \link[REDCapCAST]{REDCap_split}.
} }
\examples{ \examples{
# Longitudinal # Longitudinal

View File

@ -7,13 +7,20 @@
sanitize_split( sanitize_split(
l, l,
generic.names = c("redcap_event_name", "redcap_repeat_instrument", generic.names = c("redcap_event_name", "redcap_repeat_instrument",
"redcap_repeat_instance") "redcap_repeat_instance"),
drop.complete = TRUE,
drop.empty = TRUE
) )
} }
\arguments{ \arguments{
\item{l}{A list of data frames.} \item{l}{A list of data frames.}
\item{generic.names}{A vector of generic names to be excluded.} \item{generic.names}{A vector of generic names to be excluded.}
\item{drop.complete}{logical to remove genric REDCap variables indicating
instrument completion. Default is TRUE.}
\item{drop.empty}{logical to remove variables with only NAs Default is TRUE.}
} }
\value{ \value{
A list of data frames with generic names excluded. A list of data frames with generic names excluded.