support labelled data

This commit is contained in:
Andreas Gammelgaard Damsbo 2024-11-28 14:32:03 +01:00
parent 5926c12da6
commit 2aa268f747
No known key found for this signature in database
13 changed files with 188 additions and 65 deletions

View File

@ -4,14 +4,20 @@ utils::globalVariables(c(
"inst.glue"
))
#' @title Redcap Wider
#' @description Converts a list of REDCap data frames from long to wide format.
#' Handles longitudinal projects, but not yet repeated instruments.
#' @param data A list of data frames.
#' @param event.glue A dplyr::glue string for repeated events naming
#' @param inst.glue A dplyr::glue string for repeated instruments naming
#' @return The list of data frames in wide format.
#' Transforms list of REDCap data.frames to a single wide data.frame
#'
#' @description Converts a list of REDCap data.frames from long to wide format.
#' In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
#' on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
#' split by \link[REDCapCAST]{REDCap_split}.
#'
#' @param data A list of data frames
#' @param event.glue A \link[glue]{glue} string for repeated events naming
#' @param inst.glue A \link[glue]{glue} string for repeated instruments naming
#'
#' @return data.frame in wide format
#' @export
#'
#' @importFrom tidyr pivot_wider
#' @importFrom tidyselect all_of
#' @importFrom purrr reduce
@ -77,6 +83,7 @@ redcap_wider <-
function(data,
event.glue = "{.value}_{redcap_event_name}",
inst.glue = "{.value}_{redcap_repeat_instance}") {
# browser()
if (!is_repeated_longitudinal(data)) {
if (is.list(data)) {
if (length(data) == 1) {
@ -91,6 +98,7 @@ redcap_wider <-
id.name <- do.call(c, lapply(data, names))[[1]]
l <- lapply(data, function(i) {
# browser()
rep_inst <- "redcap_repeat_instrument" %in% names(i)
if (rep_inst) {
@ -111,7 +119,15 @@ redcap_wider <-
)
s[!colnames(s) %in% c("redcap_repeat_instrument")]
})
i <- Reduce(dplyr::bind_rows, k)
# Labels are removed and restored after bind_rows as class "labelled"
# is not supported
i <- remove_labelled(k) |>
dplyr::bind_rows()
all_labels <- save_labels(data)
i <- restore_labels(i, all_labels)
}
event <- "redcap_event_name" %in% names(i)
@ -141,8 +157,51 @@ redcap_wider <-
}
})
out <- data.frame(Reduce(f = dplyr::full_join, x = l))
# out <- Reduce(f = dplyr::full_join, x = l)
out <- purrr::reduce(.x = l, .f = dplyr::full_join)
}
out
}
# Applies list of attributes to data.frame
restore_labels <- function(data, labels) {
stopifnot(is.list(labels))
stopifnot(is.data.frame(data))
for (ndx in names(labels)) {
data <- purrr::imap(data, \(.y, .j){
if (startsWith(.j, ndx)) {
set_attr(.y, labels[[ndx]])
} else {
.y
}
}) |> dplyr::bind_cols()
}
return(data)
}
# Extract unique variable attributes from list of data.frames
save_labels <- function(data) {
stopifnot(is.list(data))
out <- list()
for (j in seq_along(data)) {
out <- c(out, lapply(data[[j]], get_attr))
}
out[!duplicated(names(out))]
}
# Removes class attributes of class "labelled" or "haven_labelled"
remove_labelled <- function(data){
stopifnot(is.list(data))
lapply(data, \(.x) {
lapply(.x, \(.y) {
if (REDCapCAST::is.labelled(.y)) {
set_attr(.y, label = NULL, attr = "class")
} else {
.y
}
}) |>
dplyr::bind_cols()
})
}

View File

@ -21,8 +21,7 @@ call.}
JSON from an API call.}
\item{primary_table_name}{Name given to the list element for the primary
output table (as described in \emph{README.md}). Ignored if
\code{forms = 'all'}.}
output table. Ignored if \code{forms = 'all'}.}
\item{forms}{Indicate whether to create separate tables for repeating
instruments only or for all forms.}
@ -66,7 +65,7 @@ metadata <- postForm(
)
# Convert exported JSON strings into a list of data.frames
REDCapRITS::REDCap_split(records, metadata)
REDCapCAST::REDCap_split(records, metadata)
# Using a raw data export -------------------------------------------------
@ -79,7 +78,7 @@ metadata <- read.csv(
)
# Split the tables
REDCapRITS::REDCap_split(records, metadata)
REDCapCAST::REDCap_split(records, metadata)
# In conjunction with the R export script ---------------------------------
@ -96,10 +95,10 @@ source("ExampleProject_R_2018-06-03_1700.r")
metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")
# Split the tables
REDCapRITS::REDCap_split(data, metadata)
REDCapCAST::REDCap_split(data, metadata)
setwd(old)
}
}
\author{
Paul W. Egeler, M.S., GStat
Paul W. Egeler
}

View File

@ -8,7 +8,6 @@
\alias{as_factor.character}
\alias{as_factor.haven_labelled}
\alias{as_factor.labelled}
\alias{as_factor.redcapcast_labelled}
\alias{as_factor.data.frame}
\title{Convert labelled vectors to factors while preserving attributes}
\usage{
@ -36,13 +35,6 @@ as_factor(x, ...)
...
)
\method{as_factor}{redcapcast_labelled}(
x,
levels = c("default", "labels", "values", "both"),
ordered = FALSE,
...
)
\method{as_factor}{data.frame}(x, ..., only_labelled = TRUE)
}
\arguments{
@ -64,7 +56,7 @@ as_factor(x, ...)
\item{only_labelled}{Only apply to labelled columns?}
}
\description{
This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending
This extends \link[forcats]{as_factor} as well as \link[haven]{as_factor}, by appending
original attributes except for "class" after converting to factor to avoid
ta loss in case of rich formatted and labelled data.
}

View File

@ -8,13 +8,13 @@ easy_redcap(project.name, widen.data = TRUE, uri, ...)
}
\arguments{
\item{project.name}{The name of the current project (for key storage with
`keyring::key_set()`, using the default keyring)}
\link[keyring]{key_set}, using the default keyring)}
\item{widen.data}{argument to widen the exported data}
\item{uri}{REDCap database API uri}
\item{...}{arguments passed on to `REDCapCAST::read_redcap_tables()`}
\item{...}{arguments passed on to \link[REDCapCAST]{read_redcap_tables}.}
}
\value{
data.frame or list depending on widen.data
@ -22,3 +22,8 @@ data.frame or list depending on widen.data
\description{
Secure API key storage and data acquisition in one
}
\examples{
\dontrun{
easy_redcap("My_new_project",fields=c("record_id","age","hypertension"))
}
}

View File

@ -34,15 +34,9 @@ structure(c(1, 2, 3, 2, 10, 9),
as_factor() |>
fct2num()
# Outlier with labels, but no class of origin, handled like numeric vector
# structure(c(1, 2, 3, 2, 10, 9),
# labels = c(Unknown = 9, Refused = 10)
# ) |>
# as_factor() |>
# fct2num()
v <- sample(6:19, 20, TRUE) |> factor()
dput(v)
named_levels(v)
fct2num(v)
structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10)
) |>
as_factor() |>
fct2num()
}

View File

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_redcap_tables.R
\name{format_redcap_factor}
\alias{format_redcap_factor}
\title{Converts REDCap choices to factor levels and stores in labels attribute}
\usage{
format_redcap_factor(data, meta)
}
\arguments{
\item{data}{vector}
\item{meta}{vector of REDCap choices}
}
\value{
vector of class "labelled" with a "labels" attribute
}
\description{
Applying \link[REDCapCAST]{as_factor} to the data.frame or variable, will
coerce to a factor.
}
\examples{
format_redcap_factor(sample(1:3,20,TRUE),"1, First. | 2, second | 3, THIRD")
}

View File

@ -4,14 +4,18 @@
\alias{get_api_key}
\title{Retrieve project API key if stored, if not, set and retrieve}
\usage{
get_api_key(key.name)
get_api_key(key.name, ...)
}
\arguments{
\item{key.name}{character vector of key name}
\item{...}{passed to \link[keyring]{key_set}}
}
\value{
character vector
}
\description{
Retrieve project API key if stored, if not, set and retrieve
Attempting to make secure API key storage so simple, that no other way makes
sense. Wrapping \link[keyring]{key_get} and \link[keyring]{key_set} using the
\link[keyring]{key_list} to check if key is in storage already.
}

View File

@ -4,10 +4,7 @@
\alias{is.labelled}
\title{Tests for multiple label classes}
\usage{
is.labelled(
x,
classes = c("redcapcast_labelled", "haven_labelled", "labelled")
)
is.labelled(x, classes = c("haven_labelled", "labelled"))
}
\arguments{
\item{x}{data}

View File

@ -4,7 +4,13 @@
\alias{named_levels}
\title{Get named vector of factor levels and values}
\usage{
named_levels(data, label = "labels", na.label = NULL, na.value = 99)
named_levels(
data,
label = "labels",
na.label = NULL,
na.value = 99,
sort.numeric = TRUE
)
}
\arguments{
\item{data}{factor}
@ -15,6 +21,8 @@ named_levels(data, label = "labels", na.label = NULL, na.value = 99)
\item{na.value}{new value for NA strings. Ignored if na.label is NULL.
Default is 99.}
\item{sort.numeric}{sort factor levels if levels are numeric. Default is TRUE}
}
\value{
named vector
@ -23,12 +31,16 @@ named vector
Get named vector of factor levels and values
}
\examples{
\dontrun{
structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "haven_labelled"
) |>
as_factor() |>
named_levels()
}
structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "labelled"
) |>
as_factor() |>
named_levels()
}

23
man/possibly_numeric.Rd Normal file
View File

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_factor.R
\name{possibly_numeric}
\alias{possibly_numeric}
\title{Tests if vector can be interpreted as numeric without introducing NAs by
coercion}
\usage{
possibly_numeric(data)
}
\arguments{
\item{data}{vector}
}
\value{
logical
}
\description{
Tests if vector can be interpreted as numeric without introducing NAs by
coercion
}
\examples{
c("1","5") |> possibly_numeric()
c("1","5","e") |> possibly_numeric()
}

View File

@ -11,8 +11,9 @@ read_redcap_tables(
fields = NULL,
events = NULL,
forms = NULL,
raw_or_label = "label",
split_forms = "all"
raw_or_label = c("raw", "label", "both"),
split_forms = "all",
...
)
}
\arguments{
@ -28,27 +29,32 @@ read_redcap_tables(
\item{forms}{forms to download}
\item{raw_or_label}{raw or label tags. Can be
\item{raw_or_label}{raw or label tags. Can be "raw", "label" or "both".
* "raw": Standard [REDCapR] method to get raw values.
* "label": Standard [REDCapR] method to get label values.
* "raw": Standard \link[REDCapR]{redcap_read} method to get raw values.
* "label": Standard \link[REDCapR]{redcap_read} method to get label values.
* "both": Get raw values with REDCap labels applied as labels. Use
[as_factor()] to format factors with original labels and use the
[gtsummary] package to easily get beautiful tables with original labels
from REDCap. Use [fct_drop()] to drop empty levels.}
\link[REDCapCAST]{as_factor} to format factors with original labels and use
the `gtsummary` package functions like \link[gtsummary]{tbl_summary} to
easily get beautiful tables with original labels from REDCap. Use
\link[REDCapCAST]{fct_drop} to drop empty levels.}
\item{split_forms}{Whether to split "repeating" or "all" forms, default is
all.}
\item{...}{passed on to \link[REDCapR]{redcap_read}}
}
\value{
list of instruments
}
\description{
Implementation of REDCap_split with a focused data acquisition approach using
REDCapR::redcap_read and only downloading specified fields, forms and/or
events using the built-in focused_metadata including some clean-up.
Implementation of passed on to \link[REDCapCAST]{REDCap_split} with a focused
data acquisition approach using passed on to \link[REDCapR]{redcap_read} and
only downloading specified fields, forms and/or events using the built-in
focused_metadata including some clean-up.
Works with classical and longitudinal projects with or without repeating
instruments.
Will preserve metadata in the data.frames as labels.
}
\examples{
# Examples will be provided later

View File

@ -2,7 +2,7 @@
% Please edit documentation in R/redcap_wider.R
\name{redcap_wider}
\alias{redcap_wider}
\title{Redcap Wider}
\title{Transforms list of REDCap data.frames to a single wide data.frame}
\usage{
redcap_wider(
data,
@ -11,18 +11,20 @@ redcap_wider(
)
}
\arguments{
\item{data}{A list of data frames.}
\item{data}{A list of data frames}
\item{event.glue}{A dplyr::glue string for repeated events naming}
\item{event.glue}{A \link[glue]{glue} string for repeated events naming}
\item{inst.glue}{A dplyr::glue string for repeated instruments naming}
\item{inst.glue}{A \link[glue]{glue} string for repeated instruments naming}
}
\value{
The list of data frames in wide format.
data.frame in wide format
}
\description{
Converts a list of REDCap data frames from long to wide format.
Handles longitudinal projects, but not yet repeated instruments.
Converts a list of REDCap data.frames from long to wide format.
In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
split by \link[REDCapCAST]{REDCap_split}.
}
\examples{
# Longitudinal

View File

@ -7,13 +7,20 @@
sanitize_split(
l,
generic.names = c("redcap_event_name", "redcap_repeat_instrument",
"redcap_repeat_instance")
"redcap_repeat_instance"),
drop.complete = TRUE,
drop.empty = TRUE
)
}
\arguments{
\item{l}{A list of data frames.}
\item{generic.names}{A vector of generic names to be excluded.}
\item{drop.complete}{logical to remove genric REDCap variables indicating
instrument completion. Default is TRUE.}
\item{drop.empty}{logical to remove variables with only NAs Default is TRUE.}
}
\value{
A list of data frames with generic names excluded.