mirror of
https://github.com/agdamsbo/REDCapCAST.git
synced 2025-01-18 21:16:34 +01:00
support labelled data
This commit is contained in:
parent
5926c12da6
commit
2aa268f747
@ -4,14 +4,20 @@ utils::globalVariables(c(
|
||||
"inst.glue"
|
||||
))
|
||||
|
||||
#' @title Redcap Wider
|
||||
#' @description Converts a list of REDCap data frames from long to wide format.
|
||||
#' Handles longitudinal projects, but not yet repeated instruments.
|
||||
#' @param data A list of data frames.
|
||||
#' @param event.glue A dplyr::glue string for repeated events naming
|
||||
#' @param inst.glue A dplyr::glue string for repeated instruments naming
|
||||
#' @return The list of data frames in wide format.
|
||||
#' Transforms list of REDCap data.frames to a single wide data.frame
|
||||
#'
|
||||
#' @description Converts a list of REDCap data.frames from long to wide format.
|
||||
#' In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
|
||||
#' on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
|
||||
#' split by \link[REDCapCAST]{REDCap_split}.
|
||||
#'
|
||||
#' @param data A list of data frames
|
||||
#' @param event.glue A \link[glue]{glue} string for repeated events naming
|
||||
#' @param inst.glue A \link[glue]{glue} string for repeated instruments naming
|
||||
#'
|
||||
#' @return data.frame in wide format
|
||||
#' @export
|
||||
#'
|
||||
#' @importFrom tidyr pivot_wider
|
||||
#' @importFrom tidyselect all_of
|
||||
#' @importFrom purrr reduce
|
||||
@ -77,6 +83,7 @@ redcap_wider <-
|
||||
function(data,
|
||||
event.glue = "{.value}_{redcap_event_name}",
|
||||
inst.glue = "{.value}_{redcap_repeat_instance}") {
|
||||
# browser()
|
||||
if (!is_repeated_longitudinal(data)) {
|
||||
if (is.list(data)) {
|
||||
if (length(data) == 1) {
|
||||
@ -91,6 +98,7 @@ redcap_wider <-
|
||||
id.name <- do.call(c, lapply(data, names))[[1]]
|
||||
|
||||
l <- lapply(data, function(i) {
|
||||
# browser()
|
||||
rep_inst <- "redcap_repeat_instrument" %in% names(i)
|
||||
|
||||
if (rep_inst) {
|
||||
@ -111,7 +119,15 @@ redcap_wider <-
|
||||
)
|
||||
s[!colnames(s) %in% c("redcap_repeat_instrument")]
|
||||
})
|
||||
i <- Reduce(dplyr::bind_rows, k)
|
||||
|
||||
# Labels are removed and restored after bind_rows as class "labelled"
|
||||
# is not supported
|
||||
i <- remove_labelled(k) |>
|
||||
dplyr::bind_rows()
|
||||
|
||||
all_labels <- save_labels(data)
|
||||
|
||||
i <- restore_labels(i, all_labels)
|
||||
}
|
||||
|
||||
event <- "redcap_event_name" %in% names(i)
|
||||
@ -141,8 +157,51 @@ redcap_wider <-
|
||||
}
|
||||
})
|
||||
|
||||
out <- data.frame(Reduce(f = dplyr::full_join, x = l))
|
||||
# out <- Reduce(f = dplyr::full_join, x = l)
|
||||
out <- purrr::reduce(.x = l, .f = dplyr::full_join)
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
# Applies list of attributes to data.frame
|
||||
restore_labels <- function(data, labels) {
|
||||
stopifnot(is.list(labels))
|
||||
stopifnot(is.data.frame(data))
|
||||
for (ndx in names(labels)) {
|
||||
data <- purrr::imap(data, \(.y, .j){
|
||||
if (startsWith(.j, ndx)) {
|
||||
set_attr(.y, labels[[ndx]])
|
||||
} else {
|
||||
.y
|
||||
}
|
||||
}) |> dplyr::bind_cols()
|
||||
}
|
||||
return(data)
|
||||
}
|
||||
|
||||
# Extract unique variable attributes from list of data.frames
|
||||
save_labels <- function(data) {
|
||||
stopifnot(is.list(data))
|
||||
out <- list()
|
||||
for (j in seq_along(data)) {
|
||||
out <- c(out, lapply(data[[j]], get_attr))
|
||||
}
|
||||
|
||||
out[!duplicated(names(out))]
|
||||
}
|
||||
|
||||
# Removes class attributes of class "labelled" or "haven_labelled"
|
||||
remove_labelled <- function(data){
|
||||
stopifnot(is.list(data))
|
||||
lapply(data, \(.x) {
|
||||
lapply(.x, \(.y) {
|
||||
if (REDCapCAST::is.labelled(.y)) {
|
||||
set_attr(.y, label = NULL, attr = "class")
|
||||
} else {
|
||||
.y
|
||||
}
|
||||
}) |>
|
||||
dplyr::bind_cols()
|
||||
})
|
||||
}
|
||||
|
@ -21,8 +21,7 @@ call.}
|
||||
JSON from an API call.}
|
||||
|
||||
\item{primary_table_name}{Name given to the list element for the primary
|
||||
output table (as described in \emph{README.md}). Ignored if
|
||||
\code{forms = 'all'}.}
|
||||
output table. Ignored if \code{forms = 'all'}.}
|
||||
|
||||
\item{forms}{Indicate whether to create separate tables for repeating
|
||||
instruments only or for all forms.}
|
||||
@ -66,7 +65,7 @@ metadata <- postForm(
|
||||
)
|
||||
|
||||
# Convert exported JSON strings into a list of data.frames
|
||||
REDCapRITS::REDCap_split(records, metadata)
|
||||
REDCapCAST::REDCap_split(records, metadata)
|
||||
|
||||
# Using a raw data export -------------------------------------------------
|
||||
|
||||
@ -79,7 +78,7 @@ metadata <- read.csv(
|
||||
)
|
||||
|
||||
# Split the tables
|
||||
REDCapRITS::REDCap_split(records, metadata)
|
||||
REDCapCAST::REDCap_split(records, metadata)
|
||||
|
||||
# In conjunction with the R export script ---------------------------------
|
||||
|
||||
@ -96,10 +95,10 @@ source("ExampleProject_R_2018-06-03_1700.r")
|
||||
metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")
|
||||
|
||||
# Split the tables
|
||||
REDCapRITS::REDCap_split(data, metadata)
|
||||
REDCapCAST::REDCap_split(data, metadata)
|
||||
setwd(old)
|
||||
}
|
||||
}
|
||||
\author{
|
||||
Paul W. Egeler, M.S., GStat
|
||||
Paul W. Egeler
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
\alias{as_factor.character}
|
||||
\alias{as_factor.haven_labelled}
|
||||
\alias{as_factor.labelled}
|
||||
\alias{as_factor.redcapcast_labelled}
|
||||
\alias{as_factor.data.frame}
|
||||
\title{Convert labelled vectors to factors while preserving attributes}
|
||||
\usage{
|
||||
@ -36,13 +35,6 @@ as_factor(x, ...)
|
||||
...
|
||||
)
|
||||
|
||||
\method{as_factor}{redcapcast_labelled}(
|
||||
x,
|
||||
levels = c("default", "labels", "values", "both"),
|
||||
ordered = FALSE,
|
||||
...
|
||||
)
|
||||
|
||||
\method{as_factor}{data.frame}(x, ..., only_labelled = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
@ -64,7 +56,7 @@ as_factor(x, ...)
|
||||
\item{only_labelled}{Only apply to labelled columns?}
|
||||
}
|
||||
\description{
|
||||
This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending
|
||||
This extends \link[forcats]{as_factor} as well as \link[haven]{as_factor}, by appending
|
||||
original attributes except for "class" after converting to factor to avoid
|
||||
ta loss in case of rich formatted and labelled data.
|
||||
}
|
||||
|
@ -8,13 +8,13 @@ easy_redcap(project.name, widen.data = TRUE, uri, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{project.name}{The name of the current project (for key storage with
|
||||
`keyring::key_set()`, using the default keyring)}
|
||||
\link[keyring]{key_set}, using the default keyring)}
|
||||
|
||||
\item{widen.data}{argument to widen the exported data}
|
||||
|
||||
\item{uri}{REDCap database API uri}
|
||||
|
||||
\item{...}{arguments passed on to `REDCapCAST::read_redcap_tables()`}
|
||||
\item{...}{arguments passed on to \link[REDCapCAST]{read_redcap_tables}.}
|
||||
}
|
||||
\value{
|
||||
data.frame or list depending on widen.data
|
||||
@ -22,3 +22,8 @@ data.frame or list depending on widen.data
|
||||
\description{
|
||||
Secure API key storage and data acquisition in one
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
easy_redcap("My_new_project",fields=c("record_id","age","hypertension"))
|
||||
}
|
||||
}
|
||||
|
@ -34,15 +34,9 @@ structure(c(1, 2, 3, 2, 10, 9),
|
||||
as_factor() |>
|
||||
fct2num()
|
||||
|
||||
# Outlier with labels, but no class of origin, handled like numeric vector
|
||||
# structure(c(1, 2, 3, 2, 10, 9),
|
||||
# labels = c(Unknown = 9, Refused = 10)
|
||||
# ) |>
|
||||
# as_factor() |>
|
||||
# fct2num()
|
||||
|
||||
v <- sample(6:19, 20, TRUE) |> factor()
|
||||
dput(v)
|
||||
named_levels(v)
|
||||
fct2num(v)
|
||||
structure(c(1, 2, 3, 2, 10, 9),
|
||||
labels = c(Unknown = 9, Refused = 10)
|
||||
) |>
|
||||
as_factor() |>
|
||||
fct2num()
|
||||
}
|
||||
|
23
man/format_redcap_factor.Rd
Normal file
23
man/format_redcap_factor.Rd
Normal file
@ -0,0 +1,23 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/read_redcap_tables.R
|
||||
\name{format_redcap_factor}
|
||||
\alias{format_redcap_factor}
|
||||
\title{Converts REDCap choices to factor levels and stores in labels attribute}
|
||||
\usage{
|
||||
format_redcap_factor(data, meta)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{vector}
|
||||
|
||||
\item{meta}{vector of REDCap choices}
|
||||
}
|
||||
\value{
|
||||
vector of class "labelled" with a "labels" attribute
|
||||
}
|
||||
\description{
|
||||
Applying \link[REDCapCAST]{as_factor} to the data.frame or variable, will
|
||||
coerce to a factor.
|
||||
}
|
||||
\examples{
|
||||
format_redcap_factor(sample(1:3,20,TRUE),"1, First. | 2, second | 3, THIRD")
|
||||
}
|
@ -4,14 +4,18 @@
|
||||
\alias{get_api_key}
|
||||
\title{Retrieve project API key if stored, if not, set and retrieve}
|
||||
\usage{
|
||||
get_api_key(key.name)
|
||||
get_api_key(key.name, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{key.name}{character vector of key name}
|
||||
|
||||
\item{...}{passed to \link[keyring]{key_set}}
|
||||
}
|
||||
\value{
|
||||
character vector
|
||||
}
|
||||
\description{
|
||||
Retrieve project API key if stored, if not, set and retrieve
|
||||
Attempting to make secure API key storage so simple, that no other way makes
|
||||
sense. Wrapping \link[keyring]{key_get} and \link[keyring]{key_set} using the
|
||||
\link[keyring]{key_list} to check if key is in storage already.
|
||||
}
|
||||
|
@ -4,10 +4,7 @@
|
||||
\alias{is.labelled}
|
||||
\title{Tests for multiple label classes}
|
||||
\usage{
|
||||
is.labelled(
|
||||
x,
|
||||
classes = c("redcapcast_labelled", "haven_labelled", "labelled")
|
||||
)
|
||||
is.labelled(x, classes = c("haven_labelled", "labelled"))
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{data}
|
||||
|
@ -4,7 +4,13 @@
|
||||
\alias{named_levels}
|
||||
\title{Get named vector of factor levels and values}
|
||||
\usage{
|
||||
named_levels(data, label = "labels", na.label = NULL, na.value = 99)
|
||||
named_levels(
|
||||
data,
|
||||
label = "labels",
|
||||
na.label = NULL,
|
||||
na.value = 99,
|
||||
sort.numeric = TRUE
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{factor}
|
||||
@ -15,6 +21,8 @@ named_levels(data, label = "labels", na.label = NULL, na.value = 99)
|
||||
|
||||
\item{na.value}{new value for NA strings. Ignored if na.label is NULL.
|
||||
Default is 99.}
|
||||
|
||||
\item{sort.numeric}{sort factor levels if levels are numeric. Default is TRUE}
|
||||
}
|
||||
\value{
|
||||
named vector
|
||||
@ -23,12 +31,16 @@ named vector
|
||||
Get named vector of factor levels and values
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
structure(c(1, 2, 3, 2, 10, 9),
|
||||
labels = c(Unknown = 9, Refused = 10),
|
||||
class = "haven_labelled"
|
||||
) |>
|
||||
as_factor() |>
|
||||
named_levels()
|
||||
}
|
||||
structure(c(1, 2, 3, 2, 10, 9),
|
||||
labels = c(Unknown = 9, Refused = 10),
|
||||
class = "labelled"
|
||||
) |>
|
||||
as_factor() |>
|
||||
named_levels()
|
||||
}
|
||||
|
23
man/possibly_numeric.Rd
Normal file
23
man/possibly_numeric.Rd
Normal file
@ -0,0 +1,23 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/as_factor.R
|
||||
\name{possibly_numeric}
|
||||
\alias{possibly_numeric}
|
||||
\title{Tests if vector can be interpreted as numeric without introducing NAs by
|
||||
coercion}
|
||||
\usage{
|
||||
possibly_numeric(data)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{vector}
|
||||
}
|
||||
\value{
|
||||
logical
|
||||
}
|
||||
\description{
|
||||
Tests if vector can be interpreted as numeric without introducing NAs by
|
||||
coercion
|
||||
}
|
||||
\examples{
|
||||
c("1","5") |> possibly_numeric()
|
||||
c("1","5","e") |> possibly_numeric()
|
||||
}
|
@ -11,8 +11,9 @@ read_redcap_tables(
|
||||
fields = NULL,
|
||||
events = NULL,
|
||||
forms = NULL,
|
||||
raw_or_label = "label",
|
||||
split_forms = "all"
|
||||
raw_or_label = c("raw", "label", "both"),
|
||||
split_forms = "all",
|
||||
...
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
@ -28,27 +29,32 @@ read_redcap_tables(
|
||||
|
||||
\item{forms}{forms to download}
|
||||
|
||||
\item{raw_or_label}{raw or label tags. Can be
|
||||
\item{raw_or_label}{raw or label tags. Can be "raw", "label" or "both".
|
||||
|
||||
* "raw": Standard [REDCapR] method to get raw values.
|
||||
* "label": Standard [REDCapR] method to get label values.
|
||||
* "raw": Standard \link[REDCapR]{redcap_read} method to get raw values.
|
||||
* "label": Standard \link[REDCapR]{redcap_read} method to get label values.
|
||||
* "both": Get raw values with REDCap labels applied as labels. Use
|
||||
[as_factor()] to format factors with original labels and use the
|
||||
[gtsummary] package to easily get beautiful tables with original labels
|
||||
from REDCap. Use [fct_drop()] to drop empty levels.}
|
||||
\link[REDCapCAST]{as_factor} to format factors with original labels and use
|
||||
the `gtsummary` package functions like \link[gtsummary]{tbl_summary} to
|
||||
easily get beautiful tables with original labels from REDCap. Use
|
||||
\link[REDCapCAST]{fct_drop} to drop empty levels.}
|
||||
|
||||
\item{split_forms}{Whether to split "repeating" or "all" forms, default is
|
||||
all.}
|
||||
|
||||
\item{...}{passed on to \link[REDCapR]{redcap_read}}
|
||||
}
|
||||
\value{
|
||||
list of instruments
|
||||
}
|
||||
\description{
|
||||
Implementation of REDCap_split with a focused data acquisition approach using
|
||||
REDCapR::redcap_read and only downloading specified fields, forms and/or
|
||||
events using the built-in focused_metadata including some clean-up.
|
||||
Implementation of passed on to \link[REDCapCAST]{REDCap_split} with a focused
|
||||
data acquisition approach using passed on to \link[REDCapR]{redcap_read} and
|
||||
only downloading specified fields, forms and/or events using the built-in
|
||||
focused_metadata including some clean-up.
|
||||
Works with classical and longitudinal projects with or without repeating
|
||||
instruments.
|
||||
Will preserve metadata in the data.frames as labels.
|
||||
}
|
||||
\examples{
|
||||
# Examples will be provided later
|
||||
|
@ -2,7 +2,7 @@
|
||||
% Please edit documentation in R/redcap_wider.R
|
||||
\name{redcap_wider}
|
||||
\alias{redcap_wider}
|
||||
\title{Redcap Wider}
|
||||
\title{Transforms list of REDCap data.frames to a single wide data.frame}
|
||||
\usage{
|
||||
redcap_wider(
|
||||
data,
|
||||
@ -11,18 +11,20 @@ redcap_wider(
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{A list of data frames.}
|
||||
\item{data}{A list of data frames}
|
||||
|
||||
\item{event.glue}{A dplyr::glue string for repeated events naming}
|
||||
\item{event.glue}{A \link[glue]{glue} string for repeated events naming}
|
||||
|
||||
\item{inst.glue}{A dplyr::glue string for repeated instruments naming}
|
||||
\item{inst.glue}{A \link[glue]{glue} string for repeated instruments naming}
|
||||
}
|
||||
\value{
|
||||
The list of data frames in wide format.
|
||||
data.frame in wide format
|
||||
}
|
||||
\description{
|
||||
Converts a list of REDCap data frames from long to wide format.
|
||||
Handles longitudinal projects, but not yet repeated instruments.
|
||||
Converts a list of REDCap data.frames from long to wide format.
|
||||
In essence it is a wrapper for the \link[tidyr]{pivot_wider} function applied
|
||||
on a REDCap output (from \link[REDCapCAST]{read_redcap_tables}) or manually
|
||||
split by \link[REDCapCAST]{REDCap_split}.
|
||||
}
|
||||
\examples{
|
||||
# Longitudinal
|
||||
|
@ -7,13 +7,20 @@
|
||||
sanitize_split(
|
||||
l,
|
||||
generic.names = c("redcap_event_name", "redcap_repeat_instrument",
|
||||
"redcap_repeat_instance")
|
||||
"redcap_repeat_instance"),
|
||||
drop.complete = TRUE,
|
||||
drop.empty = TRUE
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{l}{A list of data frames.}
|
||||
|
||||
\item{generic.names}{A vector of generic names to be excluded.}
|
||||
|
||||
\item{drop.complete}{logical to remove genric REDCap variables indicating
|
||||
instrument completion. Default is TRUE.}
|
||||
|
||||
\item{drop.empty}{logical to remove variables with only NAs Default is TRUE.}
|
||||
}
|
||||
\value{
|
||||
A list of data frames with generic names excluded.
|
||||
|
Loading…
x
Reference in New Issue
Block a user