added option to export "both" raw and label by labelling raw data to preserve as much information as possible

This commit is contained in:
Andreas Gammelgaard Damsbo 2024-11-27 09:51:51 +01:00
parent 57f9f23ece
commit 2ba46e8e7a
No known key found for this signature in database
11 changed files with 177 additions and 230 deletions

View File

@ -31,6 +31,7 @@ easy_redcap <- function(project.name, widen.data = TRUE, uri, ...) {
out <- read_redcap_tables( out <- read_redcap_tables(
uri = uri, uri = uri,
token = key, token = key,
raw_or_label = "both",
... ...
) )
@ -40,167 +41,3 @@ easy_redcap <- function(project.name, widen.data = TRUE, uri, ...) {
out out
} }
#' REDCap read function to preserve field labels and all factor levels
#'
#' @description
#' This works very much as `read_redcap_tables()` and might end up there
#'
#'
#' @param uri REDCap database API uri
#' @param token API token
#' @param records records to download
#' @param fields fields to download
#' @param events events to download
#' @param forms forms to download
#' @param split_forms Whether to split "repeating" or "all" forms, default is
#' "all".
#'
#' @return data.frame or list
#' @export
#'
read_redcap_labelled <- function(uri,
token,
records = NULL,
fields = NULL,
events = NULL,
forms = NULL,
split_forms = "all") {
m <-
REDCapR::redcap_metadata_read(redcap_uri = uri, token = token)[["data"]]
# Tests
if (!is.null(fields)) {
fields_test <- fields %in% c(m$field_name, paste0(unique(m$form_name), "_complete"))
if (any(!fields_test)) {
print(paste0(
"The following field names are invalid: ",
paste(fields[!fields_test], collapse = ", "), "."
))
stop("Not all supplied field names are valid")
}
}
if (!is.null(forms)) {
forms_test <- forms %in% unique(m$form_name)
if (any(!forms_test)) {
print(paste0(
"The following form names are invalid: ",
paste(forms[!forms_test], collapse = ", "), "."
))
stop("Not all supplied form names are valid")
}
}
if (!is.null(events)) {
arm_event_inst <- REDCapR::redcap_event_instruments(
redcap_uri = uri,
token = token
)
event_test <- events %in% unique(arm_event_inst$data$unique_event_name)
if (any(!event_test)) {
print(paste0(
"The following event names are invalid: ",
paste(events[!event_test], collapse = ", "), "."
))
stop("Not all supplied event names are valid")
}
}
# Getting dataset
d <- REDCapR::redcap_read(
redcap_uri = uri,
token = token,
fields = fields,
events = events,
forms = forms,
records = records,
raw_or_label = "raw"
)[["data"]]
# Applying labels
d <- purrr::imap(d, \(.x, .i){
if (.i %in% m$field_name) {
# Does not handle checkboxes
out <- set_attr(.x,
label = clean_field_label(m$field_label[m$field_name == .i]),
attr = "label"
)
out
} else {
.x
}
}) |> dplyr::bind_cols()
d <- purrr::imap(d, \(.x, .i){
if (any(c("radio", "dropdown") %in% m$field_type[m$field_name == .i])) {
format_redcap_factor(.x, m$select_choices_or_calculations[m$field_name == .i])
} else {
.x
}
}) |> dplyr::bind_cols()
# Process repeat instrument naming
# Removes any extra characters other than a-z, 0-9 and "_", to mimic raw
# instrument names.
if ("redcap_repeat_instrument" %in% names(d)) {
d$redcap_repeat_instrument <- clean_redcap_name(d$redcap_repeat_instrument)
}
# Processing metadata to reflect focused dataset
m <- focused_metadata(m, names(d))
# Splitting
out <- REDCap_split(d,
m,
forms = split_forms,
primary_table_name = ""
)
sanitize_split(out)
}
#' Very simple function to remove rich text formatting from field label
#' and save the first paragraph ('<p>...</p>').
#'
#' @param data field label
#'
#' @return character vector
#' @export
#'
#' @examples
#' clean_field_label("<div class=\"rich-text-field-label\"><p>Fazekas score</p></div>")
clean_field_label <- function(data) {
out <- data |>
lapply(\(.x){
unlist(strsplit(.x, "</"))[1]
}) |>
lapply(\(.x){
splt <- unlist(strsplit(.x, ">"))
splt[length(splt)]
})
Reduce(c, out)
}
format_redcap_factor <- function(data, meta) {
lvls <- strsplit(meta, " | ", fixed = TRUE) |>
unlist() |>
lapply(\(.x){
splt <- unlist(strsplit(.x, ", "))
stats::setNames(splt[1], nm = paste(splt[-1], collapse = ", "))
}) |>
(\(.x){
Reduce(c, .x)
})()
set_attr(data, label = lvls, attr = "labels") |>
set_attr(data, label = "labelled", attr = "class") |>
as_factor()
}

View File

@ -11,7 +11,15 @@
#' @param fields fields to download #' @param fields fields to download
#' @param events events to download #' @param events events to download
#' @param forms forms to download #' @param forms forms to download
#' @param raw_or_label raw or label tags #' @param raw_or_label raw or label tags. Can be
#'
#' * "raw": Standard [REDCapR] method to get raw values.
#' * "label": Standard [REDCapR] method to get label values.
#' * "both": Get raw values with REDCap labels applied as labels. Use
#' [as_factor()] to format factors with original labels and use the
#' [gtsummary] package to easily get beautiful tables with original labels
#' from REDCap. Use [fct_drop()] to drop empty levels.
#'
#' @param split_forms Whether to split "repeating" or "all" forms, default is #' @param split_forms Whether to split "repeating" or "all" forms, default is
#' all. #' all.
#' #'
@ -70,6 +78,12 @@ read_redcap_tables <- function(uri,
} }
} }
if (raw_or_label=="both"){
rorl <- "raw"
} else {
rorl <- raw_or_label
}
# Getting dataset # Getting dataset
d <- REDCapR::redcap_read( d <- REDCapR::redcap_read(
redcap_uri = uri, redcap_uri = uri,
@ -78,9 +92,16 @@ read_redcap_tables <- function(uri,
events = events, events = events,
forms = forms, forms = forms,
records = records, records = records,
raw_or_label = raw_or_label raw_or_label = rorl
)[["data"]] )[["data"]]
if (raw_or_label=="both"){
d <- apply_field_label(data=d,meta=m)
d <- apply_factor_labels(data=d,meta=m)
}
# Process repeat instrument naming # Process repeat instrument naming
# Removes any extra characters other than a-z, 0-9 and "_", to mimic raw # Removes any extra characters other than a-z, 0-9 and "_", to mimic raw
# instrument names. # instrument names.
@ -101,3 +122,84 @@ read_redcap_tables <- function(uri,
sanitize_split(out) sanitize_split(out)
} }
#' Very simple function to remove rich text formatting from field label
#' and save the first paragraph ('<p>...</p>').
#'
#' @param data field label
#'
#' @return character vector
#' @export
#'
#' @examples
#' clean_field_label("<div class=\"rich-text-field-label\"><p>Fazekas score</p></div>")
clean_field_label <- function(data) {
out <- data |>
lapply(\(.x){
unlist(strsplit(.x, "</"))[1]
}) |>
lapply(\(.x){
splt <- unlist(strsplit(.x, ">"))
splt[length(splt)]
})
Reduce(c, out)
}
format_redcap_factor <- function(data, meta) {
lvls <- strsplit(meta, " | ", fixed = TRUE) |>
unlist() |>
lapply(\(.x){
splt <- unlist(strsplit(.x, ", "))
stats::setNames(splt[1], nm = paste(splt[-1], collapse = ", "))
}) |>
(\(.x){
Reduce(c, .x)
})()
set_attr(data, label = lvls, attr = "labels") |>
set_attr(data, label = "redcapcast_labelled", attr = "class")
}
#' Apply REDCap filed labels to data frame
#'
#' @param data REDCap exported data set
#' @param meta REDCap data dictionary
#'
#' @return data.frame
#' @export
#'
apply_field_label <- function(data,meta){
purrr::imap(data, \(.x, .i){
if (.i %in% meta$field_name) {
# Does not handle checkboxes
out <- set_attr(.x,
label = clean_field_label(meta$field_label[meta$field_name == .i]),
attr = "label"
)
out
} else {
.x
}
}) |> dplyr::bind_cols()
}
#' Preserve all factor levels from REDCap data dictionary in data export
#'
#' @param data REDCap exported data set
#' @param meta REDCap data dictionary
#'
#' @return data.frame
#' @export
#'
apply_factor_labels <- function(data,meta){
purrr::imap(data, \(.x, .i){
if (any(c("radio", "dropdown") %in% meta$field_type[meta$field_name == .i])) {
format_redcap_factor(.x, meta$select_choices_or_calculations[meta$field_name == .i])
} else {
.x
}
}) |> dplyr::bind_cols()
}

View File

@ -8,6 +8,8 @@
\alias{as_factor.character} \alias{as_factor.character}
\alias{as_factor.haven_labelled} \alias{as_factor.haven_labelled}
\alias{as_factor.labelled} \alias{as_factor.labelled}
\alias{as_factor.redcapcast_labelled}
\alias{as_factor.data.frame}
\title{Convert labelled vectors to factors while preserving attributes} \title{Convert labelled vectors to factors while preserving attributes}
\usage{ \usage{
as_factor(x, ...) as_factor(x, ...)
@ -33,6 +35,15 @@ as_factor(x, ...)
ordered = FALSE, ordered = FALSE,
... ...
) )
\method{as_factor}{redcapcast_labelled}(
x,
levels = c("default", "labels", "values", "both"),
ordered = FALSE,
...
)
\method{as_factor}{data.frame}(x, ..., only_labelled = TRUE)
} }
\arguments{ \arguments{
\item{x}{Object to coerce to a factor.} \item{x}{Object to coerce to a factor.}
@ -49,6 +60,8 @@ as_factor(x, ...)
\item{ordered}{If `TRUE` create an ordered (ordinal) factor, if \item{ordered}{If `TRUE` create an ordered (ordinal) factor, if
`FALSE` (the default) create a regular (nominal) factor.} `FALSE` (the default) create a regular (nominal) factor.}
\item{only_labelled}{Only apply to labelled columns?}
} }
\description{ \description{
This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/easy_redcap.R % Please edit documentation in R/read_redcap_tables.R
\name{clean_field_label} \name{clean_field_label}
\alias{clean_field_label} \alias{clean_field_label}
\title{Very simple function to remove rich text formatting from field label \title{Very simple function to remove rich text formatting from field label

View File

@ -11,7 +11,7 @@ ds2dd(
field.type = "text", field.type = "text",
field.label = NULL, field.label = NULL,
include.column.names = FALSE, include.column.names = FALSE,
metadata = REDCapCAST::redcap_meta_default() metadata = names(REDCapCAST::redcapcast_meta)
) )
} }
\arguments{ \arguments{
@ -34,7 +34,7 @@ names.}
column names for original data set for upload.} column names for original data set for upload.}
\item{metadata}{Metadata column names. Default is the included \item{metadata}{Metadata column names. Default is the included
REDCapCAST::redcap_meta_default.} names(REDCapCAST::redcapcast_meta).}
} }
\value{ \value{
data.frame or list of data.frame and vector data.frame or list of data.frame and vector

18
man/fct_drop.Rd Normal file
View File

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fct_drop.R
\name{fct_drop}
\alias{fct_drop}
\alias{fct_drop.data.frame}
\title{Drop unused levels preserving label data}
\usage{
fct_drop.data.frame(x, ...)
}
\arguments{
\item{x}{Factor to drop unused levels}
\item{...}{Other arguments passed down to method.}
}
\description{
This extends [forcats::fct_drop()] to natively work across a data.frame and
replace [base::droplevels()].
}

28
man/is.labelled.Rd Normal file
View File

@ -0,0 +1,28 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_factor.R
\name{is.labelled}
\alias{is.labelled}
\title{Tests for multiple label classes}
\usage{
is.labelled(
x,
classes = c("redcapcast_labelled", "haven_labelled", "labelled")
)
}
\arguments{
\item{x}{data}
\item{classes}{classes to test}
}
\value{
logical
}
\description{
Tests for multiple label classes
}
\examples{
structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "haven_labelled"
) |> is.labelled()
}

View File

@ -1,38 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/easy_redcap.R
\name{read_redcap_labelled}
\alias{read_redcap_labelled}
\title{REDCap read function to preserve field labels and all factor levels}
\usage{
read_redcap_labelled(
uri,
token,
records = NULL,
fields = NULL,
events = NULL,
forms = NULL,
split_forms = "all"
)
}
\arguments{
\item{uri}{REDCap database API uri}
\item{token}{API token}
\item{records}{records to download}
\item{fields}{fields to download}
\item{events}{events to download}
\item{forms}{forms to download}
\item{split_forms}{Whether to split "repeating" or "all" forms, default is
"all".}
}
\value{
data.frame or list
}
\description{
This works very much as `read_redcap_tables()` and might end up there
}

View File

@ -28,7 +28,14 @@ read_redcap_tables(
\item{forms}{forms to download} \item{forms}{forms to download}
\item{raw_or_label}{raw or label tags} \item{raw_or_label}{raw or label tags. Can be
* "raw": Standard [REDCapR] method to get raw values.
* "label": Standard [REDCapR] method to get label values.
* "both": Get raw values with REDCap labels applied as labels. Use
[as_factor()] to format factors with original labels and use the
[gtsummary] package to easily get beautiful tables with original labels
from REDCap. Use [fct_drop()] to drop empty levels.}
\item{split_forms}{Whether to split "repeating" or "all" forms, default is \item{split_forms}{Whether to split "repeating" or "all" forms, default is
all.} all.}

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{redcap_meta_default}
\alias{redcap_meta_default}
\title{Default column names of a REDCap data dictionary}
\usage{
redcap_meta_default(...)
}
\arguments{
\item{...}{ignored for now}
}
\value{
character vector
}
\description{
Default column names of a REDCap data dictionary
}
\examples{
dput(redcap_meta_default())
}

View File

@ -36,14 +36,14 @@ str(ds)
```{r} ```{r}
ds|> ds|>
ds2dd_detailed()|> ds2dd_detailed(metadata = names(REDCapCAST::redcapcast_meta))|>
purrr::pluck("data") |> purrr::pluck("data") |>
str() str()
``` ```
```{r} ```{r}
ds|> ds|>
ds2dd_detailed()|> ds2dd_detailed(metadata = names(REDCapCAST::redcapcast_meta))|>
purrr::pluck("meta") |> purrr::pluck("meta") |>
head(10) head(10)
``` ```