Compare commits

...

5 Commits

15 changed files with 441 additions and 121 deletions

View File

@ -60,12 +60,14 @@ Imports:
assertthat,
openxlsx2,
readODS,
forcats
forcats,
rlang
Collate:
'REDCapCAST-package.R'
'utils.r'
'process_user_input.r'
'REDCap_split.r'
'as_factor.R'
'doc2dd.R'
'ds2dd.R'
'ds2dd_detailed.R'

View File

@ -1,10 +1,16 @@
# Generated by roxygen2: do not edit by hand
S3method(as_factor,character)
S3method(as_factor,haven_labelled)
S3method(as_factor,labelled)
S3method(as_factor,logical)
S3method(as_factor,numeric)
S3method(process_user_input,character)
S3method(process_user_input,data.frame)
S3method(process_user_input,default)
S3method(process_user_input,response)
export(REDCap_split)
export(as_factor)
export(case_match_regex_list)
export(char2choice)
export(char2cond)
@ -17,6 +23,7 @@ export(ds2dd)
export(ds2dd_detailed)
export(easy_redcap)
export(export_redcap_instrument)
export(fct2num)
export(file_extension)
export(focused_metadata)
export(format_subheader)
@ -28,6 +35,7 @@ export(haven_all_levels)
export(html_tag_wrap)
export(is_repeated_longitudinal)
export(match_fields_to_form)
export(named_levels)
export(numchar2fct)
export(parse_data)
export(process_user_input)
@ -51,5 +59,6 @@ importFrom(keyring,key_set)
importFrom(openxlsx2,read_xlsx)
importFrom(purrr,reduce)
importFrom(readr,parse_time)
importFrom(rlang,check_dots_used)
importFrom(tidyr,pivot_wider)
importFrom(tidyselect,all_of)

249
R/as_factor.R Normal file
View File

@ -0,0 +1,249 @@
#' Convert labelled vectors to factors while preserving attributes
#'
#' This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending
#' original attributes except for "class" after converting to factor to avoid
#' ta loss in case of rich formatted and labelled data.
#'
#' Please refer to parent functions for extended documentation.
#'
#' @param x Object to coerce to a factor.
#' @param ... Other arguments passed down to method.
#' @export
#' @examples
#' # will preserve all attributes but class
#' c(1, 4, 3, "A", 7, 8, 1) |> as_factor()
#' structure(c(1, 2, 3, 2, 10, 9),
#' labels = c(Unknown = 9, Refused = 10)
#' ) |>
#' as_factor()
#'
#' structure(c(1, 2, 3, 2, 10, 9),
#' labels = c(Unknown = 9, Refused = 10),
#' class = "haven_labelled"
#' ) |>
#' as_factor()
#'
#' @importFrom forcats as_factor
#' @importFrom rlang check_dots_used
#' @export
#' @name as_factor
as_factor <- function(x, ...) {
rlang::check_dots_used()
UseMethod("as_factor")
}
#' @rdname as_factor
#' @export
as_factor.logical <- function(x, ...) {
labels <- get_attr(x)
x <- forcats::as_factor(x, ...)
set_attr(x, labels[-match("class", names(labels))])
}
#' @rdname as_factor
#' @export
as_factor.numeric <- function(x, ...) {
labels <- get_attr(x)
x <- forcats::as_factor(x, ...)
set_attr(x, labels[-match("class", names(labels))])
}
#' @rdname as_factor
#' @export
as_factor.character <- function(x, ...) {
labels <- get_attr(x)
x <- forcats::as_factor(x, ...)
set_attr(x, labels[-match("class", names(labels))])
}
#' @rdname as_factor
#' @export
as_factor.haven_labelled <- function(x, ...) {
labels <- get_attr(x)
x <- haven::as_factor(x, ...)
set_attr(x, labels[-match("class", names(labels))])
}
#' @export
#' @rdname as_factor
as_factor.labelled <- as_factor.haven_labelled
#' Get named vector of factor levels and values
#'
#' @param data factor
#' @param label character string of attribute with named vector of factor labels
#'
#' @return named vector
#' @export
#'
#' @examples
#' structure(c(1, 2, 3, 2, 10, 9),
#' labels = c(Unknown = 9, Refused = 10),
#' class = "haven_labelled"
#' ) |> as_factor() |> named_levels()
named_levels <- function(data, label = "labels",na.label=NULL, na.value=99) {
stopifnot(is.factor(data))
if (!is.null(na.label)){
attrs <- attributes(data)
lvls <- as.character(data)
lvls[is.na(lvls)] <- na.label
vals <- as.numeric(data)
vals[is.na(vals)] <- na.value
lbls <- data.frame(
name = lvls,
value = vals
) |> unique() |>
(\(d){
stats::setNames(d$value, d$name)
})() |>
sort()
data <- do.call(structure,
c(list(.Data=match(vals,lbls)),
attrs[-match("levels", names(attrs))],
list(levels=names(lbls),
labels=lbls)))
}
d <- data.frame(
name = levels(data)[data],
value = as.numeric(data)
) |>
unique()
## Applying labels
attr_l <- attr(x = data, which = label, exact = TRUE)
if (length(attr_l) != 0) {
d$value[match(names(attr_l), d$name)] <- unname(attr_l)
}
out <- stats::setNames(d$value, d$name)
## Sort if levels are numeric
## Else, they appear in order of appearance
if (identical(
levels(data),
suppressWarnings(as.character(as.numeric(levels(data))))
)) {
out <- out |> sort()
}
out
}
#' Allows conversion of factor to numeric values preserving original levels
#'
#' @param data vector
#'
#' @return numeric vector
#' @export
#'
#' @examples
#' c(1, 4, 3, "A", 7, 8, 1) |>
#' as_factor() |> fct2num()
#'
#' structure(c(1, 2, 3, 2, 10, 9),
#' labels = c(Unknown = 9, Refused = 10),
#' class = "haven_labelled"
#' ) |>
#' as_factor() |>
#' fct2num()
#'
#' structure(c(1, 2, 3, 2, 10, 9),
#' labels = c(Unknown = 9, Refused = 10)
#' ) |>
#' as_factor() |>
#' fct2num()
fct2num <- function(data) {
stopifnot(is.factor(data))
as.numeric(named_levels(data))[match(data, names(named_levels(data)))]
}
#' Extract attribute. Returns NA if none
#'
#' @param data vector
#' @param attr attribute name
#'
#' @return character vector
#' @export
#'
#' @examples
#' attr(mtcars$mpg, "label") <- "testing"
#' sapply(mtcars, get_attr)
#' lapply(mtcars, \(.x)get_attr(.x, NULL))
#' mtcars |>
#' numchar2fct(numeric.threshold = 6) |>
#' ds2dd_detailed()
get_attr <- function(data, attr = NULL) {
if (is.null(attr)) {
attributes(data)
} else {
a <- attr(data, attr, exact = TRUE)
if (is.null(a)) {
NA
} else {
a
}
}
}
#' Set attributes for named attribute. Appends if attr is NULL
#'
#' @param data vector
#' @param label label
#' @param attr attribute name
#'
#' @return vector with attribute
#' @export
#'
set_attr <- function(data, label, attr = NULL) {
if (is.null(attr)) {
## Has to be list...
stopifnot(is.list(label))
## ... with names
stopifnot(length(label)==length(names(label)))
attributes(data) <- c(attributes(data),label)
} else {
attr(data, attr) <- label
}
data
}
#' Finish incomplete haven attributes substituting missings with values
#'
#' @param data haven labelled variable
#'
#' @return named vector
#' @export
#'
#' @examples
#' ds <- structure(c(1, 2, 3, 2, 10, 9),
#' labels = c(Unknown = 9, Refused = 10),
#' class = "haven_labelled"
#' )
#' haven::is.labelled(ds)
#' attributes(ds)
#' ds |> haven_all_levels()
haven_all_levels <- function(data) {
stopifnot(haven::is.labelled(data))
if (length(attributes(data)$labels) == length(unique(data))) {
out <- attributes(data)$labels
} else {
att <- attributes(data)$labels
out <- c(unique(data[!data %in% att]), att) |>
stats::setNames(c(unique(data[!data %in% att]), names(att)))
}
out
}
# readr::read_rds("/Users/au301842/PAaSO/labelled_test.rds") |> ds2dd_detailed()
#' sample(c(TRUE,FALSE,NA),20,TRUE) |> set_attr("hidden","status") |> trial_fct() |> named_levels(na.label = "Missing") |> sort()
# trial_fct <- function(x){
# labels <- get_attr(x)
# x <- factor(x, levels = c("FALSE", "TRUE"))
# set_attr(x, labels[-match("class", names(labels))])
# }

View File

@ -172,20 +172,20 @@ ds2dd_detailed <- function(data,
if (convert.logicals) {
# Labels/attributes are saved
labels <- lapply(data, \(.x){
get_attr(.x, attr = NULL)
})
# labels <- lapply(data, \(.x){
# get_attr(.x, attr = NULL)
# })
no_attr <- data |>
data <- data |>
## Converts logical to factor, which overwrites attributes
dplyr::mutate(dplyr::across(dplyr::where(is.logical), forcats::as_factor))
dplyr::mutate(dplyr::across(dplyr::where(is.logical), as_factor))
# Old attributes are appended
data <- purrr::imap(no_attr,\(.x,.i){
attributes(.x) <- c(attributes(.x),labels[[.i]])
.x
}) |>
dplyr::bind_cols()
# data <- purrr::imap(no_attr,\(.x,.i){
# attributes(.x) <- c(attributes(.x),labels[[.i]])
# .x
# }) |>
# dplyr::bind_cols()
}
@ -262,7 +262,6 @@ ds2dd_detailed <- function(data,
}
}
data_classes <- do.call(c, lapply(data, \(.x)class(.x)[1]))
## field_type
@ -308,27 +307,15 @@ ds2dd_detailed <- function(data,
## choices
if (any(do.call(c, lapply(data, haven::is.labelled)))) {
factor_levels <- data |>
lapply(function(x) {
if (haven::is.labelled(x)) {
att <- haven_all_levels(x)
paste(paste(att, names(att), sep = ", "), collapse = " | ")
} else {
NA
}
}) |>
(\(x)do.call(c, x))()
} else {
factor_levels <- data |>
lapply(function(x) {
if (is.factor(x)) {
## Re-factors to avoid confusion with missing levels
## Assumes all relevant levels are represented in the data
re_fac <- factor(x)
## Custom function to ensure factor order and keep original values
## Avoiding refactoring to keep as much information as possible
lvls <- sort(named_levels(x))
paste(
paste(seq_along(levels(re_fac)),
levels(re_fac),
paste(lvls,
names(lvls),
sep = ", "
),
collapse = " | "
@ -338,7 +325,6 @@ ds2dd_detailed <- function(data,
}
}) |>
(\(x)do.call(c, x))()
}
dd <-
dd |> dplyr::mutate(
@ -357,33 +343,6 @@ ds2dd_detailed <- function(data,
)
}
#' Finish incomplete haven attributes substituting missings with values
#'
#' @param data haven labelled variable
#'
#' @return named vector
#' @export
#'
#' @examples
#' ds <- structure(c(1, 2, 3, 2, 10, 9),
#' labels = c(Unknown = 9, Refused = 10),
#' class = "haven_labelled"
#' )
#' haven::is.labelled(ds)
#' attributes(ds)
#' ds |> haven_all_levels()
haven_all_levels <- function(data) {
stopifnot(haven::is.labelled(data))
if (length(attributes(data)$labels) == length(unique(data))) {
out <- attributes(data)$labels
} else {
att <- attributes(data)$labels
out <- c(unique(data[!data %in% att]), att) |>
stats::setNames(c(unique(data[!data %in% att]), names(att)))
}
out
}
#' Guess time variables based on naming pattern
#'
@ -567,50 +526,6 @@ numchar2fct <- function(data, numeric.threshold = 6, character.throshold = 6) {
)
}
#' Extract attribute. Returns NA if none
#'
#' @param data vector
#' @param attr attribute name
#'
#' @return character vector
#' @export
#'
#' @examples
#' attr(mtcars$mpg, "label") <- "testing"
#' sapply(mtcars, get_attr)
#' lapply(mtcars, \(.x)get_attr(.x, NULL))
#' mtcars |>
#' numchar2fct(numeric.threshold = 6) |>
#' ds2dd_detailed()
get_attr <- function(data, attr = NULL) {
if (is.null(attr)) {
attributes(data)
} else {
a <- attr(data, attr, exact = TRUE)
if (is.null(a)) {
NA
} else {
a
}
}
}
#' Set attributes for named attribute. Appends if attr is NULL
#'
#' @param data vector
#' @param label label
#' @param attr attribute name
#'
#' @return vector with attribute
#' @export
#'
set_attr <- function(data, label, attr = NULL) {
if (is.null(attr)) {
attributes(data) <- c(attributes(data),label)
} else {
attr(data, attr) <- label
}
data
}

View File

@ -48,7 +48,7 @@ export_redcap_instrument <- function(data,
ideas on exporting multiple instruments.")
}
if (record.id %in% data[["field_name"]]){
if (!is.na(record.id) && record.id %in% data[["field_name"]]){
data <- data[-match(record.id,data[["field_name"]]),]
}

View File

@ -24,30 +24,30 @@ server <- function(input, output, session) {
out <- read_input(input$ds$datapath)
# Saves labels to reapply later
labels <- lapply(out, get_attr)
# labels <- lapply(out, get_attr)
out <- out |>
## Parses data with readr functions
parse_data() |>
## Converts logical to factor, which overwrites attributes
##
dplyr::mutate(dplyr::across(dplyr::where(is.logical), forcats::as_factor))
dplyr::mutate(dplyr::across(dplyr::where(is.logical), as_factor))
if (!is.null(input$factor_vars)) {
out <- out |>
dplyr::mutate(
dplyr::across(
dplyr::all_of(input$factor_vars),
forcats::as_factor
as_factor
)
)
}
# Old attributes are appended
out <- purrr::imap(out,\(.x,.i){
set_attr(.x,labels[[.i]])
}) |>
dplyr::bind_cols()
# out <- purrr::imap(out,\(.x,.i){
# set_attr(.x,labels[[.i]])
# }) |>
# dplyr::bind_cols()
out
})
@ -62,7 +62,10 @@ server <- function(input, output, session) {
dd <- shiny::reactive({
shiny::req(input$ds)
v$file <- "loaded"
ds2dd_detailed(data = dat())
ds2dd_detailed(
data = dat(),
add.auto.id = input$add_id=="yes"
)
})
output$uploaded <- shiny::reactive({
@ -86,6 +89,18 @@ server <- function(input, output, session) {
)
})
## Specify ID if necessary
# output$id_var <- shiny::renderUI({
# shiny::req(input$ds)
# selectizeInput(
# inputId = "id_var",
# selected = colnames(dat())[1],
# label = "ID variable",
# choices = colnames(dat())[-match(colnames(dat()),input$factor_vars)],
# multiple = FALSE
# )
# })
output$data.tbl <- gt::render_gt(
dd() |>
purrr::pluck("data") |>
@ -157,7 +172,9 @@ server <- function(input, output, session) {
output$downloadInstrument <- shiny::downloadHandler(
filename = paste0("REDCapCAST_instrument", Sys.Date(), ".zip"),
content = function(file) {
export_redcap_instrument(purrr::pluck(dd(), "meta"), file)
export_redcap_instrument(purrr::pluck(dd(), "meta"),
file = file,
record.id = ifelse(input$add_id=="none",NA,names(dat())[1]))
}
)

View File

@ -30,6 +30,17 @@ ui <-
# This has been solved by adding an arbitrary button to load data - which was abandoned again
shiny::conditionalPanel(
condition = "output.uploaded=='yes'",
shiny::radioButtons(
inputId = "add_id",
label = "Add ID, or use first column?",
selected = "no",
inline = TRUE,
choices = list(
"First column" = "no",
"Add ID" = "yes",
"No ID" = "none"
)
),
shiny::radioButtons(
inputId = "specify_factors",
label = "Specify categorical variables?",

View File

@ -22,14 +22,16 @@ On the left, you initially just find one single option to upload a spreadsheet.
### REDCap database files creation
The spreadsheet column names will be adjusted to comply with REDCap naming criteria, and a renamed (adjusted) spreadsheet can be downloaded.
The spreadsheet column names will be adjusted to comply with REDCap naming criteria, and a renamed (adjusted) spreadsheet can be downloaded. If your spreadsheet columns are labelled (exported from stata or labelled in R, these labels will be used for the visible field names (field label) i REDCap).
Based on the uploaded spreadsheet, the app will make a qualified guess on data classes and if the data is labelled (like .rda or .dta) all this information will be included in the data dictionary file. The default data format is "text".
Based on the uploaded spreadsheet, the app will make a qualified guess on data classes and if the data is labelled (like .rda or .dta) all this information will be included in the data dictionary file. The default data format is "text". In addition categorical variables can be specified manually, and you caon add an ID column , or assume the first column is the ID (please reorder before export).
If you want to add data to an existing database, an instrument can be created. This metadata file is identical to a data dictionary, but does not include a "record_id" field and is packaged as a .zip file, which is uploaded in the "Designer" interface in REDCap.
If you want to add data to an existing database, an instrument can be created. This metadata file is identical to a data dictionary, but does not include the ID field (if included or added) and is packaged as a .zip file, which is uploaded in the "Designer" interface in REDCap.
### Transferring directly to a REDCap database
This feature is mainly a show-case. Use it if you like, but most will feel more secure doing manual uploads.
Based on the API-functions in REDCap, you can upload your data dictionary and renamed data directly from this interface (no data is stored on the server, but consider launching this shiny app on your own machine after having installed the [REDCapCAST package](https://agdamsbo.github.io/REDCapCAST/#installation) in R). Launch a local instance of this app with:
```
@ -56,10 +58,10 @@ This app and package can be cited using the following bibtex citation or by refe
```
@agdamsboREDCapCAST{,
title = {REDCapCAST: REDCap Castellated Data Handling And Metadata Casting},
author = {Andreas Gammelgaard Damsbo and Paul Egeler},
title = {REDCapCAST: REDCap Castellated Data Handling and Metadata Casting},
author = {Andreas Gammelgaard Damsbo},
year = {2024},
note = {R package version 24.11.1, https://agdamsbo.github.io/REDCapCAST/},
note = {R package version 24.11.2, https://agdamsbo.github.io/REDCapCAST/},
url = {https://github.com/agdamsbo/REDCapCAST},
doi = {10.5281/zenodo.8013984},
}

View File

@ -22,5 +22,10 @@ Useful links:
\author{
\strong{Maintainer}: Andreas Gammelgaard Damsbo \email{agdamsbo@clin.au.dk} (\href{https://orcid.org/0000-0002-7559-1154}{ORCID})
Authors:
\itemize{
\item Paul Egeler \email{paulegeler@gmail.com} (\href{https://orcid.org/0000-0001-6948-9498}{ORCID})
}
}
\keyword{internal}

51
man/as_factor.Rd Normal file
View File

@ -0,0 +1,51 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_factor.R
\name{as_factor}
\alias{as_factor}
\alias{as_factor.logical}
\alias{as_factor.numeric}
\alias{as_factor.character}
\alias{as_factor.haven_labelled}
\alias{as_factor.labelled}
\title{Convert labelled vectors to factors while preserving attributes}
\usage{
as_factor(x, ...)
\method{as_factor}{logical}(x, ...)
\method{as_factor}{numeric}(x, ...)
\method{as_factor}{character}(x, ...)
\method{as_factor}{haven_labelled}(x, ...)
\method{as_factor}{labelled}(x, ...)
}
\arguments{
\item{x}{Object to coerce to a factor.}
\item{...}{Other arguments passed down to method.}
}
\description{
This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending
original attributes except for "class" after converting to factor to avoid
ta loss in case of rich formatted and labelled data.
}
\details{
Please refer to parent functions for extended documentation.
}
\examples{
# will preserve all attributes but class
c(1, 4, 3, "A", 7, 8, 1) |> as_factor()
structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10)
) |>
as_factor()
structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "haven_labelled"
) |>
as_factor()
}

34
man/fct2num.Rd Normal file
View File

@ -0,0 +1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_factor.R
\name{fct2num}
\alias{fct2num}
\title{Allows conversion of factor to numeric values preserving original levels}
\usage{
fct2num(data)
}
\arguments{
\item{data}{vector}
}
\value{
numeric vector
}
\description{
Allows conversion of factor to numeric values preserving original levels
}
\examples{
c(1, 4, 3, "A", 7, 8, 1) |>
as_factor() |> fct2num()
structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "haven_labelled"
) |>
as_factor() |>
fct2num()
structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10)
) |>
as_factor() |>
fct2num()
}

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
% Please edit documentation in R/as_factor.R
\name{get_attr}
\alias{get_attr}
\title{Extract attribute. Returns NA if none}

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
% Please edit documentation in R/as_factor.R
\name{haven_all_levels}
\alias{haven_all_levels}
\title{Finish incomplete haven attributes substituting missings with values}

25
man/named_levels.Rd Normal file
View File

@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_factor.R
\name{named_levels}
\alias{named_levels}
\title{Get named vector of factor levels and values}
\usage{
named_levels(data, label = "labels", na.label = NULL, na.value = 99)
}
\arguments{
\item{data}{factor}
\item{label}{character string of attribute with named vector of factor labels}
}
\value{
named vector
}
\description{
Get named vector of factor levels and values
}
\examples{
structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "haven_labelled"
) |> as_factor() |> named_levels()
}

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
% Please edit documentation in R/as_factor.R
\name{set_attr}
\alias{set_attr}
\title{Set attributes for named attribute. Appends if attr is NULL}