Compare commits

..

No commits in common. "f2b2784547ce5d4c20e7b4abe272272fce85fadb" and "927d485739a4c9bedf9378b2c6244794560e9667" have entirely different histories.

25 changed files with 179 additions and 540 deletions

1
.gitignore vendored
View File

@ -12,4 +12,3 @@ drafting
.DS_Store .DS_Store
cran-comments.md cran-comments.md
~/REDCapCAST/inst/shiny-examples/casting/rsconnect ~/REDCapCAST/inst/shiny-examples/casting/rsconnect
~/REDCapCAST/inst/shiny-examples/casting/rsconnect/

View File

@ -58,8 +58,7 @@ Imports:
zip, zip,
assertthat, assertthat,
openxlsx2, openxlsx2,
readODS, readODS
forcats
Collate: Collate:
'REDCapCAST-package.R' 'REDCapCAST-package.R'
'utils.r' 'utils.r'

View File

@ -16,19 +16,14 @@ export(doc2dd)
export(ds2dd) export(ds2dd)
export(ds2dd_detailed) export(ds2dd_detailed)
export(easy_redcap) export(easy_redcap)
export(export_redcap_instrument)
export(file_extension) export(file_extension)
export(focused_metadata) export(focused_metadata)
export(format_subheader) export(format_subheader)
export(get_api_key) export(get_api_key)
export(guess_time_only)
export(guess_time_only_filter) export(guess_time_only_filter)
export(haven_all_levels)
export(html_tag_wrap) export(html_tag_wrap)
export(is_repeated_longitudinal) export(is_repeated_longitudinal)
export(match_fields_to_form) export(match_fields_to_form)
export(numchar2fct)
export(parse_data)
export(process_user_input) export(process_user_input)
export(read_input) export(read_input)
export(read_redcap_instrument) export(read_redcap_instrument)
@ -38,11 +33,9 @@ export(sanitize_split)
export(shiny_cast) export(shiny_cast)
export(split_non_repeating_forms) export(split_non_repeating_forms)
export(strsplitx) export(strsplitx)
export(var2fct)
importFrom(REDCapR,redcap_event_instruments) importFrom(REDCapR,redcap_event_instruments)
importFrom(REDCapR,redcap_metadata_read) importFrom(REDCapR,redcap_metadata_read)
importFrom(REDCapR,redcap_read) importFrom(REDCapR,redcap_read)
importFrom(forcats,as_factor)
importFrom(keyring,key_get) importFrom(keyring,key_get)
importFrom(keyring,key_list) importFrom(keyring,key_list)
importFrom(keyring,key_set) importFrom(keyring,key_set)

10
NEWS.md
View File

@ -1,13 +1,3 @@
# REDCapCAST 24.11.2
24.11.1 was rejected on CRAN based on wrong title capitalisation. This was an opportunity to extend the package overhaul.
Documentation has been updated. Data parser functions have been added (based on readr) and separated from the ds2dd_detailed().
Vignettes and documentation have been restructured.
This package has been detached from the REDCapRITS, which it was originally forked from. The data split function will be kept, while testing will be rewritten. This projects has evolved away from the original fork, so I think this detachment is fair.
# REDCapCAST 24.11.1 # REDCapCAST 24.11.1
Revised tests. Revised tests.

View File

@ -135,12 +135,18 @@ hms2character <- function(data) {
#' file with `haven::read_dta()`). #' file with `haven::read_dta()`).
#' @param metadata redcap metadata headings. Default is #' @param metadata redcap metadata headings. Default is
#' REDCapCAST:::metadata_names. #' REDCapCAST:::metadata_names.
#' @param validate.time Flag to validate guessed time columns
#' @param time.var.sel.pos Positive selection regex string passed to
#' `gues_time_only_filter()` as sel.pos.
#' @param time.var.sel.neg Negative selection regex string passed to
#' `gues_time_only_filter()` as sel.neg.
#' #'
#' @return list of length 2 #' @return list of length 2
#' @export #' @export
#' #'
#' @examples #' @examples
#' data <- REDCapCAST::redcapcast_data #' data <- REDCapCAST::redcapcast_data
#' data |> ds2dd_detailed(validate.time = TRUE)
#' data |> ds2dd_detailed() #' data |> ds2dd_detailed()
#' iris |> ds2dd_detailed(add.auto.id = TRUE) #' iris |> ds2dd_detailed(add.auto.id = TRUE)
#' iris |> #' iris |>
@ -166,7 +172,10 @@ ds2dd_detailed <- function(data,
field.label = NULL, field.label = NULL,
field.label.attr = "label", field.label.attr = "label",
field.validation = NULL, field.validation = NULL,
metadata = names(REDCapCAST::redcapcast_meta)) { metadata = names(REDCapCAST::redcapcast_meta),
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]") {
## Handles the odd case of no id column present ## Handles the odd case of no id column present
if (add.auto.id) { if (add.auto.id) {
data <- dplyr::tibble( data <- dplyr::tibble(
@ -176,6 +185,43 @@ ds2dd_detailed <- function(data,
message("A default id column has been added") message("A default id column has been added")
} }
if (validate.time) {
return(data |> guess_time_only_filter(validate = TRUE))
}
if (lapply(data, haven::is.labelled) |> (\(x)do.call(c, x))() |> any()) {
message("Data seems to be imported with haven from a Stata (.dta) file and
will be treated as such.")
data.source <- "dta"
} else {
data.source <- ""
}
## data classes
### Only keeps the first class, as time fields (POSIXct/POSIXt) has two
### classes
if (data.source == "dta") {
data_classes <-
data |>
haven::as_factor() |>
time_only_correction(
sel.pos = time.var.sel.pos,
sel.neg = time.var.sel.neg
) |>
lapply(\(x)class(x)[1]) |>
(\(x)do.call(c, x))()
} else {
data_classes <-
data |>
time_only_correction(
sel.pos = time.var.sel.pos,
sel.neg = time.var.sel.neg
) |>
lapply(\(x)class(x)[1]) |>
(\(x)do.call(c, x))()
}
## --------------------------------------- ## ---------------------------------------
## Building the data dictionary ## Building the data dictionary
## --------------------------------------- ## ---------------------------------------
@ -223,16 +269,17 @@ ds2dd_detailed <- function(data,
## field_label ## field_label
if (is.null(field.label)) { if (is.null(field.label)) {
if (data.source == "dta") {
dd$field_label <- data |> dd$field_label <- data |>
lapply(function(x) { lapply(function(x) {
if (haven::is.labelled(x)) { if (haven::is.labelled(x)) {
att <- haven_all_levels(x) attributes(x)[[field.label.attr]]
names(att)
} else { } else {
NA NA
} }
}) |> }) |>
(\(x)do.call(c, x))() (\(x)do.call(c, x))()
}
dd <- dd <-
dd |> dplyr::mutate(field_label = dplyr::if_else(is.na(field_label), dd |> dplyr::mutate(field_label = dplyr::if_else(is.na(field_label),
@ -247,8 +294,6 @@ ds2dd_detailed <- function(data,
} }
data_classes <- do.call(c, lapply(data, \(.x)class(.x)[1]))
## field_type ## field_type
if (is.null(field.type)) { if (is.null(field.type)) {
@ -267,6 +312,7 @@ ds2dd_detailed <- function(data,
} }
## validation ## validation
if (is.null(field.validation)) { if (is.null(field.validation)) {
dd <- dd <-
dd |> dplyr::mutate( dd |> dplyr::mutate(
@ -290,13 +336,15 @@ ds2dd_detailed <- function(data,
} }
} }
## choices ## choices
if (any(do.call(c, lapply(data, haven::is.labelled)))) { if (data.source == "dta") {
factor_levels <- data |> factor_levels <- data |>
lapply(function(x) { lapply(function(x) {
if (haven::is.labelled(x)) { if (haven::is.labelled(x)) {
att <- haven_all_levels(x) att <- attributes(x)$labels
paste(paste(att, names(att), sep = ", "), collapse = " | ") paste(paste(att, names(att), sep = ", "), collapse = " | ")
} else { } else {
NA NA
@ -335,75 +383,16 @@ ds2dd_detailed <- function(data,
list( list(
data = data |> data = data |>
time_only_correction(
sel.pos = time.var.sel.pos,
sel.neg = time.var.sel.neg
) |>
hms2character() |> hms2character() |>
stats::setNames(dd$field_name), stats::setNames(dd$field_name),
meta = dd meta = dd
) )
} }
#' Finish incomplete haven attributes substituting missings with values
#'
#' @param data haven labelled variable
#'
#' @return named vector
#' @export
#'
#' @examples
#' ds <- structure(c(1, 2, 3, 2, 10, 9),
#' labels = c(Unknown = 9, Refused = 10),
#' class = "haven_labelled"
#' )
#' ds |> haven_all_levels()
haven_all_levels <- function(data) {
stopifnot(haven::is.labelled(data))
if (length(attributes(data)$labels) == length(unique(data))) {
out <- attributes(data)$labels
} else {
att <- attributes(data)$labels
out <- c(unique(data[!data %in% att]), att) |>
stats::setNames(c(unique(data[!data %in% att]), names(att)))
}
out
}
#' Guess time variables based on naming pattern
#'
#' @description
#' This is for repairing data with time variables with appended "1970-01-01"
#'
#'
#' @param data data.frame or tibble
#' @param validate.time Flag to validate guessed time columns
#' @param time.var.sel.pos Positive selection regex string passed to
#' `gues_time_only_filter()` as sel.pos.
#' @param time.var.sel.neg Negative selection regex string passed to
#' `gues_time_only_filter()` as sel.neg.
#'
#' @return data.frame or tibble
#' @export
#'
#' @examples
#' redcapcast_data |> guess_time_only(validate.time = TRUE)
guess_time_only <- function(data,
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]") {
if (validate.time) {
return(data |> guess_time_only_filter(validate = TRUE))
}
### Only keeps the first class, as time fields (POSIXct/POSIXt) has two
### classes
data |> time_only_correction(
sel.pos = time.var.sel.pos,
sel.neg = time.var.sel.neg
)
}
### Completion ### Completion
#' Completion marking based on completed upload #' Completion marking based on completed upload
#' #'
@ -424,127 +413,3 @@ mark_complete <- function(upload, ls) {
) |> ) |>
stats::setNames(c(names(data)[1], paste0(forms, "_complete"))) stats::setNames(c(names(data)[1], paste0(forms, "_complete")))
} }
#' Helper to auto-parse un-formatted data with haven and readr
#'
#' @param data data.frame or tibble
#' @param guess_type logical to guess type with readr
#' @param col_types specify col_types using readr semantics. Ignored if guess_type is TRUE
#' @param locale option to specify locale. Defaults to readr::default_locale().
#' @param ignore.vars specify column names of columns to ignore when parsing
#' @param ... ignored
#'
#' @return data.frame or tibble
#' @export
#'
#' @examples
#' mtcars |>
#' parse_data() |>
#' str()
parse_data <- function(data,
guess_type = TRUE,
col_types = NULL,
locale = readr::default_locale(),
ignore.vars = "cpr",
...) {
if (any(ignore.vars %in% names(data))) {
ignored <- data[ignore.vars]
} else {
ignored <- NULL
}
## Parses haven data by applying labels as factors in case of any
if (do.call(c, lapply(data, (\(x)inherits(x, "haven_labelled")))) |> any()) {
data <- data |>
haven::as_factor()
}
## Applying readr cols
if (is.null(col_types) && guess_type) {
if (do.call(c, lapply(data, is.character)) |> any()) {
data <- data |> readr::type_convert(
locale = locale,
col_types = readr::cols(.default = readr::col_guess())
)
}
} else {
data <- data |> readr::type_convert(
locale = locale,
col_types = readr::cols(col_types)
)
}
if (!is.null(ignored)) {
data[ignore.vars] <- ignored
}
data
}
#' Convert vector to factor based on threshold of number of unique levels
#'
#' @description
#' This is a wrapper of forcats::as_factor, which sorts numeric vectors before
#' factoring, but levels character vectors in order of appearance.
#'
#'
#' @param data vector or data.frame column
#' @param unique.n threshold to convert class to factor
#'
#' @return vector
#' @export
#' @importFrom forcats as_factor
#'
#' @examples
#' sample(seq_len(4), 20, TRUE) |>
#' var2fct(6) |>
#' summary()
#' sample(letters, 20) |>
#' var2fct(6) |>
#' summary()
#' sample(letters[1:4], 20, TRUE) |> var2fct(6)
var2fct <- function(data, unique.n) {
if (length(unique(data)) <= unique.n) {
forcats::as_factor(data)
} else {
data
}
}
#' Applying var2fct across data set
#'
#' @description
#' Individual thresholds for character and numeric columns
#'
#' @param data dataset. data.frame or tibble
#' @param numeric.threshold threshold for var2fct for numeric columns. Default
#' is 6.
#' @param character.throshold threshold for var2fct for character columns.
#' Default is 6.
#'
#' @return data.frame or tibble
#' @export
#'
#' @examples
#' mtcars |> str()
#' mtcars |>
#' numchar2fct(numeric.threshold = 6) |>
#' str()
numchar2fct <- function(data, numeric.threshold = 6, character.throshold = 6) {
data |>
dplyr::mutate(
dplyr::across(
dplyr::where(is.numeric),
\(.x){
var2fct(data = .x, unique.n = numeric.threshold)
}
),
dplyr::across(
dplyr::where(is.character),
\(.x){
var2fct(data = .x, unique.n = character.throshold)
}
)
)
}

View File

@ -14,7 +14,6 @@
#' @param record.id record id variable name. Default is 'record_id'. #' @param record.id record id variable name. Default is 'record_id'.
#' #'
#' @return exports zip-file #' @return exports zip-file
#' @export
#' #'
#' @examples #' @examples
#' #iris |> #' #iris |>

View File

@ -17,9 +17,6 @@
#' \item{age_integer}{Age integer, numeric} #' \item{age_integer}{Age integer, numeric}
#' \item{sex}{Legal sex, character} #' \item{sex}{Legal sex, character}
#' \item{cohabitation}{Cohabitation status, character} #' \item{cohabitation}{Cohabitation status, character}
#' \item{con_calc}{con_calc}
#' \item{con_mrs}{con_mrs}
#' \item{consensus_complete}{consensus_complete}
#' \item{hypertension}{Hypertension, character} #' \item{hypertension}{Hypertension, character}
#' \item{diabetes}{diabetes, character} #' \item{diabetes}{diabetes, character}
#' \item{region}{region, character} #' \item{region}{region, character}

View File

@ -22,7 +22,7 @@ shiny_cast <- function(...) {
#' DEPRECATED Helper to import files correctly #' Helper to import files correctly
#' #'
#' @param filenames file names #' @param filenames file names
#' #'
@ -33,11 +33,9 @@ shiny_cast <- function(...) {
#' file_extension(list.files(here::here(""))[[2]])[[1]] #' file_extension(list.files(here::here(""))[[2]])[[1]]
#' file_extension(c("file.cd..ks","file")) #' file_extension(c("file.cd..ks","file"))
file_extension <- function(filenames) { file_extension <- function(filenames) {
sub( sub(pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "",
pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "",
filenames, filenames,
perl = TRUE perl = TRUE)
)
} }
#' Flexible file import based on extension #' Flexible file import based on extension
@ -51,7 +49,7 @@ file_extension <- function(filenames) {
#' @examples #' @examples
#' read_input("https://raw.githubusercontent.com/agdamsbo/cognitive.index.lookup/main/data/sample.csv") #' read_input("https://raw.githubusercontent.com/agdamsbo/cognitive.index.lookup/main/data/sample.csv")
read_input <- function(file, consider.na = c("NA", '""', "")) { read_input <- function(file, consider.na = c("NA", '""', "")) {
ext <- tools::file_ext(file) ext <- file_extension(file)
tryCatch( tryCatch(
{ {
@ -76,3 +74,4 @@ read_input <- function(file, consider.na = c("NA", '""', "")) {
df df
} }

View File

@ -1,36 +1,53 @@
<!-- badges: start --> <!-- badges: start -->
[![GitHub R package version](https://img.shields.io/github/r-package/v/agdamsbo/REDCapCAST)](https://github.com/agdamsbo/REDCapCAST)
[![GitHub R package version](https://img.shields.io/github/r-package/v/agdamsbo/REDCapCAST)](https://github.com/agdamsbo/REDCapCAST) [![CRAN/METACRAN](https://img.shields.io/cran/v/REDCapCAST)](https://CRAN.R-project.org/package=REDCapCAST) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8013984.svg)](https://doi.org/10.5281/zenodo.8013984) [![R-hub](https://github.com/agdamsbo/REDCapCAST/actions/workflows/rhub.yaml/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/rhub.yaml) [![R-CMD-check](https://github.com/agdamsbo/REDCapCAST/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/R-CMD-check.yaml) [![Page deployed](https://github.com/agdamsbo/REDCapCAST/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/pages/pages-build-deployment) [![Codecov test coverage](https://codecov.io/gh/agdamsbo/REDCapCAST/branch/master/graph/badge.svg)](https://app.codecov.io/gh/agdamsbo/REDCapCAST?branch=master) [![CRAN downloads](https://cranlogs.r-pkg.org/badges/grand-total/REDCapCAST)](https://cran.r-project.org/package=REDCapCAST) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![CRAN/METACRAN](https://img.shields.io/cran/v/REDCapCAST)](https://CRAN.R-project.org/package=REDCapCAST)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8013984.svg)](https://doi.org/10.5281/zenodo.8013984)
[![R-hub](https://github.com/agdamsbo/REDCapCAST/actions/workflows/rhub.yaml/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/rhub.yaml)
[![R-CMD-check](https://github.com/agdamsbo/REDCapCAST/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/R-CMD-check.yaml)
[![Page deployed](https://github.com/agdamsbo/REDCapCAST/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/pages/pages-build-deployment)
[![Codecov test coverage](https://codecov.io/gh/agdamsbo/REDCapCAST/branch/master/graph/badge.svg)](https://app.codecov.io/gh/agdamsbo/REDCapCAST?branch=master)
[![CRAN downloads](https://cranlogs.r-pkg.org/badges/grand-total/REDCapCAST)](https://cran.r-project.org/package=REDCapCAST)
[![Lifecycle:
experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html)
<!-- badges: end --> <!-- badges: end -->
# REDCapCAST package <img src="man/figures/logo.png" align="right" /> # REDCapCAST package <img src="man/figures/logo.png" align="right" />
Casting metadata for REDCap database creation and handling of castellated data using repeated instruments and longitudinal projects in REDCap. REDCap database casting and handling of castellated data when using repeated instruments and longitudinal projects.
This is implemented with This package is a fork of [pegeler/REDCapRITS](https://github.com/pegeler/REDCapRITS). The `REDCapRITS` represents great and extensive work to handle castellated REDCap data in different programming languages. This fork is purely minded on R usage and includes a few implementations of the main `REDCap_split` function. The `REDCapRITS` as well as `REDCapCAST` would not be possible without the outstanding work in [`REDCapR`](https://ouhscbbmc.github.io/REDCapR/).
- An app-interface for easy database creation [accessible here](https://agdamsbo.shinyapps.io/redcapcast/) or available to run locally with `shiny_cast()` allowing you to easily create a REDCap database based on an existing spreadsheet. ## What problem does `REDCapCAST` solve?
- Export data from REDCap in different formats handling castellated data, and on default only export requested data, this is mainly through `read_redcap_tables()`. I started working on this project as the castellated longitudinal data set was a little challenging. Later, I have come to learn of the [`redcapAPI`](https://github.com/vubiostat/redcapAPI) package, which would also cover this functionality. I find the `redcapAPI`package quite advanced and a little difficult to work with. This have led to the continued work on this package, as an easy-to-use approach for data migration, data base creation and data handling. This package is very much to be seen as an attempt at a R-to-REDCap-to-R foundry for handling both the transition from dataset/variable list to database and the other way, from REDCap database to a tidy dataset. The goal was also to allow for a "minimal data" approach by allowing to filter records, instruments and variables in the export to only download data needed. I think this approach is desirable for handling sensitive, clinical data. Please refer to [REDCap-Tools](https://redcap-tools.github.io/) for other great tools for working with REDCap in R.
REDCapCAST was initially build on, and still includes code from [pegeler/REDCapRITS](https://github.com/pegeler/REDCapRITS), and relies on functions from the [`REDCapR`](https://ouhscbbmc.github.io/REDCapR/)-project
## History
This package was originally forked from [pegeler/REDCapRITS](https://github.com/pegeler/REDCapRITS). The `REDCapRITS` represents great and extensive work to handle castellated REDCap data in different programming languages. REDCapCAST has evolved into much more than just handling castellated data and so has been detatched from the original project while still relying on the main `REDCap_split` function. All access to the REDCap database is build on the outstanding work in [`REDCapR`](#0).
This package really started out of frustration during my PhD in health science hearing colleagues complaining about that "castellated" data formatting of REDCap exports when doing longitudinal projects and being used to wide data. This led to some bad decisions in building databases avoiding repeated instruments. This package solves these challenges, but solutions are also implemented else where like the [redcapAPI](https://github.com/vubiostat/redcapAPI) or [REDCapTidieR](https://github.com/CHOP-CGTInformatics/REDCapTidieR) packages, which are bigger project.
To help new PhD students and other researchers, I have also worked on creating a few helper/wrapper-functions to ease data access. Documentation is on it's way.
For any more advanced uses, consider using the [`redcapAPI`](https://github.com/vubiostat/redcapAPI) or [`REDCapR`](https://ouhscbbmc.github.io/REDCapR/) packages. For any more advanced uses, consider using the [`redcapAPI`](https://github.com/vubiostat/redcapAPI) or [`REDCapR`](https://ouhscbbmc.github.io/REDCapR/) packages.
What is unique in this package, is the work towards making it a lot easier to move data from different sources to REDCap databases: casting REDCap metadata based on a spreadsheet. This is all wrapped in `shiny_cast()`and [hosted for all to use here](https://agdamsbo.shinyapps.io/redcapcast/).
## Main functionality
Here is just a short description of the main functions:
* `REDcap_split()`: Works largely as the original `REDCapRITS::REDCap_split()`. It takes a REDCap dataset and metadata (data dictionary) to split the data set into a list of dataframes of instruments.
* `read_redcap_tables()`: wraps the use of [`REDCapR::redcap_read()`](https://github.com/OuhscBbmc/REDCapR) with `REDCap_split()` to ease the export of REDCap data. Default output is a list of data frames with one data frame for each REDCap instrument.
* `redcap_wider()`: joins and pivots a list of data frames with repeated instruments to a wide format utilizing the [`tidyr::pivot_wider()`](https://tidyr.tidyverse.org/reference/pivot_wider.html) from the [tidyverse](https://www.tidyverse.org/).
* `easy_redcap()`: combines secure API key storage with the `keyring`-package, focused data retrieval and optional widening. This is the recommended approach for easy data access and analysis.
* `ds2dd_detailed()`: Converts a data set to a data dictionary for upload to a new REDCap database. Variables (fields) and instruments in a REDCap data base are defined by this data dictionary.
* `doc2dd()`: Converts a document table to data dictionary. This allows to specify instrument or whole data dictionary in text document, which for most is easier to work with and easily modifiable. Very much like a easy version of just working directly in the data dictionary file itself.
* `shiny_cast()`: [Shiny](https://shiny.posit.co/) application to ease the process of converting a spreadsheet/data set to a REDCap database. The app runs locally and data is transferred securely. You can just create and upload the data dictionary, but you can also transfer the given data in the same process. The app is [hosted on shinyapps.io](https://agdamsbo.shinyapps.io/redcapcast/).
## Future ## Future
The plan with this package is to be bundled with a Handbook on working with REDCap from R. This work is in progress but is limited by the time available. Please feel free to contact me or create and issue with ideas for future additions. The plan with this package is to be bundled with a Handbook on working with REDCap from R. This work is in progress but is limited by the time available. Please feel free to contact me or create and issue with ideas for future additions.
## Installation and use ## Installation
The package is available on CRAN. Install the latest version: The package is available on CRAN. Install the latest version:

Binary file not shown.

Binary file not shown.

View File

@ -1,3 +1,4 @@
library(REDCapCAST)
library(bslib) library(bslib)
library(shiny) library(shiny)
library(openxlsx2) library(openxlsx2)
@ -6,14 +7,9 @@ library(readODS)
library(readr) library(readr)
library(dplyr) library(dplyr)
library(here) library(here)
library(devtools)
if (!requireNamespace("REDCapCAST")){
devtools::install_github("agdamsbo/REDCapCAST", quiet = TRUE, upgrade = "never")
}
library(REDCapCAST)
server <- function(input, output, session) { server <- function(input, output, session) {
v <- shiny::reactiveValues( v <- shiny::reactiveValues(
file = NULL file = NULL
) )
@ -21,8 +17,7 @@ server <- function(input, output, session) {
dat <- shiny::reactive({ dat <- shiny::reactive({
shiny::req(input$ds) shiny::req(input$ds)
read_input(input$ds$datapath) |> read_input(input$ds$datapath)
parse_data()
}) })
# getData <- reactive({ # getData <- reactive({
@ -32,11 +27,6 @@ server <- function(input, output, session) {
# return(!is.null(getData())) # return(!is.null(getData()))
# }) # })
dd <- shiny::reactive({
shiny::req(input$ds)
v$file <- "loaded"
ds2dd_detailed(data = dat())
})
output$uploaded <- shiny::reactive({ output$uploaded <- shiny::reactive({
if (is.null(v$file)) { if (is.null(v$file)) {
@ -48,6 +38,12 @@ server <- function(input, output, session) {
shiny::outputOptions(output, "uploaded", suspendWhenHidden = FALSE) shiny::outputOptions(output, "uploaded", suspendWhenHidden = FALSE)
dd <- shiny::reactive({
shiny::req(input$ds)
v$file <- "loaded"
ds2dd_detailed(data = dat())
})
output$data.tbl <- gt::render_gt( output$data.tbl <- gt::render_gt(
dd() |> dd() |>
purrr::pluck("data") |> purrr::pluck("data") |>
@ -91,15 +87,12 @@ server <- function(input, output, session) {
output_staging$meta <- output_staging$data <- NA output_staging$meta <- output_staging$data <- NA
shiny::observeEvent(input$upload.meta, { shiny::observeEvent(input$upload.meta,{ upload_meta() })
upload_meta()
})
shiny::observeEvent(input$upload.data, { shiny::observeEvent(input$upload.data,{ upload_data() })
upload_data()
})
upload_meta <- function(){ upload_meta <- function(){
shiny::req(input$uri) shiny::req(input$uri)
shiny::req(input$api) shiny::req(input$api)
@ -112,6 +105,7 @@ server <- function(input, output, session) {
} }
upload_data <- function(){ upload_data <- function(){
shiny::req(input$uri) shiny::req(input$uri)
shiny::req(input$api) shiny::req(input$api)
@ -127,8 +121,4 @@ server <- function(input, output, session) {
output$upload.data.print <- renderText(output_staging$data) output$upload.data.print <- renderText(output_staging$data)
# session$onSessionEnded(function() {
# # cat("Session Ended\n")
# unlink("www",recursive = TRUE)
# })
} }

View File

@ -19,27 +19,20 @@ ui <-
".ods" ".ods"
) )
), ),
# shiny::actionButton( shiny::helpText("Have a look at the preview panels to show download options."),
# inputId = "load_data",
# label = "Load data",
# icon = shiny::icon("circle-down")
# ),
shiny::helpText("Have a look at the preview panels to validate the data dictionary and imported data."),
# For some odd reason this only unfolds when the preview panel is shown.. # For some odd reason this only unfolds when the preview panel is shown..
# This has been solved by adding an arbitrary button to load data - which was abandoned again
shiny::conditionalPanel( shiny::conditionalPanel(
condition = "output.uploaded=='yes'", condition = "output.uploaded=='yes'",
# condition = "input.load_data", shiny::helpText("Below you can download the dataset formatted for upload and the
# shiny::helpText("Below you can download the dataset formatted for upload and the corresponding data dictionary for a new data base, if you want to upload manually."),
# corresponding data dictionary for a new data base, if you want to upload manually."),
# Button # Button
shiny::downloadButton(outputId = "downloadData", label = "Download renamed data"), shiny::downloadButton("downloadData", "Download renamed data"),
# Button # Button
shiny::downloadButton(outputId = "downloadMeta", label = "Download data dictionary"), shiny::downloadButton("downloadMeta", "Download data dictionary"),
# Button # Button
shiny::downloadButton(outputId = "downloadInstrument", label = "Download as instrument"), shiny::downloadButton("downloadInstrument", "Download as instrument"),
# Horizontal line ---- # Horizontal line ----
shiny::tags$hr(), shiny::tags$hr(),
@ -115,4 +108,3 @@ ui <-
) )
) )
) )

View File

@ -4,7 +4,7 @@
\name{REDCapCAST-package} \name{REDCapCAST-package}
\alias{REDCapCAST} \alias{REDCapCAST}
\alias{REDCapCAST-package} \alias{REDCapCAST-package}
\title{REDCapCAST: REDCap Castellated Data Handling and Metadata Casting} \title{REDCapCAST: REDCap Castellated Data Handling And Metadata Casting}
\description{ \description{
\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}

View File

@ -15,7 +15,10 @@ ds2dd_detailed(
field.label = NULL, field.label = NULL,
field.label.attr = "label", field.label.attr = "label",
field.validation = NULL, field.validation = NULL,
metadata = names(REDCapCAST::redcapcast_meta) metadata = names(REDCapCAST::redcapcast_meta),
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]"
) )
} }
\arguments{ \arguments{
@ -55,6 +58,14 @@ file with `haven::read_dta()`).}
\item{metadata}{redcap metadata headings. Default is \item{metadata}{redcap metadata headings. Default is
REDCapCAST:::metadata_names.} REDCapCAST:::metadata_names.}
\item{validate.time}{Flag to validate guessed time columns}
\item{time.var.sel.pos}{Positive selection regex string passed to
`gues_time_only_filter()` as sel.pos.}
\item{time.var.sel.neg}{Negative selection regex string passed to
`gues_time_only_filter()` as sel.neg.}
} }
\value{ \value{
list of length 2 list of length 2
@ -73,6 +84,7 @@ Ensure, that the data set is formatted with as much information as possible.
} }
\examples{ \examples{
data <- REDCapCAST::redcapcast_data data <- REDCapCAST::redcapcast_data
data |> ds2dd_detailed(validate.time = TRUE)
data |> ds2dd_detailed() data |> ds2dd_detailed()
iris |> ds2dd_detailed(add.auto.id = TRUE) iris |> ds2dd_detailed(add.auto.id = TRUE)
iris |> iris |>

View File

@ -2,7 +2,7 @@
% Please edit documentation in R/shiny_cast.R % Please edit documentation in R/shiny_cast.R
\name{file_extension} \name{file_extension}
\alias{file_extension} \alias{file_extension}
\title{DEPRECATED Helper to import files correctly} \title{Helper to import files correctly}
\usage{ \usage{
file_extension(filenames) file_extension(filenames)
} }
@ -13,7 +13,7 @@ file_extension(filenames)
character vector character vector
} }
\description{ \description{
DEPRECATED Helper to import files correctly Helper to import files correctly
} }
\examples{ \examples{
file_extension(list.files(here::here(""))[[2]])[[1]] file_extension(list.files(here::here(""))[[2]])[[1]]

View File

@ -1,33 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{guess_time_only}
\alias{guess_time_only}
\title{Guess time variables based on naming pattern}
\usage{
guess_time_only(
data,
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]"
)
}
\arguments{
\item{data}{data.frame or tibble}
\item{validate.time}{Flag to validate guessed time columns}
\item{time.var.sel.pos}{Positive selection regex string passed to
`gues_time_only_filter()` as sel.pos.}
\item{time.var.sel.neg}{Negative selection regex string passed to
`gues_time_only_filter()` as sel.neg.}
}
\value{
data.frame or tibble
}
\description{
This is for repairing data with time variables with appended "1970-01-01"
}
\examples{
redcapcast_data |> guess_time_only(validate.time = TRUE)
}

View File

@ -1,24 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{haven_all_levels}
\alias{haven_all_levels}
\title{Finish incomplete haven attributes substituting missings with values}
\usage{
haven_all_levels(data)
}
\arguments{
\item{data}{haven labelled variable}
}
\value{
named vector
}
\description{
Finish incomplete haven attributes substituting missings with values
}
\examples{
ds <- structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "haven_labelled"
)
ds |> haven_all_levels()
}

View File

@ -1,29 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{numchar2fct}
\alias{numchar2fct}
\title{Applying var2fct across data set}
\usage{
numchar2fct(data, numeric.threshold = 6, character.throshold = 6)
}
\arguments{
\item{data}{dataset. data.frame or tibble}
\item{numeric.threshold}{threshold for var2fct for numeric columns. Default
is 6.}
\item{character.throshold}{threshold for var2fct for character columns.
Default is 6.}
}
\value{
data.frame or tibble
}
\description{
Individual thresholds for character and numeric columns
}
\examples{
mtcars |> str()
mtcars |>
numchar2fct(numeric.threshold = 6) |>
str()
}

View File

@ -1,39 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{parse_data}
\alias{parse_data}
\title{Helper to auto-parse un-formatted data with haven and readr}
\usage{
parse_data(
data,
guess_type = TRUE,
col_types = NULL,
locale = readr::default_locale(),
ignore.vars = "cpr",
...
)
}
\arguments{
\item{data}{data.frame or tibble}
\item{guess_type}{logical to guess type with readr}
\item{col_types}{specify col_types using readr semantics. Ignored if guess_type is TRUE}
\item{locale}{option to specify locale. Defaults to readr::default_locale().}
\item{ignore.vars}{specify column names of columns to ignore when parsing}
\item{...}{ignored}
}
\value{
data.frame or tibble
}
\description{
Helper to auto-parse un-formatted data with haven and readr
}
\examples{
mtcars |>
parse_data() |>
str()
}

View File

@ -19,9 +19,6 @@ A data frame with 22 variables:
\item{age_integer}{Age integer, numeric} \item{age_integer}{Age integer, numeric}
\item{sex}{Legal sex, character} \item{sex}{Legal sex, character}
\item{cohabitation}{Cohabitation status, character} \item{cohabitation}{Cohabitation status, character}
\item{con_calc}{con_calc}
\item{con_mrs}{con_mrs}
\item{consensus_complete}{consensus_complete}
\item{hypertension}{Hypertension, character} \item{hypertension}{Hypertension, character}
\item{diabetes}{diabetes, character} \item{diabetes}{diabetes, character}
\item{region}{region, character} \item{region}{region, character}

View File

@ -1,29 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{var2fct}
\alias{var2fct}
\title{Convert vector to factor based on threshold of number of unique levels}
\usage{
var2fct(data, unique.n)
}
\arguments{
\item{data}{vector or data.frame column}
\item{unique.n}{threshold to convert class to factor}
}
\value{
vector
}
\description{
This is a wrapper of forcats::as_factor, which sorts numeric vectors before
factoring, but levels character vectors in order of appearance.
}
\examples{
sample(seq_len(4), 20, TRUE) |>
var2fct(6) |>
summary()
sample(letters, 20) |>
var2fct(6) |>
summary()
sample(letters[1:4], 20, TRUE) |> var2fct(6)
}

View File

@ -45,7 +45,7 @@ The more advanced `ds2dd_detailed()` is a natural development. It will try to ap
The dataset should be correctly formatted for the data dictionary to preserve as much information as possible. The dataset should be correctly formatted for the data dictionary to preserve as much information as possible.
```{r eval=FALSE} ```{r eval=TRUE}
d2 <- REDCapCAST::redcapcast_data |> d2 <- REDCapCAST::redcapcast_data |>
dplyr::mutate(record_id = seq_len(dplyr::n()), dplyr::mutate(record_id = seq_len(dplyr::n()),
region=factor(region)) |> region=factor(region)) |>

View File

@ -1,8 +1,8 @@
--- ---
title: "REDCapCAST" title: "Introduction"
output: rmarkdown::html_vignette output: rmarkdown::html_vignette
vignette: > vignette: >
%\VignetteIndexEntry{REDCapCAST} %\VignetteIndexEntry{Introduction}
%\VignetteEngine{knitr::rmarkdown} %\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8} %\VignetteEncoding{UTF-8}
--- ---
@ -31,6 +31,16 @@ redcapcast_data |> gt::gt()
```{r} ```{r}
redcapcast_meta |> gt::gt() redcapcast_meta |> gt::gt()
``` ```
```{r}
list <-
REDCap_split(
records = redcapcast_data,
metadata = redcapcast_meta,
forms = "repeating"
) |>
sanitize_split()
str(list)
```
```{r} ```{r}
list <- list <-

View File

@ -14,79 +14,13 @@ knitr::opts_chunk$set(
) )
``` ```
To make the easiest possible transition from spreadsheet/dataset to REDCap, I have created a small app, which adds a graphical interface to the casting of a data dictionary and data upload. Install the package and launch the app as follows: To make the easiest possible transition from spreadsheet/dataset to REDCap, I have created a small Shiny app, which adds a graphical interface to the casting of a data dictionary and data upload. Install the package and run the app as follows:
```{r eval=FALSE} ```{r eval=FALSE}
REDCapCAST::shiny_cast() require(REDCapCAST)
shiny_cast()
``` ```
The app primarily wraps one function: `ds2dd_detailed()`. The app will launch in a new window and the interface should be fairly self-explanatory.
The app only provides the most basic functionality, but might be extended in the future.
```{r}
library(REDCapCAST)
ds <- REDCap_split(
records = redcapcast_data,
metadata = redcapcast_meta,
forms = "all"
) |>
sanitize_split() |>
redcap_wider()
str(ds)
```
```{r}
ds|>
ds2dd_detailed()|>
purrr::pluck("data") |>
str()
```
```{r}
ds|>
ds2dd_detailed()|>
purrr::pluck("meta") |>
head(10)
```
Different data formats are accepted, which all mostly implements the `readr::col_guess()` functionality to parse column classes.
To ensure uniformity in data import this parsing has been implemented on its own to use with `ds2dd_detailed()` or any other data set for that matter:
```{r}
ds_parsed <- redcapcast_data |>
dplyr::mutate(dplyr::across(dplyr::everything(),as.character)) |>
parse_data()
str(ds_parsed)
```
It will ignore specified columns, which is neat for numeric-looking strings like cpr-with a leading 0:
```{r}
redcapcast_data |>
dplyr::mutate(dplyr::across(dplyr::everything(),as.character)) |>
parse_data(ignore.vars = c("record_id","cpr")) |>
str()
```
```{r}
```
Column classes can be passed to `parse_data()`.
Making a few crude assumption for factorising data, `numchar2fct()` factorises numerical and character vectors based on a set threshold for unique values:
```{r}
mtcars |> str()
mtcars |>
numchar2fct(numeric.threshold = 6) |>
str()
```
```{r}
ds_parsed|>
numchar2fct(numeric.threshold = 2) |>
str()
```