Merge pull request #11 from SpectrumHealthResearch/dev

Merging changes for #9 and #10
This commit is contained in:
kampfsca 2019-08-14 08:55:27 -04:00 committed by GitHub
commit 8685e6df4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 336 additions and 123 deletions

View File

@ -2,3 +2,4 @@
^\.Rproj\.user$
^\.travis\.yml$
^appveyor\.yml$
^data-raw$

View File

@ -1,6 +1,6 @@
Package: REDCapRITS
Title: REDCap Repeating Instrument Table Splitter
Version: 0.0.0
Version: 0.2.0
Authors@R: c(
person("Paul", "Egeler", email = "paul.egeler@spectrumhealth.org", role = c("aut", "cre")),
person("Spectrum Health, Grand Rapids, MI", role = "cph"))
@ -18,9 +18,10 @@ Suggests:
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
RoxygenNote: 6.1.1
URL: https://github.com/SpectrumHealthResearch/REDCapRITS
BugReports: https://github.com/SpectrumHealthResearch/REDCapRITS/issues
Collate:
'utils.r'
'process_user_input.r'
'REDCap_split.r'

15
R/NEWS.md Normal file
View File

@ -0,0 +1,15 @@
# REDCapRITS 0.2.0 (Release date: 2019-??-??)
* [feature] User can now separate each form into its own data.frame, regardless if it is a repeating instrument or not. (#10)
* [bug] Handles auto-generated form timestamp fields.
# REDCapRITS 0.1.0 (Release date: 2019-07-01)
* [feature] User can now specify the name of the 'primary' table, which previously was left blank. (#9)
* [bug] Keeps REDCap-generated fields in non-repeating data.frame that are not present in metadata file. (#7)
* [enhancement] Unit tests created. (#6)
* [bug] Checkbox data now supported. (#1)
# REDCapRITS 0.0.0 (Release date: 2018-06-03)
* Initial Release

View File

@ -10,6 +10,11 @@
#' @param metadata Project metadata (the data dictionary). May be a
#' \code{data.frame}, \code{response}, or \code{character} vector containing
#' JSON from an API call.
#' @param primary_table_name Name given to the list element for the primary
#' output table (as described in \emph{README.md}). Ignored if
#' \code{forms = 'all'}.
#' @param forms Indicate whether to create separate tables for repeating
#' instruments only or for all forms.
#' @author Paul W. Egeler, M.S., GStat
#' @examples
#' \dontrun{
@ -64,11 +69,21 @@
#' # Split the tables
#' REDCapRITS::REDCap_split(data, metadata)
#' }
#' @return A list of \code{"data.frame"}s: one base table and zero or more
#' tables for each repeating instrument.
#' @include process_user_input.r
#' @return A list of \code{"data.frame"}s. The number of tables will differ
#' depending on the \code{forms} option selected.
#' \itemize{
#' \item \code{'repeating'}: one base table and one or more
#' tables for each repeating instrument.
#' \item \code{'all'}: a data.frame for each instrument, regardless of
#' whether it is a repeating instrument or not.
#' }
#' @include process_user_input.r utils.r
#' @export
REDCap_split <- function(records, metadata) {
REDCap_split <- function(records,
metadata,
primary_table_name = "",
forms = c("repeating", "all")
) {
# Process user input
records <- process_user_input(records)
@ -77,103 +92,22 @@ REDCap_split <- function(records, metadata) {
# Get the variable names in the dataset
vars_in_data <- names(records)
# Match arg for forms
forms <- match.arg(forms)
# Check to see if there were any repeating instruments
if (!any(vars_in_data == "redcap_repeat_instrument")) {
message("There are no repeating instruments in this data.")
return(list(records))
if (forms == "repeating" && !"redcap_repeat_instrument" %in% vars_in_data) {
stop("There are no repeating instruments in this dataset.")
}
# Standardize variable names for metadata
names(metadata) <- c(
"field_name", "form_name", "section_header", "field_type",
"field_label", "select_choices_or_calculations", "field_note",
"text_validation_type_or_show_slider_number", "text_validation_min",
"text_validation_max", "identifier", "branching_logic", "required_field",
"custom_alignment", "question_number", "matrix_group_name", "matrix_ranking",
"field_annotation"
)
names(metadata) <- metadata_names
# Make sure that no metadata columns are factors
metadata <- rapply(metadata, as.character, classes = "factor", how = "replace")
# Find the fields and associated form
fields <- metadata[
!metadata$field_type %in% c("descriptive", "checkbox"),
c("field_name", "form_name")
]
# Process instrument status fields
form_names <- unique(metadata$form_name)
form_complete_fields <- data.frame(
field_name = paste0(form_names, "_complete"),
form_name = form_names,
stringsAsFactors = FALSE
)
fields <- rbind(fields, form_complete_fields)
# Process checkbox fields
if (any(metadata$field_type == "checkbox")) {
checkbox_basenames <- metadata[
metadata$field_type == "checkbox",
c("field_name", "form_name")
]
checkbox_fields <-
do.call(
"rbind",
apply(
checkbox_basenames,
1,
function(x, y)
data.frame(
field_name = y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"), y, perl = TRUE)],
form_name = x[2],
stringsAsFactors = FALSE,
row.names = NULL
),
y = vars_in_data
)
)
fields <- rbind(fields, checkbox_fields)
}
# Process ".*\\.factor" fields supplied by REDCap's export data R script
if (any(grepl("\\.factor$", vars_in_data))) {
factor_fields <-
do.call(
"rbind",
apply(
fields,
1,
function(x, y) {
field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y)
if (any(field_indices))
data.frame(
field_name = y[field_indices],
form_name = x[2],
stringsAsFactors = FALSE,
row.names = NULL
)
},
y = vars_in_data
)
)
fields <- rbind(fields, factor_fields)
}
# Identify the subtables in the data
subtables <- unique(records$redcap_repeat_instrument)
subtables <- subtables[subtables != ""]
fields <- match_fields_to_form(metadata, vars_in_data)
# Variables to be present in each output table
universal_fields <- c(
@ -186,43 +120,74 @@ REDCap_split <- function(records, metadata) {
)
)
# Variables to be at the beginning of each repeating instrument
repeat_instrument_fields <- grep(
"^redcap_repeat.*",
vars_in_data,
value = TRUE
)
if ("redcap_repeat_instrument" %in% vars_in_data) {
# Variables to be at the beginning of each repeating instrument
repeat_instrument_fields <- grep(
"^redcap_repeat.*",
vars_in_data,
value = TRUE
)
# Identify the subtables in the data
subtables <- unique(records$redcap_repeat_instrument)
subtables <- subtables[subtables != ""]
# Split the table based on instrument
out <- split.data.frame(records, records$redcap_repeat_instrument)
# Split the table based on instrument
out <- split.data.frame(records, records$redcap_repeat_instrument)
primary_table_index <- which(names(out) == "")
# Delete the variables that are not relevant
for (i in names(out)) {
if (forms == "repeating" && primary_table_name %in% subtables) {
warning("The label given to the primary table is already used by a repeating instrument. The primary table label will be left blank.")
primary_table_name <- ""
} else if (primary_table_name > "") {
names(out)[[primary_table_index]] <- primary_table_name
}
if (i == "") {
# Delete the variables that are not relevant
for (i in names(out)) {
out_fields <- which(
vars_in_data %in% c(
universal_fields,
fields[!fields[,2] %in% subtables, 1]
if (i == primary_table_name) {
out_fields <- which(
vars_in_data %in% c(
universal_fields,
fields[!fields[,2] %in% subtables, 1]
)
)
)
out[[which(names(out) == "")]] <- out[[which(names(out) == "")]][out_fields]
out[[primary_table_index]] <- out[[primary_table_index]][out_fields]
} else {
} else {
out_fields <- which(
vars_in_data %in% c(
universal_fields,
repeat_instrument_fields,
fields[fields[,2] == i, 1]
out_fields <- which(
vars_in_data %in% c(
universal_fields,
repeat_instrument_fields,
fields[fields[,2] == i, 1]
)
)
)
out[[i]] <- out[[i]][out_fields]
out[[i]] <- out[[i]][out_fields]
}
}
if (forms == "all") {
out <- c(
split_non_repeating_forms(
out[[primary_table_index]],
universal_fields,
fields[!fields[,2] %in% subtables,]
),
out[-primary_table_index]
)
}
} else {
out <- split_non_repeating_forms(records, universal_fields, fields)
}
out

BIN
R/R/sysdata.rda Normal file

Binary file not shown.

106
R/R/utils.r Normal file
View File

@ -0,0 +1,106 @@
match_fields_to_form <- function(metadata, vars_in_data) {
fields <- metadata[
!metadata$field_type %in% c("descriptive", "checkbox"),
c("field_name", "form_name")
]
# Process instrument status fields
form_names <- unique(metadata$form_name)
form_complete_fields <- data.frame(
field_name = paste0(form_names, "_complete"),
form_name = form_names,
stringsAsFactors = FALSE
)
fields <- rbind(fields, form_complete_fields)
# Process survey timestamps
timestamps <- intersect(vars_in_data, paste0(form_names, "_timestamp"))
if (length(timestamps)) {
timestamp_fields <- data.frame(
field_name = timestamps,
form_name = sub("_timestamp$", "", timestamps),
stringsAsFactors = FALSE
)
fields <- rbind(fields, timestamp_fields)
}
# Process checkbox fields
if (any(metadata$field_type == "checkbox")) {
checkbox_basenames <- metadata[
metadata$field_type == "checkbox",
c("field_name", "form_name")
]
checkbox_fields <-
do.call(
"rbind",
apply(
checkbox_basenames,
1,
function(x, y)
data.frame(
field_name = y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"), y, perl = TRUE)],
form_name = x[2],
stringsAsFactors = FALSE,
row.names = NULL
),
y = vars_in_data
)
)
fields <- rbind(fields, checkbox_fields)
}
# Process ".*\\.factor" fields supplied by REDCap's export data R script
if (any(grepl("\\.factor$", vars_in_data))) {
factor_fields <-
do.call(
"rbind",
apply(
fields,
1,
function(x, y) {
field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y)
if (any(field_indices))
data.frame(
field_name = y[field_indices],
form_name = x[2],
stringsAsFactors = FALSE,
row.names = NULL
)
},
y = vars_in_data
)
)
fields <- rbind(fields, factor_fields)
}
fields
}
split_non_repeating_forms <- function(table, universal_fields, fields) {
forms <- unique(fields[[2]])
x <- lapply(
forms,
function (x) {
table[names(table) %in% union(universal_fields, fields[fields[,2] == x,1])]
})
structure(x, names = forms)
}

View File

@ -0,0 +1,10 @@
metadata_names <- c(
"field_name", "form_name", "section_header", "field_type",
"field_label", "select_choices_or_calculations", "field_note",
"text_validation_type_or_show_slider_number", "text_validation_min",
"text_validation_max", "identifier", "branching_logic", "required_field",
"custom_alignment", "question_number", "matrix_group_name", "matrix_ranking",
"field_annotation"
)
usethis::use_data(metadata_names, overwrite = TRUE, internal = TRUE)

View File

@ -4,7 +4,8 @@
\alias{REDCap_split}
\title{Split REDCap repeating instruments table into multiple tables}
\usage{
REDCap_split(records, metadata)
REDCap_split(records, metadata, primary_table_name = "",
forms = c("repeating", "all"))
}
\arguments{
\item{records}{Exported project records. May be a \code{data.frame},
@ -14,10 +15,23 @@ call.}
\item{metadata}{Project metadata (the data dictionary). May be a
\code{data.frame}, \code{response}, or \code{character} vector containing
JSON from an API call.}
\item{primary_table_name}{Name given to the list element for the primary
output table (as described in \emph{README.md}). Ignored if
\code{forms = 'all'}.}
\item{forms}{Indicate whether to create separate tables for repeating
instruments only or for all forms.}
}
\value{
A list of \code{"data.frame"}s: one base table and zero or more
tables for each repeating instrument.
A list of \code{"data.frame"}s. The number of tables will differ
depending on the \code{forms} option selected.
\itemize{
\item \code{'repeating'}: one base table and one or more
tables for each repeating instrument.
\item \code{'all'}: a data.frame for each instrument, regardless of
whether it is a repeating instrument or not.
}
}
\description{
This will take output from a REDCap export and split it into a base table

View File

@ -0,0 +1,63 @@
context("Using the `forms = 'all'` argument")
# Global variables --------------------------------------------------------
# Cars
metadata <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_metadata.json"
)
)
records <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_records.json"
)
)
redcap_output_json <- REDCap_split(records, metadata, forms = "all")
# Longitudinal
file_paths <- sapply(
c(
records = "WARRIORtestForSoftwa_DATA_2018-06-21_1431.csv",
metadata = "WARRIORtestForSoftwareUpgrades_DataDictionary_2018-06-21.csv"
), get_data_location
)
redcap <- lapply(file_paths, read.csv, stringsAsFactors = FALSE)
redcap[["metadata"]] <- with(redcap, metadata[metadata[,1] > "",])
redcap_output_long <- with(redcap, REDCap_split(records, metadata, forms = "all"))
redcap_long_names <- names(redcap[[1]])
# Tests -------------------------------------------------------------------
test_that("Each form is an element in the list", {
expect_length(redcap_output_json, 3L)
expect_identical(names(redcap_output_json), c("motor_trend_cars", "grouping", "sale"))
})
test_that("All variables land somewhere", {
expect_true(setequal(names(records), Reduce("union", sapply(redcap_output_json, names))))
})
test_that("Primary table name is ignored", {
expect_identical(
REDCap_split(records, metadata, "HELLO", "all"),
redcap_output_json
)
})
test_that("Supports longitudinal data", {
# setdiff(redcap_long_names, Reduce("union", sapply(redcap_output_long, names)))
## [1] "informed_consent_and_addendum_timestamp"
expect_true(setequal(redcap_long_names, Reduce("union", sapply(redcap_output_long, names))))
})

View File

@ -13,5 +13,5 @@ test_that("CSV export matches reference", {
redcap_output <- with(redcap, REDCap_split(records, metadata))
expect_known_hash(redcap_output, "dff3a52955")
expect_known_hash(redcap_output, "0934bcb292")
})

View File

@ -0,0 +1,38 @@
context("Primary table name processing")
# Global variables -------------------------------------------------------
metadata <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_metadata.json"
)
)
records <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_records.json"
)
)
ref_hash <- "2c8b6531597182af1248f92124161e0c"
# Tests -------------------------------------------------------------------
test_that("Will not use a repeating instrument name for primary table", {
redcap_output_json1 <- expect_warning(
REDCap_split(records, metadata, "sale"),
"primary table"
)
expect_known_hash(redcap_output_json1, ref_hash)
})
test_that("Names are set correctly and output is identical", {
redcap_output_json2 <- REDCap_split(records, metadata, "main")
expect_identical(names(redcap_output_json2), c("main", "sale"))
expect_known_hash(setNames(redcap_output_json2, c("", "sale")), ref_hash)
})