diff --git a/R/DESCRIPTION b/R/DESCRIPTION index 5372597..0a493f5 100644 --- a/R/DESCRIPTION +++ b/R/DESCRIPTION @@ -1,6 +1,6 @@ Package: REDCapRITS Title: REDCap Repeating Instrument Table Splitter -Version: 0.0.0 +Version: 0.1.0 Authors@R: c( person("Paul", "Egeler", email = "paul.egeler@spectrumhealth.org", role = c("aut", "cre")), person("Spectrum Health, Grand Rapids, MI", role = "cph")) @@ -18,9 +18,10 @@ Suggests: License: GPL-3 Encoding: UTF-8 LazyData: true -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.1 URL: https://github.com/SpectrumHealthResearch/REDCapRITS BugReports: https://github.com/SpectrumHealthResearch/REDCapRITS/issues Collate: + 'utils.r' 'process_user_input.r' 'REDCap_split.r' diff --git a/R/NEWS.md b/R/NEWS.md new file mode 100644 index 0000000..4f5c340 --- /dev/null +++ b/R/NEWS.md @@ -0,0 +1,10 @@ +# REDCapRITS 0.1.0 (Release date: 2019-07-01) + + * [feature] User can now specify the name of the 'primary' table, which previously was left blank. (#9) + * [bug] Keeps REDCap-generated fields in non-repeating data.frame that are not present in metadata file. (#7) + * [enhancement] Unit tests created. (#6) + * [bug] Checkbox data now supported. (#1) + +# REDCapRITS 0.0.0 (Release date: 2018-06-03) + + * Initial Release diff --git a/R/R/REDCap_split.r b/R/R/REDCap_split.r index 14f499d..7e9236c 100644 --- a/R/R/REDCap_split.r +++ b/R/R/REDCap_split.r @@ -10,6 +10,8 @@ #' @param metadata Project metadata (the data dictionary). May be a #' \code{data.frame}, \code{response}, or \code{character} vector containing #' JSON from an API call. +#' @param primary_table_label Name of the label given to the list element for +#' the primary output table (as described in *README.md*). #' @author Paul W. Egeler, M.S., GStat #' @examples #' \dontrun{ @@ -66,9 +68,12 @@ #' } #' @return A list of \code{"data.frame"}s: one base table and zero or more #' tables for each repeating instrument. -#' @include process_user_input.r +#' @include process_user_input.r utils.r #' @export -REDCap_split <- function(records, metadata) { +REDCap_split <- function(records, + metadata, + primary_table_label = "" +) { # Process user input records <- process_user_input(records) @@ -78,12 +83,8 @@ REDCap_split <- function(records, metadata) { vars_in_data <- names(records) # Check to see if there were any repeating instruments - if (!any(vars_in_data == "redcap_repeat_instrument")) { - - message("There are no repeating instruments in this data.") - - return(list(records)) - + if (!"redcap_repeat_instrument" %in% vars_in_data) { + stop("There are no repeating instruments in this dataset.") } # Standardize variable names for metadata @@ -93,80 +94,7 @@ REDCap_split <- function(records, metadata) { metadata <- rapply(metadata, as.character, classes = "factor", how = "replace") # Find the fields and associated form - fields <- metadata[ - !metadata$field_type %in% c("descriptive", "checkbox"), - c("field_name", "form_name") - ] - - # Process instrument status fields - form_names <- unique(metadata$form_name) - form_complete_fields <- data.frame( - field_name = paste0(form_names, "_complete"), - form_name = form_names, - stringsAsFactors = FALSE - ) - - fields <- rbind(fields, form_complete_fields) - - # Process checkbox fields - if (any(metadata$field_type == "checkbox")) { - - checkbox_basenames <- metadata[ - metadata$field_type == "checkbox", - c("field_name", "form_name") - ] - - checkbox_fields <- - do.call( - "rbind", - apply( - checkbox_basenames, - 1, - function(x, y) - data.frame( - field_name = y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"), y, perl = TRUE)], - form_name = x[2], - stringsAsFactors = FALSE, - row.names = NULL - ), - y = vars_in_data - ) - ) - - fields <- rbind(fields, checkbox_fields) - - } - - # Process ".*\\.factor" fields supplied by REDCap's export data R script - if (any(grepl("\\.factor$", vars_in_data))) { - - factor_fields <- - do.call( - "rbind", - apply( - fields, - 1, - function(x, y) { - field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y) - if (any(field_indices)) - data.frame( - field_name = y[field_indices], - form_name = x[2], - stringsAsFactors = FALSE, - row.names = NULL - ) - }, - y = vars_in_data - ) - ) - - fields <- rbind(fields, factor_fields) - - } - - # Identify the subtables in the data - subtables <- unique(records$redcap_repeat_instrument) - subtables <- subtables[subtables != ""] + fields <- match_fields_to_form(metadata, vars_in_data) # Variables to be present in each output table universal_fields <- c( @@ -187,13 +115,26 @@ REDCap_split <- function(records, metadata) { ) + # Identify the subtables in the data + subtables <- unique(records$redcap_repeat_instrument) + subtables <- subtables[subtables != ""] + # Split the table based on instrument out <- split.data.frame(records, records$redcap_repeat_instrument) + if (primary_table_label %in% subtables) { + warning( + "The label given to the primary table is already used by a repeating instrument.\n", + "The primary table label will be left blank." + ) + } else if (primary_table_label > "") { + names(out)[[which(names(out) == "")]] <- primary_table_label + } + # Delete the variables that are not relevant for (i in names(out)) { - if (i == "") { + if (i == primary_table_label) { out_fields <- which( vars_in_data %in% c( @@ -201,7 +142,7 @@ REDCap_split <- function(records, metadata) { fields[!fields[,2] %in% subtables, 1] ) ) - out[[which(names(out) == "")]] <- out[[which(names(out) == "")]][out_fields] + out[[which(names(out) == primary_table_label)]] <- out[[which(names(out) == primary_table_label)]][out_fields] } else { diff --git a/R/R/utils.r b/R/R/utils.r new file mode 100644 index 0000000..9115671 --- /dev/null +++ b/R/R/utils.r @@ -0,0 +1,76 @@ +match_fields_to_form <- function(metadata, vars_in_data) { + + fields <- metadata[ + !metadata$field_type %in% c("descriptive", "checkbox"), + c("field_name", "form_name") + ] + + # Process instrument status fields + form_names <- unique(metadata$form_name) + form_complete_fields <- data.frame( + field_name = paste0(form_names, "_complete"), + form_name = form_names, + stringsAsFactors = FALSE + ) + + fields <- rbind(fields, form_complete_fields) + + # Process checkbox fields + if (any(metadata$field_type == "checkbox")) { + + checkbox_basenames <- metadata[ + metadata$field_type == "checkbox", + c("field_name", "form_name") + ] + + checkbox_fields <- + do.call( + "rbind", + apply( + checkbox_basenames, + 1, + function(x, y) + data.frame( + field_name = y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"), y, perl = TRUE)], + form_name = x[2], + stringsAsFactors = FALSE, + row.names = NULL + ), + y = vars_in_data + ) + ) + + fields <- rbind(fields, checkbox_fields) + + } + + # Process ".*\\.factor" fields supplied by REDCap's export data R script + if (any(grepl("\\.factor$", vars_in_data))) { + + factor_fields <- + do.call( + "rbind", + apply( + fields, + 1, + function(x, y) { + field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y) + if (any(field_indices)) + data.frame( + field_name = y[field_indices], + form_name = x[2], + stringsAsFactors = FALSE, + row.names = NULL + ) + }, + y = vars_in_data + ) + ) + + fields <- rbind(fields, factor_fields) + + } + + fields + + } diff --git a/R/man/REDCap_split.Rd b/R/man/REDCap_split.Rd index 8bd4793..9fdf018 100644 --- a/R/man/REDCap_split.Rd +++ b/R/man/REDCap_split.Rd @@ -4,7 +4,7 @@ \alias{REDCap_split} \title{Split REDCap repeating instruments table into multiple tables} \usage{ -REDCap_split(records, metadata) +REDCap_split(records, metadata, primary_table_label = "") } \arguments{ \item{records}{Exported project records. May be a \code{data.frame}, @@ -14,6 +14,9 @@ call.} \item{metadata}{Project metadata (the data dictionary). May be a \code{data.frame}, \code{response}, or \code{character} vector containing JSON from an API call.} + +\item{primary_table_label}{Name of the label given to the list element for +the primary output table (as described in *README.md*).} } \value{ A list of \code{"data.frame"}s: one base table and zero or more