2018-01-19 17:50:08 +01:00
|
|
|
#' Split REDCap repeating instruments table into multiple tables
|
|
|
|
#'
|
2018-06-03 22:08:26 +02:00
|
|
|
#' This will take output from a REDCap export and split it into a base table
|
|
|
|
#' and child tables for each repeating instrument. Metadata
|
2018-01-19 17:50:08 +01:00
|
|
|
#' is used to determine which fields should be included in each resultant table.
|
|
|
|
#'
|
2018-06-23 04:24:34 +02:00
|
|
|
#' @param records Exported project records. May be a \code{data.frame},
|
|
|
|
#' \code{response}, or \code{character} vector containing JSON from an API
|
2018-06-03 22:08:26 +02:00
|
|
|
#' call.
|
2018-06-23 04:24:34 +02:00
|
|
|
#' @param metadata Project metadata (the data dictionary). May be a
|
|
|
|
#' \code{data.frame}, \code{response}, or \code{character} vector containing
|
|
|
|
#' JSON from an API call.
|
2019-07-08 18:19:33 +02:00
|
|
|
#' @param primary_table_name Name given to the list element for the primary
|
|
|
|
#' output table (as described in \emph{README.md}). Ignored if
|
|
|
|
#' \code{forms = 'all'}.
|
|
|
|
#' @param forms Indicate whether to create separate tables for repeating
|
|
|
|
#' instruments only or for all forms.
|
2018-01-19 17:50:08 +01:00
|
|
|
#' @author Paul W. Egeler, M.S., GStat
|
|
|
|
#' @examples
|
|
|
|
#' \dontrun{
|
2018-06-09 05:24:35 +02:00
|
|
|
#' # Using an API call -------------------------------------------------------
|
|
|
|
#'
|
2018-01-19 17:50:08 +01:00
|
|
|
#' library(RCurl)
|
2018-05-25 18:02:21 +02:00
|
|
|
#'
|
2018-01-19 17:50:08 +01:00
|
|
|
#' # Get the records
|
2018-06-03 22:46:25 +02:00
|
|
|
#' records <- postForm(
|
2018-06-09 05:24:35 +02:00
|
|
|
#' uri = api_url, # Supply your site-specific URI
|
|
|
|
#' token = api_token, # Supply your own API token
|
|
|
|
#' content = 'record',
|
|
|
|
#' format = 'json',
|
|
|
|
#' returnFormat = 'json'
|
2018-01-19 17:50:08 +01:00
|
|
|
#' )
|
2018-05-25 18:02:21 +02:00
|
|
|
#'
|
2018-06-03 22:46:25 +02:00
|
|
|
#' # Get the metadata
|
|
|
|
#' metadata <- postForm(
|
2018-06-09 05:24:35 +02:00
|
|
|
#' uri = api_url, # Supply your site-specific URI
|
|
|
|
#' token = api_token, # Supply your own API token
|
|
|
|
#' content = 'metadata',
|
|
|
|
#' format = 'json'
|
2018-06-03 22:46:25 +02:00
|
|
|
#' )
|
|
|
|
#'
|
2018-06-03 22:08:26 +02:00
|
|
|
#' # Convert exported JSON strings into a list of data.frames
|
2018-06-09 05:24:35 +02:00
|
|
|
#' REDCapRITS::REDCap_split(records, metadata)
|
|
|
|
#'
|
|
|
|
#' # Using a raw data export -------------------------------------------------
|
|
|
|
#'
|
|
|
|
#' # Get the records
|
|
|
|
#' records <- read.csv("/path/to/data/ExampleProject_DATA_2018-06-03_1700.csv")
|
|
|
|
#'
|
|
|
|
#' # Get the metadata
|
2023-04-13 10:57:04 +02:00
|
|
|
#' metadata <- read.csv(
|
|
|
|
#' "/path/to/data/ExampleProject_DataDictionary_2018-06-03.csv")
|
2018-06-09 05:24:35 +02:00
|
|
|
#'
|
|
|
|
#' # Split the tables
|
|
|
|
#' REDCapRITS::REDCap_split(records, metadata)
|
|
|
|
#'
|
|
|
|
#' # In conjunction with the R export script ---------------------------------
|
|
|
|
#'
|
2023-04-13 10:57:04 +02:00
|
|
|
#' # You must set the working directory first since the REDCap data export
|
|
|
|
#' # script contains relative file references.
|
2023-06-05 08:35:34 +02:00
|
|
|
#' old <- getwd()
|
2018-06-09 05:24:35 +02:00
|
|
|
#' setwd("/path/to/data/")
|
|
|
|
#'
|
|
|
|
#' # Run the data export script supplied by REDCap.
|
|
|
|
#' # This will create a data.frame of your records called 'data'
|
|
|
|
#' source("ExampleProject_R_2018-06-03_1700.r")
|
|
|
|
#'
|
2023-06-05 08:35:34 +02:00
|
|
|
#' # Get the metadatan
|
2018-06-09 05:24:35 +02:00
|
|
|
#' metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")
|
|
|
|
#'
|
|
|
|
#' # Split the tables
|
|
|
|
#' REDCapRITS::REDCap_split(data, metadata)
|
2023-06-05 08:35:34 +02:00
|
|
|
#' setwd(old)
|
2018-01-19 17:50:08 +01:00
|
|
|
#' }
|
2019-07-08 18:19:33 +02:00
|
|
|
#' @return A list of \code{"data.frame"}s. The number of tables will differ
|
|
|
|
#' depending on the \code{forms} option selected.
|
|
|
|
#' \itemize{
|
|
|
|
#' \item \code{'repeating'}: one base table and one or more
|
|
|
|
#' tables for each repeating instrument.
|
|
|
|
#' \item \code{'all'}: a data.frame for each instrument, regardless of
|
|
|
|
#' whether it is a repeating instrument or not.
|
|
|
|
#' }
|
2019-07-01 22:54:29 +02:00
|
|
|
#' @include process_user_input.r utils.r
|
2018-01-19 17:50:08 +01:00
|
|
|
#' @export
|
2019-07-01 22:54:29 +02:00
|
|
|
REDCap_split <- function(records,
|
|
|
|
metadata,
|
2019-07-08 18:19:33 +02:00
|
|
|
primary_table_name = "",
|
2023-01-16 09:49:17 +01:00
|
|
|
forms = c("repeating", "all")) {
|
2023-03-06 14:36:32 +01:00
|
|
|
|
2018-06-04 16:40:16 +02:00
|
|
|
# Process user input
|
2018-06-23 04:24:34 +02:00
|
|
|
records <- process_user_input(records)
|
2023-01-16 09:49:17 +01:00
|
|
|
metadata <-
|
2023-04-14 11:46:09 +02:00
|
|
|
as.data.frame(process_user_input(metadata))
|
|
|
|
|
|
|
|
# Process repeat instrument names to match the redcap naming
|
|
|
|
records$redcap_repeat_instrument <- clean_redcap_name(records$redcap_repeat_instrument)
|
|
|
|
|
2019-07-26 23:26:33 +02:00
|
|
|
|
2018-06-06 23:55:01 +02:00
|
|
|
# Get the variable names in the dataset
|
|
|
|
vars_in_data <- names(records)
|
|
|
|
|
2019-07-08 18:19:33 +02:00
|
|
|
# Match arg for forms
|
2023-04-14 11:46:09 +02:00
|
|
|
forms <- match.arg(forms, c("repeating", "all"))
|
2019-07-08 18:19:33 +02:00
|
|
|
|
2018-05-25 18:02:21 +02:00
|
|
|
# Check to see if there were any repeating instruments
|
2023-01-16 09:49:17 +01:00
|
|
|
if (forms == "repeating" &&
|
|
|
|
!"redcap_repeat_instrument" %in% vars_in_data) {
|
2019-07-01 22:54:29 +02:00
|
|
|
stop("There are no repeating instruments in this dataset.")
|
2018-05-25 18:02:21 +02:00
|
|
|
}
|
2018-01-19 17:50:08 +01:00
|
|
|
|
2019-07-26 23:26:33 +02:00
|
|
|
# Remove NAs from `redcap_repeat_instrument` (see issue #12)
|
2023-01-16 09:49:17 +01:00
|
|
|
if (any(is.na(records$redcap_repeat_instrument))) {
|
2019-07-26 23:26:33 +02:00
|
|
|
records$redcap_repeat_instrument <- ifelse(
|
|
|
|
is.na(records$redcap_repeat_instrument),
|
|
|
|
"",
|
|
|
|
as.character(records$redcap_repeat_instrument)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2018-06-04 16:40:16 +02:00
|
|
|
# Standardize variable names for metadata
|
2023-03-06 14:36:32 +01:00
|
|
|
# names(metadata) <- metadata_names
|
2018-06-04 16:40:16 +02:00
|
|
|
|
|
|
|
# Make sure that no metadata columns are factors
|
2023-01-16 09:49:17 +01:00
|
|
|
metadata <-
|
|
|
|
rapply(metadata, as.character, classes = "factor", how = "replace")
|
2018-06-04 16:40:16 +02:00
|
|
|
|
2018-06-01 23:41:27 +02:00
|
|
|
# Find the fields and associated form
|
2019-07-01 22:54:29 +02:00
|
|
|
fields <- match_fields_to_form(metadata, vars_in_data)
|
2018-01-19 17:50:08 +01:00
|
|
|
|
2018-06-23 08:04:26 +02:00
|
|
|
# Variables to be present in each output table
|
|
|
|
universal_fields <- c(
|
|
|
|
vars_in_data[1],
|
|
|
|
grep(
|
|
|
|
"^redcap_(?!(repeat)).*",
|
|
|
|
vars_in_data,
|
|
|
|
value = TRUE,
|
|
|
|
perl = TRUE
|
2018-06-06 23:55:01 +02:00
|
|
|
)
|
2018-06-23 08:04:26 +02:00
|
|
|
)
|
|
|
|
|
2019-07-08 18:19:33 +02:00
|
|
|
if ("redcap_repeat_instrument" %in% vars_in_data) {
|
|
|
|
# Variables to be at the beginning of each repeating instrument
|
2023-01-16 09:49:17 +01:00
|
|
|
repeat_instrument_fields <- grep("^redcap_repeat.*",
|
|
|
|
vars_in_data,
|
|
|
|
value = TRUE)
|
2018-06-06 23:55:01 +02:00
|
|
|
|
2019-07-08 18:19:33 +02:00
|
|
|
# Identify the subtables in the data
|
|
|
|
subtables <- unique(records$redcap_repeat_instrument)
|
|
|
|
subtables <- subtables[subtables != ""]
|
2019-07-01 22:54:29 +02:00
|
|
|
|
2019-07-08 18:19:33 +02:00
|
|
|
# Split the table based on instrument
|
2023-01-16 09:49:17 +01:00
|
|
|
out <-
|
|
|
|
split.data.frame(records, records$redcap_repeat_instrument)
|
2019-07-09 00:01:48 +02:00
|
|
|
primary_table_index <- which(names(out) == "")
|
2018-01-19 17:50:08 +01:00
|
|
|
|
2019-07-08 18:19:33 +02:00
|
|
|
if (forms == "repeating" && primary_table_name %in% subtables) {
|
2023-01-16 09:49:17 +01:00
|
|
|
warning(
|
2023-04-13 10:57:04 +02:00
|
|
|
"The label given to the primary table is already used by a repeating
|
|
|
|
instrument. The primary table label will be left blank."
|
2023-01-16 09:49:17 +01:00
|
|
|
)
|
2019-07-08 18:19:33 +02:00
|
|
|
primary_table_name <- ""
|
|
|
|
} else if (primary_table_name > "") {
|
2019-07-09 00:01:48 +02:00
|
|
|
names(out)[[primary_table_index]] <- primary_table_name
|
2019-07-08 18:19:33 +02:00
|
|
|
}
|
2019-07-01 22:54:29 +02:00
|
|
|
|
2019-07-08 18:19:33 +02:00
|
|
|
# Delete the variables that are not relevant
|
|
|
|
for (i in names(out)) {
|
|
|
|
if (i == primary_table_name) {
|
2023-01-16 09:49:17 +01:00
|
|
|
out_fields <- which(vars_in_data %in% c(universal_fields,
|
2023-04-13 10:57:04 +02:00
|
|
|
fields[!fields[, 2] %in%
|
|
|
|
subtables, 1]))
|
2023-01-16 09:49:17 +01:00
|
|
|
out[[primary_table_index]] <-
|
|
|
|
out[[primary_table_index]][out_fields]
|
2018-01-19 17:50:08 +01:00
|
|
|
|
2019-07-08 18:19:33 +02:00
|
|
|
} else {
|
2023-01-16 09:49:17 +01:00
|
|
|
out_fields <- which(vars_in_data %in% c(universal_fields,
|
|
|
|
repeat_instrument_fields,
|
|
|
|
fields[fields[, 2] == i, 1]))
|
2019-07-08 18:19:33 +02:00
|
|
|
out[[i]] <- out[[i]][out_fields]
|
2018-01-19 17:50:08 +01:00
|
|
|
|
2019-07-08 18:19:33 +02:00
|
|
|
}
|
2018-01-19 17:50:08 +01:00
|
|
|
|
2019-07-08 18:19:33 +02:00
|
|
|
}
|
2019-07-09 00:01:48 +02:00
|
|
|
|
|
|
|
if (forms == "all") {
|
2023-01-16 09:49:17 +01:00
|
|
|
out <- c(split_non_repeating_forms(out[[primary_table_index]],
|
|
|
|
universal_fields,
|
|
|
|
fields[!fields[, 2] %in% subtables, ]),
|
|
|
|
out[-primary_table_index])
|
2019-07-09 00:01:48 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
out <- split_non_repeating_forms(records, universal_fields, fields)
|
|
|
|
|
2018-05-25 18:02:21 +02:00
|
|
|
}
|
2018-01-19 17:50:08 +01:00
|
|
|
|
2018-06-03 22:08:26 +02:00
|
|
|
out
|
2018-01-19 17:50:08 +01:00
|
|
|
|
|
|
|
}
|