mirror of
https://github.com/agdamsbo/REDCapCAST.git
synced 2025-04-04 07:02:31 +02:00
231 lines
6.3 KiB
R
231 lines
6.3 KiB
R
#' Split REDCap repeating instruments table into multiple tables
|
|
#'
|
|
#' This will take output from a REDCap export and split it into a base table
|
|
#' and child tables for each repeating instrument. Metadata
|
|
#' is used to determine which fields should be included in each resultant table.
|
|
#'
|
|
#' @param records Exported project records. May be a \code{data.frame},
|
|
#' \code{response}, or \code{character} vector containing JSON from an API
|
|
#' call.
|
|
#' @param metadata Project metadata (the data dictionary). May be a
|
|
#' \code{data.frame}, \code{response}, or \code{character} vector containing
|
|
#' JSON from an API call.
|
|
#' @author Paul W. Egeler, M.S., GStat
|
|
#' @examples
|
|
#' \dontrun{
|
|
#' # Using an API call -------------------------------------------------------
|
|
#'
|
|
#' library(RCurl)
|
|
#'
|
|
#' # Get the records
|
|
#' records <- postForm(
|
|
#' uri = api_url, # Supply your site-specific URI
|
|
#' token = api_token, # Supply your own API token
|
|
#' content = 'record',
|
|
#' format = 'json',
|
|
#' returnFormat = 'json'
|
|
#' )
|
|
#'
|
|
#' # Get the metadata
|
|
#' metadata <- postForm(
|
|
#' uri = api_url, # Supply your site-specific URI
|
|
#' token = api_token, # Supply your own API token
|
|
#' content = 'metadata',
|
|
#' format = 'json'
|
|
#' )
|
|
#'
|
|
#' # Convert exported JSON strings into a list of data.frames
|
|
#' REDCapRITS::REDCap_split(records, metadata)
|
|
#'
|
|
#' # Using a raw data export -------------------------------------------------
|
|
#'
|
|
#' # Get the records
|
|
#' records <- read.csv("/path/to/data/ExampleProject_DATA_2018-06-03_1700.csv")
|
|
#'
|
|
#' # Get the metadata
|
|
#' metadata <- read.csv("/path/to/data/ExampleProject_DataDictionary_2018-06-03.csv")
|
|
#'
|
|
#' # Split the tables
|
|
#' REDCapRITS::REDCap_split(records, metadata)
|
|
#'
|
|
#' # In conjunction with the R export script ---------------------------------
|
|
#'
|
|
#' # You must set the working directory first since the REDCap data export script
|
|
#' # contains relative file references.
|
|
#' setwd("/path/to/data/")
|
|
#'
|
|
#' # Run the data export script supplied by REDCap.
|
|
#' # This will create a data.frame of your records called 'data'
|
|
#' source("ExampleProject_R_2018-06-03_1700.r")
|
|
#'
|
|
#' # Get the metadata
|
|
#' metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")
|
|
#'
|
|
#' # Split the tables
|
|
#' REDCapRITS::REDCap_split(data, metadata)
|
|
#' }
|
|
#' @return A list of \code{"data.frame"}s: one base table and zero or more
|
|
#' tables for each repeating instrument.
|
|
#' @include process_user_input.r
|
|
#' @export
|
|
REDCap_split <- function(records, metadata) {
|
|
|
|
# Process user input
|
|
records <- process_user_input(records)
|
|
metadata <- process_user_input(metadata)
|
|
|
|
# Get the variable names in the dataset
|
|
vars_in_data <- names(records)
|
|
|
|
# Check to see if there were any repeating instruments
|
|
if (!any(vars_in_data == "redcap_repeat_instrument")) {
|
|
|
|
message("There are no repeating instruments in this data.")
|
|
|
|
return(list(records))
|
|
|
|
}
|
|
|
|
# Standardize variable names for metadata
|
|
names(metadata) <- c(
|
|
"field_name", "form_name", "section_header", "field_type",
|
|
"field_label", "select_choices_or_calculations", "field_note",
|
|
"text_validation_type_or_show_slider_number", "text_validation_min",
|
|
"text_validation_max", "identifier", "branching_logic", "required_field",
|
|
"custom_alignment", "question_number", "matrix_group_name", "matrix_ranking",
|
|
"field_annotation"
|
|
)
|
|
|
|
# Make sure that no metadata columns are factors
|
|
metadata <- rapply(metadata, as.character, classes = "factor", how = "replace")
|
|
|
|
# Find the fields and associated form
|
|
fields <- metadata[
|
|
!metadata$field_type %in% c("descriptive", "checkbox"),
|
|
c("field_name", "form_name")
|
|
]
|
|
|
|
# Process instrument status fields
|
|
form_names <- unique(metadata$form_name)
|
|
form_complete_fields <- data.frame(
|
|
field_name = paste0(form_names, "_complete"),
|
|
form_name = form_names,
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
fields <- rbind(fields, form_complete_fields)
|
|
|
|
# Process checkbox fields
|
|
if (any(metadata$field_type == "checkbox")) {
|
|
|
|
checkbox_basenames <- metadata[
|
|
metadata$field_type == "checkbox",
|
|
c("field_name", "form_name")
|
|
]
|
|
|
|
checkbox_fields <-
|
|
do.call(
|
|
"rbind",
|
|
apply(
|
|
checkbox_basenames,
|
|
1,
|
|
function(x, y)
|
|
data.frame(
|
|
field_name = y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"), y, perl = TRUE)],
|
|
form_name = x[2],
|
|
stringsAsFactors = FALSE,
|
|
row.names = NULL
|
|
),
|
|
y = vars_in_data
|
|
)
|
|
)
|
|
|
|
fields <- rbind(fields, checkbox_fields)
|
|
|
|
}
|
|
|
|
# Process ".*\\.factor" fields supplied by REDCap's export data R script
|
|
if (any(grepl("\\.factor$", vars_in_data))) {
|
|
|
|
factor_fields <-
|
|
do.call(
|
|
"rbind",
|
|
apply(
|
|
fields,
|
|
1,
|
|
function(x, y) {
|
|
field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y)
|
|
if (any(field_indices))
|
|
data.frame(
|
|
field_name = y[field_indices],
|
|
form_name = x[2],
|
|
stringsAsFactors = FALSE,
|
|
row.names = NULL
|
|
)
|
|
},
|
|
y = vars_in_data
|
|
)
|
|
)
|
|
|
|
fields <- rbind(fields, factor_fields)
|
|
|
|
}
|
|
|
|
# Identify the subtables in the data
|
|
subtables <- unique(records$redcap_repeat_instrument)
|
|
subtables <- subtables[subtables != ""]
|
|
|
|
# Variables to be present in each output table
|
|
universal_fields <- c(
|
|
vars_in_data[1],
|
|
grep(
|
|
"^redcap_(?!(repeat)).*",
|
|
vars_in_data,
|
|
value = TRUE,
|
|
perl = TRUE
|
|
)
|
|
)
|
|
|
|
# Variables to be at the beginning of each repeating instrument
|
|
repeat_instrument_fields <- grep(
|
|
"^redcap_repeat.*",
|
|
vars_in_data,
|
|
value = TRUE
|
|
)
|
|
|
|
|
|
# Split the table based on instrument
|
|
out <- split.data.frame(records, records$redcap_repeat_instrument)
|
|
|
|
# Delete the variables that are not relevant
|
|
for (i in names(out)) {
|
|
|
|
if (i == "") {
|
|
|
|
out_fields <- which(
|
|
vars_in_data %in% c(
|
|
universal_fields,
|
|
fields[!fields[,2] %in% subtables, 1]
|
|
)
|
|
)
|
|
out[[which(names(out) == "")]] <- out[[which(names(out) == "")]][out_fields]
|
|
|
|
} else {
|
|
|
|
out_fields <- which(
|
|
vars_in_data %in% c(
|
|
universal_fields,
|
|
repeat_instrument_fields,
|
|
fields[fields[,2] == i, 1]
|
|
)
|
|
)
|
|
out[[i]] <- out[[i]][out_fields]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out
|
|
|
|
}
|