#' Split REDCap repeating instruments table into multiple tables #' #' This will take output from a REDCap export and split it into a base table #' and child tables for each repeating instrument. Metadata #' is used to determine which fields should be included in each resultant table. #' #' @param records Exported project records. May be a \code{data.frame}, #' \code{response}, or \code{character} vector containing JSON from an API #' call. #' @param metadata Project metadata (the data dictionary). May be a #' \code{data.frame}, \code{response}, or \code{character} vector containing #' JSON from an API call. #' @author Paul W. Egeler, M.S., GStat #' @examples #' \dontrun{ #' # Using an API call ------------------------------------------------------- #' #' library(RCurl) #' #' # Get the records #' records <- postForm( #' uri = api_url, # Supply your site-specific URI #' token = api_token, # Supply your own API token #' content = 'record', #' format = 'json', #' returnFormat = 'json' #' ) #' #' # Get the metadata #' metadata <- postForm( #' uri = api_url, # Supply your site-specific URI #' token = api_token, # Supply your own API token #' content = 'metadata', #' format = 'json' #' ) #' #' # Convert exported JSON strings into a list of data.frames #' REDCapRITS::REDCap_split(records, metadata) #' #' # Using a raw data export ------------------------------------------------- #' #' # Get the records #' records <- read.csv("/path/to/data/ExampleProject_DATA_2018-06-03_1700.csv") #' #' # Get the metadata #' metadata <- read.csv("/path/to/data/ExampleProject_DataDictionary_2018-06-03.csv") #' #' # Split the tables #' REDCapRITS::REDCap_split(records, metadata) #' #' # In conjunction with the R export script --------------------------------- #' #' # You must set the working directory first since the REDCap data export script #' # contains relative file references. #' setwd("/path/to/data/") #' #' # Run the data export script supplied by REDCap. #' # This will create a data.frame of your records called 'data' #' source("ExampleProject_R_2018-06-03_1700.r") #' #' # Get the metadata #' metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv") #' #' # Split the tables #' REDCapRITS::REDCap_split(data, metadata) #' } #' @return A list of \code{"data.frame"}s: one base table and zero or more #' tables for each repeating instrument. #' @include process_user_input.r #' @export REDCap_split <- function(records, metadata) { # Process user input records <- process_user_input(records) metadata <- process_user_input(metadata) # Get the variable names in the dataset vars_in_data <- names(records) # Check to see if there were any repeating instruments if (!any(vars_in_data == "redcap_repeat_instrument")) { message("There are no repeating instruments in this data.") return(list(records)) } # Standardize variable names for metadata names(metadata) <- c( "field_name", "form_name", "section_header", "field_type", "field_label", "select_choices_or_calculations", "field_note", "text_validation_type_or_show_slider_number", "text_validation_min", "text_validation_max", "identifier", "branching_logic", "required_field", "custom_alignment", "question_number", "matrix_group_name", "matrix_ranking", "field_annotation" ) # Make sure that no metadata columns are factors metadata <- rapply(metadata, as.character, classes = "factor", how = "replace") # Find the fields and associated form fields <- metadata[ !metadata$field_type %in% c("descriptive", "checkbox"), c("field_name", "form_name") ] # Process instrument status fields form_names <- unique(metadata$form_name) form_complete_fields <- data.frame( field_name = paste0(form_names, "_complete"), form_name = form_names, stringsAsFactors = FALSE ) fields <- rbind(fields, form_complete_fields) # Process checkbox fields if (any(metadata$field_type == "checkbox")) { checkbox_basenames <- metadata[ metadata$field_type == "checkbox", c("field_name", "form_name") ] checkbox_fields <- do.call( "rbind", apply( checkbox_basenames, 1, function(x, y) data.frame( field_name = y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"), y, perl = TRUE)], form_name = x[2], stringsAsFactors = FALSE, row.names = NULL ), y = vars_in_data ) ) fields <- rbind(fields, checkbox_fields) } # Process ".*\\.factor" fields supplied by REDCap's export data R script if (any(grepl("\\.factor$", vars_in_data))) { factor_fields <- do.call( "rbind", apply( fields, 1, function(x, y) { field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y) if (any(field_indices)) data.frame( field_name = y[field_indices], form_name = x[2], stringsAsFactors = FALSE, row.names = NULL ) }, y = vars_in_data ) ) fields <- rbind(fields, factor_fields) } # Identify the subtables in the data subtables <- unique(records$redcap_repeat_instrument) subtables <- subtables[subtables != ""] # Variables to be present in each output table universal_fields <- c( vars_in_data[1], grep( "^redcap_(?!(repeat)).*", vars_in_data, value = TRUE, perl = TRUE ) ) # Variables to be at the beginning of each repeating instrument repeat_instrument_fields <- grep( "^redcap_repeat.*", vars_in_data, value = TRUE ) # Split the table based on instrument out <- split.data.frame(records, records$redcap_repeat_instrument) # Delete the variables that are not relevant for (i in names(out)) { if (i == "") { out_fields <- which( vars_in_data %in% c( universal_fields, fields[!fields[,2] %in% subtables, 1] ) ) out[[which(names(out) == "")]] <- out[[which(names(out) == "")]][out_fields] } else { out_fields <- which( vars_in_data %in% c( universal_fields, repeat_instrument_fields, fields[fields[,2] == i, 1] ) ) out[[i]] <- out[[i]][out_fields] } } out }