REDCapCAST/R/REDCap_split.r

#' Split REDCap repeating instruments table into multiple tables
#'
#' This will take output from a REDCap export and split it into a base table
#' and child tables for each repeating instrument. Metadata
#' is used to determine which fields should be included in each resultant table.
#'
#' @param records Exported project records. May be a \code{data.frame},
#'   \code{response}, or \code{character} vector containing JSON from an API
#'   call.
#' @param metadata Project metadata (the data dictionary). May be a
#'   \code{data.frame}, \code{response}, or \code{character} vector containing
#'   JSON from an API call.
#' @param primary_table_name Name given to the list element for the primary
#'   output table (as described in \emph{README.md}). Ignored if
#'   \code{forms = 'all'}.
#' @param forms Indicate whether to create separate tables for repeating
#'   instruments only or for all forms.
#' @author Paul W. Egeler, M.S., GStat
#' @examples
#' \dontrun{
#' # Using an API call -------------------------------------------------------
#'
#' library(RCurl)
#'
#' # Get the records
#' records <- postForm(
#'   uri = api_url,     # Supply your site-specific URI
#'   token = api_token, # Supply your own API token
#'   content = 'record',
#'   format = 'json',
#'   returnFormat = 'json'
#' )
#'
#' # Get the metadata
#' metadata <- postForm(
#'   uri = api_url,     # Supply your site-specific URI
#'   token = api_token, # Supply your own API token
#'   content = 'metadata',
#'   format = 'json'
#' )
#'
#' # Convert exported JSON strings into a list of data.frames
#' REDCapRITS::REDCap_split(records, metadata)
#'
#' # Using a raw data export -------------------------------------------------
#'
#' # Get the records
#' records <- read.csv("/path/to/data/ExampleProject_DATA_2018-06-03_1700.csv")
#'
#' # Get the metadata
#' metadata <- read.csv(
#' "/path/to/data/ExampleProject_DataDictionary_2018-06-03.csv")
#'
#' # Split the tables
#' REDCapRITS::REDCap_split(records, metadata)
#'
#' # In conjunction with the R export script ---------------------------------
#'
#' # You must set the working directory first since the REDCap data export
#' # script contains relative file references.
#' old <- getwd()
#' setwd("/path/to/data/")
#'
#' # Run the data export script supplied by REDCap.
#' # This will create a data.frame of your records called 'data'
#' source("ExampleProject_R_2018-06-03_1700.r")
#'
#' # Get the metadata
#' metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")
#'
#' # Split the tables
#' REDCapRITS::REDCap_split(data, metadata)
#' setwd(old)
#' }
#' @return A list of \code{"data.frame"}s. The number of tables will differ
#'   depending on the \code{forms} option selected.
#'   \itemize{
#'     \item \code{'repeating'}: one base table and one or more
#'     tables for each repeating instrument.
#'     \item \code{'all'}: a data.frame for each instrument, regardless of
#'     whether it is a repeating instrument or not.
#'   }
#' @include process_user_input.r utils.r
#' @export
REDCap_split <- function(records,
                         metadata,
                         primary_table_name = "",
                         forms = c("repeating", "all")) {

  # Resolve `forms` before anything else so that it is a validated,
  # length-one string for every kind of project; the scalar comparisons
  # further down rely on that.
  forms <- match.arg(forms, c("repeating", "all"))

  # Process user input
  records <- process_user_input(records)
  metadata <- as.data.frame(process_user_input(metadata))

  # Get the variable names in the dataset
  vars_in_data <- names(records)
  has_repeat_instrument <- "redcap_repeat_instrument" %in% vars_in_data

  # Handling specific to repeated and/or longitudinal projects. Projects for
  # which `is.repeated_longitudinal()` is FALSE skip this block entirely and
  # only have their instruments split.
  if (is.repeated_longitudinal(records)) {
    # Check to see if there were any repeating instruments
    if (forms == "repeating" && !has_repeat_instrument) {
      stop("There are no repeating instruments in this dataset.")
    }

    # Only touch the column when it exists: assigning the cleaned version of
    # a missing column to a data.frame fails on the length mismatch.
    if (has_repeat_instrument) {
      # Process repeat instrument names to match the redcap naming
      records$redcap_repeat_instrument <-
        clean_redcap_name(records$redcap_repeat_instrument)

      # Remove NAs from `redcap_repeat_instrument` (see issue #12)
      if (any(is.na(records$redcap_repeat_instrument))) {
        records$redcap_repeat_instrument <- ifelse(
          is.na(records$redcap_repeat_instrument),
          "",
          as.character(records$redcap_repeat_instrument)
        )
      }
    }
  }

  # Make sure that no metadata columns are factors
  metadata <-
    rapply(metadata, as.character, classes = "factor", how = "replace")

  # Find the fields and associated form
  fields <- match_fields_to_form(metadata, vars_in_data)

  # Variables to be present in each output table: the first column of the
  # export plus every `redcap_*` column that is not a `redcap_repeat*` column
  universal_fields <- c(
    vars_in_data[1],
    grep(
      "^redcap_(?!(repeat)).*",
      vars_in_data,
      value = TRUE,
      perl = TRUE
    )
  )

  # Without a repeat instrument column there is nothing to split by
  # instrument, so every form is split out directly.
  if (!has_repeat_instrument) {
    return(split_non_repeating_forms(records, universal_fields, fields))
  }

  # Variables to be at the beginning of each repeating instrument
  repeat_instrument_fields <- grep("^redcap_repeat.*",
                                   vars_in_data,
                                   value = TRUE)

  # Identify the subtables in the data
  subtables <- unique(records$redcap_repeat_instrument)
  subtables <- subtables[subtables != ""]

  # Split the table based on instrument. Rows with a blank instrument form the
  # primary table; `primary_table_index` is empty when there are no such rows.
  out <- split.data.frame(records, records$redcap_repeat_instrument)
  instrument_names <- names(out)
  primary_table_index <- which(instrument_names == "")

  if (forms == "all") {
    # The primary table is replaced by one table per form below, so its label
    # is never used. Dropping it here also avoids a duplicated list name when
    # the label equals a repeating instrument.
    primary_table_name <- ""
  } else if (primary_table_name %in% subtables) {
    warning(
      paste(
        "The label given to the primary table is already used by a repeating",
        "instrument. The primary table label will be left blank."
      )
    )
    primary_table_name <- ""
  } else if (nzchar(primary_table_name) &&
             length(primary_table_index) == 1L) {
    names(out)[[primary_table_index]] <- primary_table_name
  }

  # Delete the variables that are not relevant. Tables are visited by
  # position and matched against the instrument names captured before any
  # relabelling, so the choice of primary label cannot affect the matching.
  for (idx in seq_along(out)) {
    if (idx %in% primary_table_index) {
      keep <- c(universal_fields,
                fields[!fields[, 2] %in% subtables, 1])
    } else {
      keep <- c(universal_fields,
                repeat_instrument_fields,
                fields[fields[, 2] == instrument_names[[idx]], 1])
    }
    out[[idx]] <- out[[idx]][which(vars_in_data %in% keep)]
  }

  # For `forms = "all"`, replace the primary table by one table per
  # non-repeating form. Skipped when there is no primary table to split.
  if (forms == "all" && length(primary_table_index) > 0L) {
    out <- c(split_non_repeating_forms(out[[primary_table_index]],
                                       universal_fields,
                                       fields[!fields[, 2] %in% subtables, ]),
             out[-primary_table_index])
  }

  out

}
Initial Commit 2018-01-19 17:50:08 +01:00			`#' Split REDCap repeating instruments table into multiple tables`
			`#'`
Converted R code into a package 2018-06-03 22:08:26 +02:00			`#' This will take output from a REDCap export and split it into a base table`
			`#' and child tables for each repeating instrument. Metadata`
Initial Commit 2018-01-19 17:50:08 +01:00			`#' is used to determine which fields should be included in each resultant table.`
			`#'`
Turned routines to preprocess user inputs into S3 methods. Still needs to be tested. 2018-06-23 04:24:34 +02:00			`#' @param records Exported project records. May be a \code{data.frame},`
			`#' \code{response}, or \code{character} vector containing JSON from an API`
Converted R code into a package 2018-06-03 22:08:26 +02:00			`#' call.`
Turned routines to preprocess user inputs into S3 methods. Still needs to be tested. 2018-06-23 04:24:34 +02:00			`#' @param metadata Project metadata (the data dictionary). May be a`
			`#' \code{data.frame}, \code{response}, or \code{character} vector containing`
			`#' JSON from an API call.`
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`#' @param primary_table_name Name given to the list element for the primary`
			`#' output table (as described in \emph{README.md}). Ignored if`
			`#' \code{forms = 'all'}.`
			`#' @param forms Indicate whether to create separate tables for repeating`
			`#' instruments only or for all forms.`
Initial Commit 2018-01-19 17:50:08 +01:00			`#' @author Paul W. Egeler, M.S., GStat`
			`#' @examples`
			`#' \dontrun{`
r-pkg: adding more examples. Will make some of them live with exdata in the future. 2018-06-09 05:24:35 +02:00			`#' # Using an API call -------------------------------------------------------`
			`#'`
Initial Commit 2018-01-19 17:50:08 +01:00			`#' library(RCurl)`
Cleaning up R code and adding RStudio project. 2018-05-25 18:02:21 +02:00			`#'`
Initial Commit 2018-01-19 17:50:08 +01:00			`#' # Get the records`
Fixing variable names in R package example. 2018-06-03 22:46:25 +02:00			`#' records <- postForm(`
r-pkg: adding more examples. Will make some of them live with exdata in the future. 2018-06-09 05:24:35 +02:00			`#' uri = api_url, # Supply your site-specific URI`
			`#' token = api_token, # Supply your own API token`
			`#' content = 'record',`
			`#' format = 'json',`
			`#' returnFormat = 'json'`
Initial Commit 2018-01-19 17:50:08 +01:00			`#' )`
Cleaning up R code and adding RStudio project. 2018-05-25 18:02:21 +02:00			`#'`
Fixing variable names in R package example. 2018-06-03 22:46:25 +02:00			`#' # Get the metadata`
			`#' metadata <- postForm(`
r-pkg: adding more examples. Will make some of them live with exdata in the future. 2018-06-09 05:24:35 +02:00			`#' uri = api_url, # Supply your site-specific URI`
			`#' token = api_token, # Supply your own API token`
			`#' content = 'metadata',`
			`#' format = 'json'`
Fixing variable names in R package example. 2018-06-03 22:46:25 +02:00			`#' )`
			`#'`
Converted R code into a package 2018-06-03 22:08:26 +02:00			`#' # Convert exported JSON strings into a list of data.frames`
r-pkg: adding more examples. Will make some of them live with exdata in the future. 2018-06-09 05:24:35 +02:00			`#' REDCapRITS::REDCap_split(records, metadata)`
			`#'`
			`#' # Using a raw data export -------------------------------------------------`
			`#'`
			`#' # Get the records`
			`#' records <- read.csv("/path/to/data/ExampleProject_DATA_2018-06-03_1700.csv")`
			`#'`
			`#' # Get the metadata`
gp with CRAN in sight 2023-04-13 10:57:04 +02:00			`#' metadata <- read.csv(`
			`#' "/path/to/data/ExampleProject_DataDictionary_2018-06-03.csv")`
r-pkg: adding more examples. Will make some of them live with exdata in the future. 2018-06-09 05:24:35 +02:00			`#'`
			`#' # Split the tables`
			`#' REDCapRITS::REDCap_split(records, metadata)`
			`#'`
			`#' # In conjunction with the R export script ---------------------------------`
			`#'`
gp with CRAN in sight 2023-04-13 10:57:04 +02:00			`#' # You must set the working directory first since the REDCap data export`
			`#' # script contains relative file references.`
getwd/setwd fix, examples revision upcoming 2023-06-05 08:35:34 +02:00			`#' old <- getwd()`
r-pkg: adding more examples. Will make some of them live with exdata in the future. 2018-06-09 05:24:35 +02:00			`#' setwd("/path/to/data/")`
			`#'`
			`#' # Run the data export script supplied by REDCap.`
			`#' # This will create a data.frame of your records called 'data'`
			`#' source("ExampleProject_R_2018-06-03_1700.r")`
			`#'`
getwd/setwd fix, examples revision upcoming 2023-06-05 08:35:34 +02:00			`#' # Get the metadatan`
r-pkg: adding more examples. Will make some of them live with exdata in the future. 2018-06-09 05:24:35 +02:00			`#' metadata <- read.csv("ExampleProject_DataDictionary_2018-06-03.csv")`
			`#'`
			`#' # Split the tables`
			`#' REDCapRITS::REDCap_split(data, metadata)`
getwd/setwd fix, examples revision upcoming 2023-06-05 08:35:34 +02:00			`#' setwd(old)`
Initial Commit 2018-01-19 17:50:08 +01:00			`#' }`
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`#' @return A list of \code{"data.frame"}s. The number of tables will differ`
			`#' depending on the \code{forms} option selected.`
			`#' \itemize{`
			`#' \item \code{'repeating'}: one base table and one or more`
			`#' tables for each repeating instrument.`
			`#' \item \code{'all'}: a data.frame for each instrument, regardless of`
			`#' whether it is a repeating instrument or not.`
			`#' }`
Closes #9. User can specify primary table label Incremented to v0.1.0 2019-07-01 22:54:29 +02:00			`#' @include process_user_input.r utils.r`
Initial Commit 2018-01-19 17:50:08 +01:00			`#' @export`
Closes #9. User can specify primary table label Incremented to v0.1.0 2019-07-01 22:54:29 +02:00			`REDCap_split <- function(records,`
			`metadata,`
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`primary_table_name = "",`
working to submit to cran 2023-01-16 09:49:17 +01:00			`forms = c("repeating", "all")) {`
commented out archaeological findings 2023-03-06 14:36:32 +01:00
adding some metadata processing in R package to handle variety of user inputs 2018-06-04 16:40:16 +02:00			`# Process user input`
Turned routines to preprocess user inputs into S3 methods. Still needs to be tested. 2018-06-23 04:24:34 +02:00			`records <- process_user_input(records)`
working to submit to cran 2023-01-16 09:49:17 +01:00			`metadata <-`
a new helper function has been introduced to help matching form names when special characters are in use. 2023-04-14 11:46:09 +02:00			`as.data.frame(process_user_input(metadata))`

r-pkg: adding redcap_repeat_instrument.factor logic and improving checkbox logic 2018-06-06 23:55:01 +02:00			`# Get the variable names in the dataset`
			`vars_in_data <- names(records)`

data handling specific to repeated and/or longitudinal projects as been wrapped in if statement to allow handling of simple projects and just split instruments 2024-02-06 08:42:09 +01:00			`# Process repeat instrument names to match the redcap naming`
			`if (is.repeated_longitudinal(records)){`
			`records$redcap_repeat_instrument <- clean_redcap_name(records$redcap_repeat_instrument)`

Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`# Match arg for forms`
a new helper function has been introduced to help matching form names when special characters are in use. 2023-04-14 11:46:09 +02:00			`forms <- match.arg(forms, c("repeating", "all"))`
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00
Cleaning up R code and adding RStudio project. 2018-05-25 18:02:21 +02:00			`# Check to see if there were any repeating instruments`
working to submit to cran 2023-01-16 09:49:17 +01:00			`if (forms == "repeating" &&`
			`!"redcap_repeat_instrument" %in% vars_in_data) {`
Closes #9. User can specify primary table label Incremented to v0.1.0 2019-07-01 22:54:29 +02:00			`stop("There are no repeating instruments in this dataset.")`
Cleaning up R code and adding RStudio project. 2018-05-25 18:02:21 +02:00			`}`
Initial Commit 2018-01-19 17:50:08 +01:00
Closing issue #12 test included 2019-07-26 23:26:33 +02:00			# Remove NAs from `redcap_repeat_instrument` (see issue #12)
working to submit to cran 2023-01-16 09:49:17 +01:00			`if (any(is.na(records$redcap_repeat_instrument))) {`
Closing issue #12 test included 2019-07-26 23:26:33 +02:00			`records$redcap_repeat_instrument <- ifelse(`
			`is.na(records$redcap_repeat_instrument),`
			`"",`
			`as.character(records$redcap_repeat_instrument)`
			`)`
			`}`
data handling specific to repeated and/or longitudinal projects as been wrapped in if statement to allow handling of simple projects and just split instruments 2024-02-06 08:42:09 +01:00			`}`
Closing issue #12 test included 2019-07-26 23:26:33 +02:00
adding some metadata processing in R package to handle variety of user inputs 2018-06-04 16:40:16 +02:00			`# Standardize variable names for metadata`
commented out archaeological findings 2023-03-06 14:36:32 +01:00			`# names(metadata) <- metadata_names`
adding some metadata processing in R package to handle variety of user inputs 2018-06-04 16:40:16 +02:00
			`# Make sure that no metadata columns are factors`
working to submit to cran 2023-01-16 09:49:17 +01:00			`metadata <-`
			`rapply(metadata, as.character, classes = "factor", how = "replace")`
adding some metadata processing in R package to handle variety of user inputs 2018-06-04 16:40:16 +02:00
Closes #1. Allow R code to handle checkbox fields. 2018-06-01 23:41:27 +02:00			`# Find the fields and associated form`
Closes #9. User can specify primary table label Incremented to v0.1.0 2019-07-01 22:54:29 +02:00			`fields <- match_fields_to_form(metadata, vars_in_data)`
Initial Commit 2018-01-19 17:50:08 +01:00
Working on #7. Appears to be a functioning patch NEEDS UNIT TESTS!!! 2018-06-23 08:04:26 +02:00			`# Variables to be present in each output table`
			`universal_fields <- c(`
			`vars_in_data[1],`
			`grep(`
			`"^redcap_(?!(repeat)).*",`
			`vars_in_data,`
			`value = TRUE,`
			`perl = TRUE`
r-pkg: adding redcap_repeat_instrument.factor logic and improving checkbox logic 2018-06-06 23:55:01 +02:00			`)`
Working on #7. Appears to be a functioning patch NEEDS UNIT TESTS!!! 2018-06-23 08:04:26 +02:00			`)`

Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`if ("redcap_repeat_instrument" %in% vars_in_data) {`
			`# Variables to be at the beginning of each repeating instrument`
working to submit to cran 2023-01-16 09:49:17 +01:00			`repeat_instrument_fields <- grep("^redcap_repeat.*",`
			`vars_in_data,`
			`value = TRUE)`
r-pkg: adding redcap_repeat_instrument.factor logic and improving checkbox logic 2018-06-06 23:55:01 +02:00
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`# Identify the subtables in the data`
			`subtables <- unique(records$redcap_repeat_instrument)`
			`subtables <- subtables[subtables != ""]`
Closes #9. User can specify primary table label Incremented to v0.1.0 2019-07-01 22:54:29 +02:00
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`# Split the table based on instrument`
working to submit to cran 2023-01-16 09:49:17 +01:00			`out <-`
			`split.data.frame(records, records$redcap_repeat_instrument)`
Increment to v 0.2.0 and closes #10 Form timestamp fields also captured now. 2019-07-09 00:01:48 +02:00			`primary_table_index <- which(names(out) == "")`
Initial Commit 2018-01-19 17:50:08 +01:00
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`if (forms == "repeating" && primary_table_name %in% subtables) {`
working to submit to cran 2023-01-16 09:49:17 +01:00			`warning(`
gp with CRAN in sight 2023-04-13 10:57:04 +02:00			`"The label given to the primary table is already used by a repeating`
			`instrument. The primary table label will be left blank."`
working to submit to cran 2023-01-16 09:49:17 +01:00			`)`
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`primary_table_name <- ""`
			`} else if (primary_table_name > "") {`
Increment to v 0.2.0 and closes #10 Form timestamp fields also captured now. 2019-07-09 00:01:48 +02:00			`names(out)[[primary_table_index]] <- primary_table_name`
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`}`
Closes #9. User can specify primary table label Incremented to v0.1.0 2019-07-01 22:54:29 +02:00
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`# Delete the variables that are not relevant`
			`for (i in names(out)) {`
			`if (i == primary_table_name) {`
working to submit to cran 2023-01-16 09:49:17 +01:00			`out_fields <- which(vars_in_data %in% c(universal_fields,`
gp with CRAN in sight 2023-04-13 10:57:04 +02:00			`fields[!fields[, 2] %in%`
			`subtables, 1]))`
working to submit to cran 2023-01-16 09:49:17 +01:00			`out[[primary_table_index]] <-`
			`out[[primary_table_index]][out_fields]`
Initial Commit 2018-01-19 17:50:08 +01:00
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`} else {`
working to submit to cran 2023-01-16 09:49:17 +01:00			`out_fields <- which(vars_in_data %in% c(universal_fields,`
			`repeat_instrument_fields,`
			`fields[fields[, 2] == i, 1]))`
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`out[[i]] <- out[[i]][out_fields]`
Initial Commit 2018-01-19 17:50:08 +01:00
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`}`
Initial Commit 2018-01-19 17:50:08 +01:00
Fixed bug introduced in previous commit, added test to check for bug Also started working on issue #10 2019-07-08 18:19:33 +02:00			`}`
Increment to v 0.2.0 and closes #10 Form timestamp fields also captured now. 2019-07-09 00:01:48 +02:00
			`if (forms == "all") {`
working to submit to cran 2023-01-16 09:49:17 +01:00			`out <- c(split_non_repeating_forms(out[[primary_table_index]],`
			`universal_fields,`
			`fields[!fields[, 2] %in% subtables, ]),`
			`out[-primary_table_index])`
Increment to v 0.2.0 and closes #10 Form timestamp fields also captured now. 2019-07-09 00:01:48 +02:00
			`}`

			`} else {`
			`out <- split_non_repeating_forms(records, universal_fields, fields)`

Cleaning up R code and adding RStudio project. 2018-05-25 18:02:21 +02:00			`}`
Initial Commit 2018-01-19 17:50:08 +01:00
Converted R code into a package 2018-06-03 22:08:26 +02:00			`out`
Initial Commit 2018-01-19 17:50:08 +01:00
			`}`
data handling specific to repeated and/or longitudinal projects as been wrapped in if statement to allow handling of simple projects and just split instruments 2024-02-06 08:42:09 +01:00