Major update. New functions and improvements. See NEWS.md.

2025-04-22 06:12:34 +02:00 · 2023-03-07 15:38:28 +01:00 · 2023-03-07 15:38:28 +01:00 · 9f68e27f5a
commit 9f68e27f5a
parent b57e130395
20 changed files with 441 additions and 95 deletions
--- a/.Rbuildignore
+++ b/.Rbuildignore
@ -2,3 +2,4 @@
 ^\.Rproj\.user$
 ^data-raw$
 ^test-data$
+^troubleshooting\.R$
--- a/4
+++ b/4
@ -33,8 +33,10 @@ RoxygenNote: 7.2.3
 URL: https://github.com/agdamsbo/REDCapRITS
 BugReports: https://github.com/agdamsbo/REDCapRITS/issues
 Imports: 
+    dplyr,
    REDCapR,
-    tidyr
+    tidyr,
+    tidyselect
 Collate: 
    'utils.r'
    'process_user_input.r'
--- a/6
+++ b/6
@ -1,8 +1,14 @@
 # Generated by roxygen2: do not edit by hand

 export(REDCap_split)
+export(focused_metadata)
+export(match_fields_to_form)
 export(read_redcap_tables)
 export(redcap_wider)
+export(sanitize_split)
+export(split_non_repeating_forms)
+importFrom(REDCapR,redcap_event_instruments)
 importFrom(REDCapR,redcap_metadata_read)
 importFrom(REDCapR,redcap_read)
 importFrom(tidyr,pivot_wider)
+importFrom(tidyselect,all_of)
--- a/NEWS.md
+++ b/NEWS.md
@ -6,6 +6,8 @@ To reflect new functions and the limitation to only working in R, I have changed

 The versioning has moved to a monthly naming convention.

+The main goal this package is to keep the option to only export a defined subset of the whole dataset from the REDCap server as is made possible through the `REDCapR::redcap_read()` function, and combine it with the work put into the REDCapRITS package and the handling of longitudinal projects and/or projects with repeated instruments.
+
 ### Functions:

 * `read_redcap_tables()` **NEW**: this function is mainly an implementation of the combined use of `REDCapR::readcap_read()` and `REDCap_split()` to maintain the focused nature of `REDCapR::readcap_read()`, to only download the specified data. Also implements tests of valid form names and event names. The usual fall-back solution was to get all data.
@ -13,3 +15,7 @@ The versioning has moved to a monthly naming convention.
 * `redcap_wider()` **NEW**: this function pivots the long data frames from `read_redcap_tables()` using `tidyr::pivot_wider()`.

 * `focused_metadata()` **NEW**: a hidden helper function to enable a focused data acquisition approach to handle only a subset of metadata corresponding to the focused dataset.
+
+### Notes:
+
+* metadata handling **IMPROVED**: improved handling of different column names in matadata (DataDictionary) from REDCap dependent on whether it is acquired thorugh the api og downloaded from the server. 
--- a/R/read_redcap_tables.R
+++ b/R/read_redcap_tables.R
@ -1,6 +1,8 @@
 #' Download REDCap data
 #'
-#' Wrapper function for using REDCapR::redcap_read and REDCapRITS::REDCap_split
+#' Implementation of REDCap_split with a focused data acquisition approach using
+#' REDCapR::redcap_read nad only downloading specified fields, forms and/or events
+#' using the built-in focused_metadata
 #' including some clean-up. Works with longitudinal projects with repeating
 #' instruments.
 #' @param uri REDCap database uri
@ -10,6 +12,7 @@
 #' @param events events to download
 #' @param forms forms to download
 #' @param raw_or_label raw or label tags
+#' @param split_forms Whether to split "repeating" or "all" forms, default is all.
 #' @param generics vector of auto-generated generic variable names to
 #' ignore when discarding empty rows
 #'
@ -27,6 +30,7 @@ read_redcap_tables <- function(uri,
                               events = NULL,
                               forms = NULL,
                               raw_or_label = "label",
+                               split_forms = "all",
                               generics = c(
                                 "record_id",
                                 "redcap_event_name",
@ -57,6 +61,7 @@ read_redcap_tables <- function(uri,
    }
  }

+  # Getting dataset
  d <- REDCapR::redcap_read(
    redcap_uri = uri,
    token = token,
@ -65,23 +70,33 @@ read_redcap_tables <- function(uri,
    forms = forms,
    records = records,
    raw_or_label = raw_or_label
-  )
+  )[["data"]]

+  # Process repeat instrument naming
+  # Removes any extra characters other than a-z, 0-9 and "_", to mimic raw instrument names.
+  if ("redcap_repeat_instrument" %in% names(d)) {
+    d$redcap_repeat_instrument <-
+    gsub("[^a-z0-9_]", "", gsub(" ", "_", tolower(d$redcap_repeat_instrument)))
+  }
+
+  # Getting metadata
  m <-
-    REDCapR::redcap_metadata_read (redcap_uri = uri, token = token)
+    REDCapR::redcap_metadata_read (redcap_uri = uri, token = token)[["data"]]

-  l <- REDCap_split(d$data,
-                    focused_metadata(m$data,names(d$data)),
-                    forms = "all")
-
-  lapply(l, function(i) {
-    if (ncol(i) > 2) {
-      s <- data.frame(i[, !colnames(i) %in% generics])
-      i[!apply(is.na(s), MARGIN = 1, FUN = all), ]
-    } else {
-      i
+  # Processing metadata to reflect dataset
+  if (!is.null(c(fields,forms,events))){
+    m <- focused_metadata(m,names(d))
  }
-  })
+
+  # Splitting
+  l <- REDCap_split(d,
+                    m,
+                    forms = split_forms,
+                    primary_table_name = "nonrepeating")
+
+  # Sanitizing split list by removing completely empty rows apart from colnames
+  # in "generics"
+  sanitize_split(l,generics)

 }

--- a/R/redcap_wider.R
+++ b/R/redcap_wider.R
@ -1,13 +1,17 @@
-
+utils::globalVariables(c("redcap_wider",
+"event.glue",
+"inst.glue"))

 #' @title Redcap Wider
 #' @description Converts a list of REDCap data frames from long to wide format.
 #' Handles longitudinal projects, but not yet repeated instruments.
 #' @param list A list of data frames.
-#' @param names.glud A string to glue the column names together.
+#' @param event.glue A dplyr::glue string for repeated events naming
+#' @param inst.glue A dplyr::glue string for repeated instruments naming
 #' @return The list of data frames in wide format.
 #' @export
 #' @importFrom tidyr pivot_wider
+#' @importFrom tidyselect all_of
 #'
 #' @examples
 #' list <- list(data.frame(record_id = c(1,2,1,2),
@ -17,23 +21,74 @@
 #' redcap_event_name = c("baseline", "baseline"),
 #' gender = c("male", "female")))
 #' redcap_wider(list)
-redcap_wider <- function(list,names.glud="{.value}_{redcap_event_name}_long") {
+redcap_wider <-
+  function(list,
+           event.glue = "{.value}_{redcap_event_name}",
+           inst.glue = "{.value}_{redcap_repeat_instance}") {
+    all_names <- unique(do.call(c, lapply(list, names)))
+
+    if (!any(c("redcap_event_name", "redcap_repeat_instrument") %in% all_names)) {
+      stop(
+        "The dataset does not include a 'redcap_event_name' variable.
+         redcap_wider only handles projects with repeating instruments or
+         longitudinal projects"
+      )
+    }
+
+    # if (any(grepl("_timestamp",all_names))){
+    #   stop("The dataset includes a '_timestamp' variable, which is not supported
+    #        by this function yet. Sorry! Feel free to contribute :)")
+    # }
+
+    id.name <- all_names[1]
+
    l <- lapply(list, function(i) {
-    incl <- any(duplicated(i[["record_id"]]))
+      rep_inst <- "redcap_repeat_instrument" %in% names(i)

-    cname <- colnames(i)
-    vals <- cname[!cname%in%c("record_id","redcap_event_name")]
-
-    i$redcap_event_name <- tolower(gsub(" ","_",i$redcap_event_name))
-
-    if (incl){
-      s <- tidyr::pivot_wider(i,
-                              names_from = redcap_event_name,
+      if (rep_inst) {
+        k <- lapply(split(i, f = i[[id.name]]), function(j) {
+          cname <- colnames(j)
+          vals <-
+            cname[!cname %in% c(
+              id.name,
+              "redcap_event_name",
+              "redcap_repeat_instrument",
+              "redcap_repeat_instance"
+            )]
+          s <- tidyr::pivot_wider(
+            j,
+            names_from = "redcap_repeat_instance",
            values_from = all_of(vals),
-                              names_glue = names.glud)
-      s[colnames(s)!="redcap_event_name"]
-    } else (i[colnames(i)!="redcap_event_name"])
+            names_glue = inst.glue
+          )
+          s[!colnames(s) %in% c("redcap_repeat_instrument")]
+        })
+        i <- Reduce(dplyr::bind_rows, k)
+      }

+      event <- "redcap_event_name" %in% names(i)
+
+      if (event) {
+        event.n <- length(unique(i[["redcap_event_name"]])) > 1
+
+        i[["redcap_event_name"]] <-
+          gsub(" ", "_", tolower(i[["redcap_event_name"]]))
+
+        if (event.n) {
+          cname <- colnames(i)
+          vals <- cname[!cname %in% c(id.name, "redcap_event_name")]
+
+          s <- tidyr::pivot_wider(
+            i,
+            names_from = "redcap_event_name",
+            values_from = all_of(vals),
+            names_glue = event.glue
+          )
+          s[colnames(s) != "redcap_event_name"]
+        } else
+          (i[colnames(i) != "redcap_event_name"])
+      } else
+        (i)
    })

    ## Additional conditioning is needed to handle repeated instruments.
--- a/R/utils.r
+++ b/R/utils.r
@ -1,48 +1,60 @@
+
+
+#' focused_metadata
+#' @description Extracts limited metadata for variables in a dataset
+#' @param metadata A dataframe containing metadata
+#' @param vars_in_data Vector of variable names in the dataset
+#' @return A dataframe containing metadata for the variables in the dataset
+#' @export
+#' @examples
+#'
 focused_metadata <- function(metadata, vars_in_data) {
-  # metadata <- m$data
-  # vars_in_data <- names(d$data)
+
+  if (any(c("tbl_df", "tbl") %in% class(metadata))) {
+    metadata <- data.frame(metadata)
+  }
+
+  field_name <- grepl(".*[Ff]ield[._][Nn]ame$", names(metadata))
+  field_type <- grepl(".*[Ff]ield[._][Tt]ype$", names(metadata))

  fields <-
-    metadata[!metadata$field_type %in% c("descriptive", "checkbox") &
-               metadata$field_name %in% vars_in_data,
-             "field_name"]
+    metadata[!metadata[, field_type] %in% c("descriptive", "checkbox") &
+               metadata[, field_name] %in% vars_in_data,
+             field_name]

  # Process checkbox fields
-  if (any(metadata$field_type == "checkbox")) {
-
+  if (any(metadata[, field_type] == "checkbox")) {
    # Getting base field names from checkbox fields
-    vars_check <- gsub(pattern = "___(\\d+)",replacement = "", vars_in_data)
+    vars_check <-
+      sub(pattern = "___.*$", replacement = "", vars_in_data)

    # Processing
    checkbox_basenames <-
-      metadata[metadata$field_type == "checkbox" &
-                 metadata$field_name %in% vars_check,
-               "field_name"]
+      metadata[metadata[, field_type] == "checkbox" &
+                 metadata[, field_name] %in% vars_check,
+               field_name]

-    fields <- rbind(fields, checkbox_basenames)
+    fields <- c(fields, checkbox_basenames)

  }

  # Process instrument status fields
-  form_names <- unique(metadata$form_name[metadata$field_name %in% fields$field_name])
+  form_names <-
+    unique(metadata[, grepl(".*[Ff]orm[._][Nn]ame$",
+                            names(metadata))][metadata[, field_name]
+                                              %in% fields])

-  form_complete_fields <- data.frame(
-    field_name = paste0(form_names, "_complete"),
-    stringsAsFactors = FALSE
-  )
+  form_complete_fields <- paste0(form_names, "_complete")

-  fields <- rbind(fields, form_complete_fields)
+  fields <- c(fields, form_complete_fields)

  # Process survey timestamps
  timestamps <-
    intersect(vars_in_data, paste0(form_names, "_timestamp"))
  if (length(timestamps)) {
-    timestamp_fields <- data.frame(
-      field_name = timestamps,
-      stringsAsFactors = FALSE
-    )
+    timestamp_fields <- timestamps

-    fields <- rbind(fields, timestamp_fields)
+    fields <- c(fields, timestamp_fields)

  }

@ -64,20 +76,73 @@ focused_metadata <- function(metadata, vars_in_data) {
                    },
                    y = vars_in_data))

-    fields <- rbind(fields, factor_fields)
+    fields <- c(fields, factor_fields[, 1])

  }

-  metadata[metadata$field_name %in% fields$field_name,]
+  metadata[metadata[, field_name] %in% fields, ]

 }

+
+
+# function to convert the list of dataframes
+
+
+#' Sanitize list of data frames
+#'
+#' Removing empty rows
+#' @param l A list of data frames.
+#' @param generic.names A vector of generic names to be excluded.
+#'
+#' @return A list of data frames with generic names excluded.
+#'
+#' @export
+#'
+#' @examples
+#'
+sanitize_split <- function(l,
+                           generic.names = c(
+                             "record_id",
+                             "redcap_event_name",
+                             "redcap_repeat_instrument",
+                             "redcap_repeat_instance"
+                           )) {
+  lapply(l, function(i) {
+    if (ncol(i) > 2) {
+      s <- data.frame(i[, !colnames(i) %in% generic.names])
+      i[!apply(is.na(s), MARGIN = 1, FUN = all),]
+    } else {
+      i
+    }
+  })
+}
+
+
+#' Match fields to forms
+#'
+#' @param metadata A data frame containing field names and form names
+#' @param vars_in_data A character vector of variable names
+#'
+#' @return A data frame containing field names and form names
+#'
+#' @export
+#'
+#' @examples
+#'
+#'
 match_fields_to_form <- function(metadata, vars_in_data) {
-  fields <- metadata[!metadata$field_type %in% c("descriptive", "checkbox"),
-                     c("field_name", "form_name")]
+
+  field_form_name <- grepl(".*([Ff]ield|[Ff]orm)[._][Nn]ame$",names(metadata))
+  field_type <- grepl(".*[Ff]ield[._][Tt]ype$",names(metadata))
+
+  fields <- metadata[!metadata[,field_type] %in% c("descriptive", "checkbox"),
+                     field_form_name]
+
+  names(fields) <- c("field_name", "form_name")

  # Process instrument status fields
-  form_names <- unique(metadata$form_name)
+  form_names <- unique(metadata[,grepl(".*[Ff]orm[._][Nn]ame$",names(metadata))])
  form_complete_fields <- data.frame(
    field_name = paste0(form_names, "_complete"),
    form_name = form_names,
@ -101,9 +166,9 @@ match_fields_to_form <- function(metadata, vars_in_data) {
  }

  # Process checkbox fields
-  if (any(metadata$field_type == "checkbox")) {
-    checkbox_basenames <- metadata[metadata$field_type == "checkbox",
-                                   c("field_name", "form_name")]
+  if (any(metadata[,field_type] == "checkbox")) {
+    checkbox_basenames <- metadata[metadata[,field_type] == "checkbox",
+                                   field_form_name]

    checkbox_fields <-
      do.call("rbind",
@ -111,7 +176,9 @@ match_fields_to_form <- function(metadata, vars_in_data) {
                    1,
                    function(x, y)
                      data.frame(
-                        field_name = y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"), y, perl = TRUE)],
+                        field_name =
+                          y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"),
+                                  y, perl = TRUE)],
                        form_name = x[2],
                        stringsAsFactors = FALSE,
                        row.names = NULL
@ -148,14 +215,50 @@ match_fields_to_form <- function(metadata, vars_in_data) {

 }

-
+#' Split a data frame into separate tables for each form
+#'
+#' @param table A data frame
+#' @param universal_fields A character vector of fields that should be included
+#' in every table
+#' @param fields A two-column matrix containing the names of fields that should
+#' be included in each form
+#'
+#' @return A list of data frames, one for each non-repeating form
+#'
+#' @export
+#'
+#' @examples
+#' # Create a table
+#' table <- data.frame(
+#'   id = c(1, 2, 3, 4, 5),
+#'   form_a_name = c("John", "Alice", "Bob", "Eve", "Mallory"),
+#'   form_a_age = c(25, 30, 25, 15, 20),
+#'   form_b_name = c("John", "Alice", "Bob", "Eve", "Mallory"),
+#'   form_b_gender = c("M", "F", "M", "F", "F")
+#' )
+#'
+#' # Create the universal fields
+#' universal_fields <- c("id")
+#'
+#' # Create the fields
+#' fields <- matrix(
+#'   c("form_a_name", "form_a",
+#'     "form_a_age", "form_a",
+#'     "form_b_name", "form_b",
+#'     "form_b_gender", "form_b"),
+#'   ncol = 2, byrow = TRUE
+#' )
+#'
+#' # Split the table
+#' split_non_repeating_forms(table, universal_fields, fields)
 split_non_repeating_forms <-
  function(table, universal_fields, fields) {
    forms <- unique(fields[[2]])

    x <- lapply(forms,
                function (x) {
-                  table[names(table) %in% union(universal_fields, fields[fields[, 2] == x, 1])]
+                  table[names(table) %in% union(universal_fields,
+                                                fields[fields[, 2] == x, 1])]
                })

    structure(x, names = forms)
--- a/man/focused_metadata.Rd
+++ b/man/focused_metadata.Rd
@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.r
+\name{focused_metadata}
+\alias{focused_metadata}
+\title{focused_metadata}
+\usage{
+focused_metadata(metadata, vars_in_data)
+}
+\arguments{
+\item{metadata}{A dataframe containing metadata}
+
+\item{vars_in_data}{Vector of variable names in the dataset}
+}
+\value{
+A dataframe containing metadata for the variables in the dataset
+}
+\description{
+Extracts limited metadata for variables in a dataset
+}
--- a/man/match_fields_to_form.Rd
+++ b/man/match_fields_to_form.Rd
@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.r
+\name{match_fields_to_form}
+\alias{match_fields_to_form}
+\title{Match fields to forms}
+\usage{
+match_fields_to_form(metadata, vars_in_data)
+}
+\arguments{
+\item{metadata}{A data frame containing field names and form names}
+
+\item{vars_in_data}{A character vector of variable names}
+}
+\value{
+A data frame containing field names and form names
+}
+\description{
+Match fields to forms
+}
--- a/man/read_redcap_tables.Rd
+++ b/man/read_redcap_tables.Rd
@ -12,9 +12,9 @@ read_redcap_tables(
  events = NULL,
  forms = NULL,
  raw_or_label = "label",
+  split_forms = "all",
  generics = c("record_id", "redcap_event_name", "redcap_repeat_instrument",
-    "redcap_repeat_instance"),
-  ...
+    "redcap_repeat_instance")
 )
 }
 \arguments{
@ -32,16 +32,18 @@ read_redcap_tables(

 \item{raw_or_label}{raw or label tags}

+\item{split_forms}{Whether to split "repeating" or "all" forms, default is all.}
+
 \item{generics}{vector of auto-generated generic variable names to
 ignore when discarding empty rows}
-
-\item{...}{ekstra parameters for REDCapR::redcap_read_oneshot}
 }
 \value{
 list of instruments
 }
 \description{
-Wrapper function for using REDCapR::redcap_read and REDCapRITS::REDCap_split
+Implementation of REDCap_split with a focused data acquisition approach using
+REDCapR::redcap_read nad only downloading specified fields, forms and/or events
+using the built-in focused_metadata
 including some clean-up. Works with longitudinal projects with repeating
 instruments.
 }
--- a/man/redcap_wider.Rd
+++ b/man/redcap_wider.Rd
@ -4,18 +4,25 @@
 \alias{redcap_wider}
 \title{Redcap Wider}
 \usage{
-redcap_wider(list, names.glud = "{.value}_{redcap_event_name}_long")
+redcap_wider(
+  list,
+  event.glue = "{.value}_{redcap_event_name}",
+  inst.glue = "{.value}_{redcap_repeat_instance}"
+)
 }
 \arguments{
 \item{list}{A list of data frames.}

-\item{names.glud}{A string to glue the column names together.}
+\item{event.glue}{A dplyr::glue string for repeated events naming}
+
+\item{inst.glue}{A dplyr::glue string for repeated instruments naming}
 }
 \value{
 The list of data frames in wide format.
 }
 \description{
 Converts a list of REDCap data frames from long to wide format.
+Handles longitudinal projects, but not yet repeated instruments.
 }
 \examples{
 list <- list(data.frame(record_id = c(1,2,1,2),
--- a/man/sanitize_split.Rd
+++ b/man/sanitize_split.Rd
@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.r
+\name{sanitize_split}
+\alias{sanitize_split}
+\title{Sanitize list of data frames}
+\usage{
+sanitize_split(
+  l,
+  generic.names = c("record_id", "redcap_event_name", "redcap_repeat_instrument",
+    "redcap_repeat_instance")
+)
+}
+\arguments{
+\item{l}{A list of data frames.}
+
+\item{generic.names}{A vector of generic names to be excluded.}
+}
+\value{
+A list of data frames with generic names excluded.
+}
+\description{
+Removing empty rows
+}
--- a/man/split_non_repeating_forms.Rd
+++ b/man/split_non_repeating_forms.Rd
@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.r
+\name{split_non_repeating_forms}
+\alias{split_non_repeating_forms}
+\title{Split a data frame into separate tables for each form}
+\usage{
+split_non_repeating_forms(table, universal_fields, fields)
+}
+\arguments{
+\item{table}{A data frame}
+
+\item{universal_fields}{A character vector of fields that should be included
+in every table}
+
+\item{fields}{A two-column matrix containing the names of fields that should
+be included in each form}
+}
+\value{
+A list of data frames, one for each non-repeating form
+}
+\description{
+Split a data frame into separate tables for each form
+}
+\examples{
+# Create a table
+table <- data.frame(
+  id = c(1, 2, 3, 4, 5),
+  form_a_name = c("John", "Alice", "Bob", "Eve", "Mallory"),
+  form_a_age = c(25, 30, 25, 15, 20),
+  form_b_name = c("John", "Alice", "Bob", "Eve", "Mallory"),
+  form_b_gender = c("M", "F", "M", "F", "F")
+)
+
+# Create the universal fields
+universal_fields <- c("id")
+
+# Create the fields
+fields <- matrix(
+  c("form_a_name", "form_a",
+    "form_a_age", "form_a",
+    "form_b_name", "form_b",
+    "form_b_gender", "form_b"),
+  ncol = 2, byrow = TRUE
+)
+
+# Split the table
+split_non_repeating_forms(table, universal_fields, fields)
+}
--- a/tests/testthat.R
+++ b/tests/testthat.R
@ -1,4 +1,4 @@
 library(testthat)
-library(REDCapRITS)
+library(REDCapCAST)

-test_check("REDCapRITS")
+test_check("REDCapCAST")
--- a/tests/testthat/.DS_Store
+++ b/tests/testthat/.DS_Store
--- a/tests/testthat/data/ExampleProject_DataDictionary_2018-06-07.csv
+++ b/tests/testthat/data/ExampleProject_DataDictionary_2018-06-07.csv
@ -1,19 +1,19 @@
-"Variable / Field Name","Form Name","Section Header","Field Type","Field Label","Choices, Calculations, OR Slider Labels","Field Note","Text Validation Type OR Show Slider Number","Text Validation Min","Text Validation Max",Identifier?,"Branching Logic (Show field only if...)","Required Field?","Custom Alignment","Question Number (surveys only)","Matrix Group Name","Matrix Ranking?","Field Annotation"
+field_name,form_name,section_header,field_type,field_label,select_choices_or_calculations,field_note,text_validation_type_or_show_slider_number,text_validation_min,text_validation_max,identifier,branching_logic,required_field,custom_alignment,question_number,matrix_group_name,matrix_ranking,field_annotation
 row,motor_trend_cars,,text,Name,,,,,,,,,,,,,
-mpg,motor_trend_cars,,text,"Miles/(US) gallon",,,number,,,,,,,,,,
-cyl,motor_trend_cars,,radio,"Number of cylinders","3, 3 | 4, 4 | 5, 5 | 6, 6 | 7, 7 | 8, 8",,,,,,,,,,,,
+mpg,motor_trend_cars,,text,Miles/(US) gallon,,,number,,,,,,,,,,
+cyl,motor_trend_cars,,radio,Number of cylinders,"3, 3 | 4, 4 | 5, 5 | 6, 6 | 7, 7 | 8, 8",,,,,,,,,,,,
 disp,motor_trend_cars,,text,Displacement,,(cu.in.),number,,,,,,,,,,
-hp,motor_trend_cars,,text,"Gross horsepower",,,number,,,,,,,,,,
-drat,motor_trend_cars,,text,"Rear axle ratio",,,number,,,,,,,,,,
-wt,motor_trend_cars,,text,Weight,,"(1000 lbs)",number,,,,,,,,,,
-qsec,motor_trend_cars,,text,"1/4 mile time",,,number,,,,,,,,,,
-vs,motor_trend_cars,,yesno,"V engine?",,,,,,,,,,,,,
+hp,motor_trend_cars,,text,Gross horsepower,,,number,,,,,,,,,,
+drat,motor_trend_cars,,text,Rear axle ratio,,,number,,,,,,,,,,
+wt,motor_trend_cars,,text,Weight,,(1000 lbs),number,,,,,,,,,,
+qsec,motor_trend_cars,,text,1/4 mile time,,,number,,,,,,,,,,
+vs,motor_trend_cars,,yesno,V engine?,,,,,,,,,,,,,
 am,motor_trend_cars,,dropdown,Transmission,"0, Automatic | 1, Manual"," (0 = automatic, 1 = manual)",,,,,,,,,,,
-gear,motor_trend_cars,,radio,"Number of forward gears","3, 3 | 4, 4 | 5, 5",,,,,,,,,,,,
-carb,motor_trend_cars,,radio,"Number of carburetors","1, 1 | 2, 2 | 3, 3 | 4, 4 | 5, 5 | 6, 6 | 7, 7 | 8, 8",,,,,,,,,,,,
-color_available,motor_trend_cars,,checkbox,"Colors Available","red, Red | green, Green | blue, Blue | black, Black",,,,,,,,,,,,
-letter_group,grouping,,checkbox,"Which group?","a, A | b, B | c, C",,,,,,,,,,,,
-choice,grouping,,radio,"Choose one","choice1, Choice 1 | choice2, Choice 2",,,,,,,,,,,,
-price,sale,,text,"Sale price",,,number_2dp,,,,,,,,,,
+gear,motor_trend_cars,,radio,Number of forward gears,"3, 3 | 4, 4 | 5, 5",,,,,,,,,,,,
+carb,motor_trend_cars,,radio,Number of carburetors,"1, 1 | 2, 2 | 3, 3 | 4, 4 | 5, 5 | 6, 6 | 7, 7 | 8, 8",,,,,,,,,,,,
+color_available,motor_trend_cars,,checkbox,Colors Available,"red, Red | green, Green | blue, Blue | black, Black",,,,,,,,,,,,
+letter_group,grouping,,checkbox,Which group?,"a, A | b, B | c, C",,,,,,,,,,,,
+choice,grouping,,radio,Choose one,"choice1, Choice 1 | choice2, Choice 2",,,,,,,,,,,,
+price,sale,,text,Sale price,,,number_2dp,,,,,,,,,,
 color,sale,,dropdown,Color,"1, red | 2, green | 3, blue | 4, black",,,,,,,,,,,,
-customer,sale,,text,"Customer Name",,,,,,,,,RH,,,,
+customer,sale,,text,Customer Name,,,,,,,,,RH,,,,
--- a/tests/testthat/helper-paths.R
+++ b/tests/testthat/helper-paths.R
@ -2,7 +2,7 @@
 #   system.file(
 #     "testdata",
 #     x,
-#     package = "REDCapRITS"
+#     package = "REDCapCAST"
 #   )
 # }

--- a/tests/testthat/test-csv-exports.R
+++ b/tests/testthat/test-csv-exports.R
@ -15,6 +15,22 @@ test_that("CSV export matches reference", {
  expect_known_hash(redcap_output_csv1, "f74558d1939c17d9ff0e08a19b956e26")
 })

+# Test that REDCap_split can handle a focused dataset
+
+records_red <- records[!records$redcap_repeat_instrument == "sale",
+                   !names(records) %in% metadata$field_name[metadata$form_name == "sale"] &
+                     !names(records) == "sale_complete"]
+records_red$redcap_repeat_instrument <- as.character(records_red$redcap_repeat_instrument)
+
+redcap_output_red <- REDCap_split(records_red, metadata)
+
+
+test_that("REDCap_split handles subset dataset",
+          {
+            testthat::expect_length(redcap_output_red,1)
+          })
+
+
 # Test that R code enhanced CSV export matches reference --------------------
 if (requireNamespace("Hmisc", quietly = TRUE)) {
  test_that("R code enhanced export matches reference", {
@ -53,3 +69,5 @@ if (requireNamespace("readr", quietly = TRUE)) {
            })

 }
+
+
--- a/tests/testthat/test-read_redcap_tables.R
+++ b/tests/testthat/test-read_redcap_tables.R
@ -1,4 +1,3 @@
-# Unit Test

 # Test that the function throws an error when uri and token are not provided
 test_that("read_redcap_tables throws error when uri and token are not provided",
--- a/tests/testthat/test-redcap_wider.R
+++ b/tests/testthat/test-redcap_wider.R
@ -4,7 +4,30 @@ test_that("redcap_wider() returns expected output", {

  expect_equal(redcap_wider(list),
               data.frame(record_id = c(1,2),
-                          age_baseline_long = c(25,26),
-                          age_followup_long = c(27,28),
+                          age_baseline = c(25,26),
+                          age_followup = c(27,28),
                          gender = c("male","female")))
 })
+
+
+# Using test data
+
+# Set up the path and data -------------------------------------------------
+file_paths <- sapply(
+  c(records = "WARRIORtestForSoftwa_DATA_2018-06-21_1431.csv",
+    metadata = "WARRIORtestForSoftwareUpgrades_DataDictionary_2018-06-21.csv"),
+  get_data_location
+)
+
+redcap <- lapply(file_paths, read.csv, stringsAsFactors = FALSE)
+redcap[["metadata"]] <- with(redcap, metadata[metadata[, 1] > "",])
+list <-
+  with(redcap, REDCap_split(records, metadata, forms = "all"))
+
+wide_ds <- redcap_wider(list)
+
+test_that("redcap_wider() returns wide output from CSV",{
+  expect_equal(ncol(wide_ds),171)
+})
+
+