From d15c2d35adc6ccd22c2b371c8fadfae83c77bb4e Mon Sep 17 00:00:00 2001 From: "Paul W. Egeler, M.S., GStat" Date: Mon, 8 Jul 2019 18:01:48 -0400 Subject: [PATCH] Increment to v 0.2.0 and closes #10 Form timestamp fields also captured now. --- R/DESCRIPTION | 2 +- R/NEWS.md | 5 +++ R/R/REDCap_split.r | 23 +++++++++- R/R/utils.r | 30 +++++++++++++ R/tests/testthat/test-forms-arg.R | 63 ++++++++++++++++++++++++++++ R/tests/testthat/test-longitudinal.R | 2 +- 6 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 R/tests/testthat/test-forms-arg.R diff --git a/R/DESCRIPTION b/R/DESCRIPTION index 0a493f5..3163f9a 100644 --- a/R/DESCRIPTION +++ b/R/DESCRIPTION @@ -1,6 +1,6 @@ Package: REDCapRITS Title: REDCap Repeating Instrument Table Splitter -Version: 0.1.0 +Version: 0.2.0 Authors@R: c( person("Paul", "Egeler", email = "paul.egeler@spectrumhealth.org", role = c("aut", "cre")), person("Spectrum Health, Grand Rapids, MI", role = "cph")) diff --git a/R/NEWS.md b/R/NEWS.md index 4f5c340..e79fe06 100644 --- a/R/NEWS.md +++ b/R/NEWS.md @@ -1,3 +1,8 @@ +# REDCapRITS 0.2.0 (Release date: 2019-??-??) + + * [feature] User can now separate each form into its own data.frame, regardless if it is a repeating instrument or not. (#10) + * [bug] Handles auto-generated form timestamp fields. + # REDCapRITS 0.1.0 (Release date: 2019-07-01) * [feature] User can now specify the name of the 'primary' table, which previously was left blank. (#9) diff --git a/R/R/REDCap_split.r b/R/R/REDCap_split.r index d12db72..d28a26a 100644 --- a/R/R/REDCap_split.r +++ b/R/R/REDCap_split.r @@ -134,12 +134,13 @@ REDCap_split <- function(records, # Split the table based on instrument out <- split.data.frame(records, records$redcap_repeat_instrument) + primary_table_index <- which(names(out) == "") if (forms == "repeating" && primary_table_name %in% subtables) { warning("The label given to the primary table is already used by a repeating instrument. The primary table label will be left blank.") primary_table_name <- "" } else if (primary_table_name > "") { - names(out)[[which(names(out) == "")]] <- primary_table_name + names(out)[[primary_table_index]] <- primary_table_name } # Delete the variables that are not relevant @@ -153,7 +154,7 @@ REDCap_split <- function(records, fields[!fields[,2] %in% subtables, 1] ) ) - out[[which(names(out) == primary_table_name)]] <- out[[which(names(out) == primary_table_name)]][out_fields] + out[[primary_table_index]] <- out[[primary_table_index]][out_fields] } else { @@ -169,6 +170,24 @@ REDCap_split <- function(records, } } + + if (forms == "all") { + + out <- c( + split_non_repeating_forms( + out[[primary_table_index]], + universal_fields, + fields[!fields[,2] %in% subtables,] + ), + out[-primary_table_index] + ) + + } + + } else { + + out <- split_non_repeating_forms(records, universal_fields, fields) + } out diff --git a/R/R/utils.r b/R/R/utils.r index 9115671..87d1509 100644 --- a/R/R/utils.r +++ b/R/R/utils.r @@ -15,6 +15,20 @@ match_fields_to_form <- function(metadata, vars_in_data) { fields <- rbind(fields, form_complete_fields) + # Process survey timestamps + timestamps <- intersect(vars_in_data, paste0(form_names, "_timestamp")) + if (length(timestamps)) { + + timestamp_fields <- data.frame( + field_name = timestamps, + form_name = sub("_timestamp$", "", timestamps), + stringsAsFactors = FALSE + ) + + fields <- rbind(fields, timestamp_fields) + + } + # Process checkbox fields if (any(metadata$field_type == "checkbox")) { @@ -74,3 +88,19 @@ match_fields_to_form <- function(metadata, vars_in_data) { fields } + + +split_non_repeating_forms <- function(table, universal_fields, fields) { + + forms <- unique(fields[[2]]) + + x <- lapply( + forms, + function (x) { + table[names(table) %in% union(universal_fields, fields[fields[,2] == x,1])] + }) + + structure(x, names = forms) + +} + diff --git a/R/tests/testthat/test-forms-arg.R b/R/tests/testthat/test-forms-arg.R new file mode 100644 index 0000000..74c3507 --- /dev/null +++ b/R/tests/testthat/test-forms-arg.R @@ -0,0 +1,63 @@ +context("Using the `forms = 'all'` argument") + +# Global variables -------------------------------------------------------- + +# Cars +metadata <- jsonlite::fromJSON( + get_data_location( + "ExampleProject_metadata.json" + ) +) + +records <- jsonlite::fromJSON( + get_data_location( + "ExampleProject_records.json" + ) +) + +redcap_output_json <- REDCap_split(records, metadata, forms = "all") + +# Longitudinal +file_paths <- sapply( + c( + records = "WARRIORtestForSoftwa_DATA_2018-06-21_1431.csv", + metadata = "WARRIORtestForSoftwareUpgrades_DataDictionary_2018-06-21.csv" + ), get_data_location +) + +redcap <- lapply(file_paths, read.csv, stringsAsFactors = FALSE) +redcap[["metadata"]] <- with(redcap, metadata[metadata[,1] > "",]) +redcap_output_long <- with(redcap, REDCap_split(records, metadata, forms = "all")) +redcap_long_names <- names(redcap[[1]]) + +# Tests ------------------------------------------------------------------- + +test_that("Each form is an element in the list", { + + expect_length(redcap_output_json, 3L) + expect_identical(names(redcap_output_json), c("motor_trend_cars", "grouping", "sale")) + +}) + +test_that("All variables land somewhere", { + + expect_true(setequal(names(records), Reduce("union", sapply(redcap_output_json, names)))) + +}) + + +test_that("Primary table name is ignored", { + expect_identical( + REDCap_split(records, metadata, "HELLO", "all"), + redcap_output_json + ) +}) + +test_that("Supports longitudinal data", { + + # setdiff(redcap_long_names, Reduce("union", sapply(redcap_output_long, names))) + ## [1] "informed_consent_and_addendum_timestamp" + + expect_true(setequal(redcap_long_names, Reduce("union", sapply(redcap_output_long, names)))) + +}) diff --git a/R/tests/testthat/test-longitudinal.R b/R/tests/testthat/test-longitudinal.R index c64cc14..3b955c3 100644 --- a/R/tests/testthat/test-longitudinal.R +++ b/R/tests/testthat/test-longitudinal.R @@ -13,5 +13,5 @@ test_that("CSV export matches reference", { redcap_output <- with(redcap, REDCap_split(records, metadata)) - expect_known_hash(redcap_output, "dff3a52955") + expect_known_hash(redcap_output, "0934bcb292") })