Working to submit to CRAN

This commit is contained in:
AG Damsbo 2023-01-16 09:49:17 +01:00
parent 992091f94c
commit be3969d007
11 changed files with 202 additions and 267 deletions

View File

@ -1,13 +1,14 @@
Package: REDCapRITS Package: REDCapRITS
Title: REDCap Repeating Instrument Table Splitter Title: REDCap Repeating Instrument Table Splitter Fork
Version: 0.2.2.1 Version: 0.2.2.1
Authors@R: c( Authors@R: c(
person("Paul", "Egeler", email = "paul.egeler@spectrumhealth.org", role = c("aut", "cre")), person("Paul", "Egeler", email = "paul.egeler@spectrumhealth.org", role = c("aut")),
person("Spectrum Health, Grand Rapids, MI", role = "cph"), person("Andreas Gammelgaard", "Damsbo", email = "agdamsbo@clin.au.dk", role = c("cre", "ctb","cph"),
person("Andreas Gammelgaard", "Damsbo", , "agdamsbo@clin.au.dk", role = c("ctb"),
comment = c(ORCID = "0000-0002-7559-1154"))) comment = c(ORCID = "0000-0002-7559-1154")))
Copyright: Spectrum Health, Grand Rapids, MI Copyright: Spectrum Health, Grand Rapids, MI
Description: Split REDCap repeating instruments output into multiple tables. Description: This is a fork of REDCapRITS by Paul Egeler and Spectrum Health. See
<https://github.com/SpectrumHealthResearch/REDCapRITS>.
Split REDCap repeating instruments output into multiple tables.
This will take raw output from a REDCap export and split it into a base table This will take raw output from a REDCap export and split it into a base table
and child tables for each repeating instrument. and child tables for each repeating instrument.
REDCap (Research Electronic Data Capture) is a secure, web-based software REDCap (Research Electronic Data Capture) is a secure, web-based software
@ -29,8 +30,8 @@ License: GPL-3
Encoding: UTF-8 Encoding: UTF-8
LazyData: true LazyData: true
RoxygenNote: 7.2.3 RoxygenNote: 7.2.3
URL: https://github.com/SpectrumHealthResearch/REDCapRITS URL: https://github.com/agdamsbo/REDCapRITS
BugReports: https://github.com/SpectrumHealthResearch/REDCapRITS/issues BugReports: https://github.com/agdamsbo/REDCapRITS/issues
Collate: Collate:
'utils.r' 'utils.r'
'process_user_input.r' 'process_user_input.r'

View File

@ -82,12 +82,11 @@
REDCap_split <- function(records, REDCap_split <- function(records,
metadata, metadata,
primary_table_name = "", primary_table_name = "",
forms = c("repeating", "all") forms = c("repeating", "all")) {
) {
# Process user input # Process user input
records <- process_user_input(records) records <- process_user_input(records)
metadata <- as.data.frame(process_user_input(metadata)) # See issue #12 metadata <-
as.data.frame(process_user_input(metadata)) # See issue #12
# Get the variable names in the dataset # Get the variable names in the dataset
vars_in_data <- names(records) vars_in_data <- names(records)
@ -96,7 +95,8 @@ REDCap_split <- function(records,
forms <- match.arg(forms) forms <- match.arg(forms)
# Check to see if there were any repeating instruments # Check to see if there were any repeating instruments
if (forms == "repeating" && !"redcap_repeat_instrument" %in% vars_in_data) { if (forms == "repeating" &&
!"redcap_repeat_instrument" %in% vars_in_data) {
stop("There are no repeating instruments in this dataset.") stop("There are no repeating instruments in this dataset.")
} }
@ -113,7 +113,8 @@ REDCap_split <- function(records,
names(metadata) <- metadata_names names(metadata) <- metadata_names
# Make sure that no metadata columns are factors # Make sure that no metadata columns are factors
metadata <- rapply(metadata, as.character, classes = "factor", how = "replace") metadata <-
rapply(metadata, as.character, classes = "factor", how = "replace")
# Find the fields and associated form # Find the fields and associated form
fields <- match_fields_to_form(metadata, vars_in_data) fields <- match_fields_to_form(metadata, vars_in_data)
@ -131,22 +132,23 @@ REDCap_split <- function(records,
if ("redcap_repeat_instrument" %in% vars_in_data) { if ("redcap_repeat_instrument" %in% vars_in_data) {
# Variables to be at the beginning of each repeating instrument # Variables to be at the beginning of each repeating instrument
repeat_instrument_fields <- grep( repeat_instrument_fields <- grep("^redcap_repeat.*",
"^redcap_repeat.*",
vars_in_data, vars_in_data,
value = TRUE value = TRUE)
)
# Identify the subtables in the data # Identify the subtables in the data
subtables <- unique(records$redcap_repeat_instrument) subtables <- unique(records$redcap_repeat_instrument)
subtables <- subtables[subtables != ""] subtables <- subtables[subtables != ""]
# Split the table based on instrument # Split the table based on instrument
out <- split.data.frame(records, records$redcap_repeat_instrument) out <-
split.data.frame(records, records$redcap_repeat_instrument)
primary_table_index <- which(names(out) == "") primary_table_index <- which(names(out) == "")
if (forms == "repeating" && primary_table_name %in% subtables) { if (forms == "repeating" && primary_table_name %in% subtables) {
warning("The label given to the primary table is already used by a repeating instrument. The primary table label will be left blank.") warning(
"The label given to the primary table is already used by a repeating instrument. The primary table label will be left blank."
)
primary_table_name <- "" primary_table_name <- ""
} else if (primary_table_name > "") { } else if (primary_table_name > "") {
names(out)[[primary_table_index]] <- primary_table_name names(out)[[primary_table_index]] <- primary_table_name
@ -154,26 +156,16 @@ REDCap_split <- function(records,
# Delete the variables that are not relevant # Delete the variables that are not relevant
for (i in names(out)) { for (i in names(out)) {
if (i == primary_table_name) { if (i == primary_table_name) {
out_fields <- which(vars_in_data %in% c(universal_fields,
out_fields <- which( fields[!fields[, 2] %in% subtables, 1]))
vars_in_data %in% c( out[[primary_table_index]] <-
universal_fields, out[[primary_table_index]][out_fields]
fields[!fields[,2] %in% subtables, 1]
)
)
out[[primary_table_index]] <- out[[primary_table_index]][out_fields]
} else { } else {
out_fields <- which(vars_in_data %in% c(universal_fields,
out_fields <- which(
vars_in_data %in% c(
universal_fields,
repeat_instrument_fields, repeat_instrument_fields,
fields[fields[,2] == i, 1] fields[fields[, 2] == i, 1]))
)
)
out[[i]] <- out[[i]][out_fields] out[[i]] <- out[[i]][out_fields]
} }
@ -181,20 +173,14 @@ REDCap_split <- function(records,
} }
if (forms == "all") { if (forms == "all") {
out <- c(split_non_repeating_forms(out[[primary_table_index]],
out <- c(
split_non_repeating_forms(
out[[primary_table_index]],
universal_fields, universal_fields,
fields[!fields[,2] %in% subtables,] fields[!fields[, 2] %in% subtables, ]),
), out[-primary_table_index])
out[-primary_table_index]
)
} }
} else { } else {
out <- split_non_repeating_forms(records, universal_fields, fields) out <- split_non_repeating_forms(records, universal_fields, fields)
} }

View File

@ -17,7 +17,6 @@ process_user_input.data.frame <- function(x, ...) {
} }
process_user_input.character <- function(x, ...) { process_user_input.character <- function(x, ...) {
if (!requireNamespace("jsonlite", quietly = TRUE)) { if (!requireNamespace("jsonlite", quietly = TRUE)) {
stop( stop(
"The package 'jsonlite' is needed to convert ", "The package 'jsonlite' is needed to convert ",
@ -35,7 +34,6 @@ process_user_input.character <- function(x, ...) {
} }
process_user_input.response <- function(x, ...) { process_user_input.response <- function(x, ...) {
process_user_input(rawToChar(x$content)) process_user_input(rawToChar(x$content))
} }

View File

@ -1,9 +1,6 @@
match_fields_to_form <- function(metadata, vars_in_data) { match_fields_to_form <- function(metadata, vars_in_data) {
fields <- metadata[!metadata$field_type %in% c("descriptive", "checkbox"),
fields <- metadata[ c("field_name", "form_name")]
!metadata$field_type %in% c("descriptive", "checkbox"),
c("field_name", "form_name")
]
# Process instrument status fields # Process instrument status fields
form_names <- unique(metadata$form_name) form_names <- unique(metadata$form_name)
@ -16,9 +13,9 @@ match_fields_to_form <- function(metadata, vars_in_data) {
fields <- rbind(fields, form_complete_fields) fields <- rbind(fields, form_complete_fields)
# Process survey timestamps # Process survey timestamps
timestamps <- intersect(vars_in_data, paste0(form_names, "_timestamp")) timestamps <-
intersect(vars_in_data, paste0(form_names, "_timestamp"))
if (length(timestamps)) { if (length(timestamps)) {
timestamp_fields <- data.frame( timestamp_fields <- data.frame(
field_name = timestamps, field_name = timestamps,
form_name = sub("_timestamp$", "", timestamps), form_name = sub("_timestamp$", "", timestamps),
@ -31,17 +28,12 @@ match_fields_to_form <- function(metadata, vars_in_data) {
# Process checkbox fields # Process checkbox fields
if (any(metadata$field_type == "checkbox")) { if (any(metadata$field_type == "checkbox")) {
checkbox_basenames <- metadata[metadata$field_type == "checkbox",
checkbox_basenames <- metadata[ c("field_name", "form_name")]
metadata$field_type == "checkbox",
c("field_name", "form_name")
]
checkbox_fields <- checkbox_fields <-
do.call( do.call("rbind",
"rbind", apply(checkbox_basenames,
apply(
checkbox_basenames,
1, 1,
function(x, y) function(x, y)
data.frame( data.frame(
@ -50,9 +42,7 @@ match_fields_to_form <- function(metadata, vars_in_data) {
stringsAsFactors = FALSE, stringsAsFactors = FALSE,
row.names = NULL row.names = NULL
), ),
y = vars_in_data y = vars_in_data))
)
)
fields <- rbind(fields, checkbox_fields) fields <- rbind(fields, checkbox_fields)
@ -60,12 +50,9 @@ match_fields_to_form <- function(metadata, vars_in_data) {
# Process ".*\\.factor" fields supplied by REDCap's export data R script # Process ".*\\.factor" fields supplied by REDCap's export data R script
if (any(grepl("\\.factor$", vars_in_data))) { if (any(grepl("\\.factor$", vars_in_data))) {
factor_fields <- factor_fields <-
do.call( do.call("rbind",
"rbind", apply(fields,
apply(
fields,
1, 1,
function(x, y) { function(x, y) {
field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y) field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y)
@ -77,9 +64,7 @@ match_fields_to_form <- function(metadata, vars_in_data) {
row.names = NULL row.names = NULL
) )
}, },
y = vars_in_data y = vars_in_data))
)
)
fields <- rbind(fields, factor_fields) fields <- rbind(fields, factor_fields)
@ -90,12 +75,11 @@ match_fields_to_form <- function(metadata, vars_in_data) {
} }
split_non_repeating_forms <- function(table, universal_fields, fields) { split_non_repeating_forms <-
function(table, universal_fields, fields) {
forms <- unique(fields[[2]]) forms <- unique(fields[[2]])
x <- lapply( x <- lapply(forms,
forms,
function (x) { function (x) {
table[names(table) %in% union(universal_fields, fields[fields[, 2] == x, 1])] table[names(table) %in% union(universal_fields, fields[fields[, 2] == x, 1])]
}) })
@ -103,4 +87,3 @@ split_non_repeating_forms <- function(table, universal_fields, fields) {
structure(x, names = forms) structure(x, names = forms)
} }

View File

@ -36,22 +36,32 @@ REDCap_process_csv <- function(data) {
#Setting Factors(will create new variable for factors) #Setting Factors(will create new variable for factors)
data$redcap_repeat_instrument.factor = factor(data$redcap_repeat_instrument,levels=c("sale")) data$redcap_repeat_instrument.factor = factor(data$redcap_repeat_instrument, levels =
c("sale"))
data$cyl.factor = factor(data$cyl, levels = c("3", "4", "5", "6", "7", "8")) data$cyl.factor = factor(data$cyl, levels = c("3", "4", "5", "6", "7", "8"))
data$vs.factor = factor(data$vs, levels = c("1", "0")) data$vs.factor = factor(data$vs, levels = c("1", "0"))
data$am.factor = factor(data$am, levels = c("0", "1")) data$am.factor = factor(data$am, levels = c("0", "1"))
data$gear.factor = factor(data$gear, levels = c("3", "4", "5")) data$gear.factor = factor(data$gear, levels = c("3", "4", "5"))
data$carb.factor = factor(data$carb, levels = c("1", "2", "3", "4", "5", "6", "7", "8")) data$carb.factor = factor(data$carb, levels = c("1", "2", "3", "4", "5", "6", "7", "8"))
data$color_available___red.factor = factor(data$color_available___red,levels=c("0","1")) data$color_available___red.factor = factor(data$color_available___red, levels =
data$color_available___green.factor = factor(data$color_available___green,levels=c("0","1")) c("0", "1"))
data$color_available___blue.factor = factor(data$color_available___blue,levels=c("0","1")) data$color_available___green.factor = factor(data$color_available___green, levels =
data$color_available___black.factor = factor(data$color_available___black,levels=c("0","1")) c("0", "1"))
data$motor_trend_cars_complete.factor = factor(data$motor_trend_cars_complete,levels=c("0","1","2")) data$color_available___blue.factor = factor(data$color_available___blue, levels =
data$letter_group___a.factor = factor(data$letter_group___a,levels=c("0","1")) c("0", "1"))
data$letter_group___b.factor = factor(data$letter_group___b,levels=c("0","1")) data$color_available___black.factor = factor(data$color_available___black, levels =
data$letter_group___c.factor = factor(data$letter_group___c,levels=c("0","1")) c("0", "1"))
data$motor_trend_cars_complete.factor = factor(data$motor_trend_cars_complete, levels =
c("0", "1", "2"))
data$letter_group___a.factor = factor(data$letter_group___a, levels =
c("0", "1"))
data$letter_group___b.factor = factor(data$letter_group___b, levels =
c("0", "1"))
data$letter_group___c.factor = factor(data$letter_group___c, levels =
c("0", "1"))
data$choice.factor = factor(data$choice, levels = c("choice1", "choice2")) data$choice.factor = factor(data$choice, levels = c("choice1", "choice2"))
data$grouping_complete.factor = factor(data$grouping_complete,levels=c("0","1","2")) data$grouping_complete.factor = factor(data$grouping_complete, levels =
c("0", "1", "2"))
data$color.factor = factor(data$color, levels = c("1", "2", "3", "4")) data$color.factor = factor(data$color, levels = c("1", "2", "3", "4"))
data$sale_complete.factor = factor(data$sale_complete, levels = c("0", "1", "2")) data$sale_complete.factor = factor(data$sale_complete, levels = c("0", "1", "2"))

View File

@ -6,4 +6,5 @@
# ) # )
# } # }
get_data_location <- function(x) file.path("data", x) get_data_location <- function(x)
file.path("data", x)

View File

@ -1,26 +1,12 @@
context("Reading in JSON")
# Check the RCurl export --------------------------------------------------- # Check the RCurl export ---------------------------------------------------
test_that("JSON character vector from RCurl matches reference", { test_that("JSON character vector from RCurl matches reference", {
metadata <- jsonlite::fromJSON(get_data_location("ExampleProject_metadata.json"))
metadata <- jsonlite::fromJSON( records <- jsonlite::fromJSON(get_data_location("ExampleProject_records.json"))
get_data_location(
"ExampleProject_metadata.json"
)
)
records <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_records.json"
)
)
redcap_output_json1 <- REDCap_split(records, metadata) redcap_output_json1 <- REDCap_split(records, metadata)
expect_known_hash(redcap_output_json1, "2c8b6531597182af1248f92124161e0c") expect_known_hash(redcap_output_json1, "2c8b6531597182af1248f92124161e0c")
}) })
# Check the httr export ---------------------------------------------------
# Something will go here.

View File

@ -1,4 +1,3 @@
context("CSV Exports")
# Set up the path and data ------------------------------------------------- # Set up the path and data -------------------------------------------------
metadata <- read.csv( metadata <- read.csv(
@ -6,10 +5,8 @@ metadata <- read.csv(
stringsAsFactors = TRUE stringsAsFactors = TRUE
) )
records <- read.csv( records <- read.csv(get_data_location("ExampleProject_DATA_2018-06-07_1129.csv"),
get_data_location("ExampleProject_DATA_2018-06-07_1129.csv"), stringsAsFactors = TRUE)
stringsAsFactors = TRUE
)
redcap_output_csv1 <- REDCap_split(records, metadata) redcap_output_csv1 <- REDCap_split(records, metadata)
@ -21,7 +18,8 @@ test_that("CSV export matches reference", {
# Test that R code enhanced CSV export matches reference -------------------- # Test that R code enhanced CSV export matches reference --------------------
if (requireNamespace("Hmisc", quietly = TRUE)) { if (requireNamespace("Hmisc", quietly = TRUE)) {
test_that("R code enhanced export matches reference", { test_that("R code enhanced export matches reference", {
redcap_output_csv2 <- REDCap_split(REDCap_process_csv(records), metadata) redcap_output_csv2 <-
REDCap_split(REDCap_process_csv(records), metadata)
expect_known_hash(redcap_output_csv2, "34f82cab35bf8aae47d08cd96f743e6b") expect_known_hash(redcap_output_csv2, "34f82cab35bf8aae47d08cd96f743e6b")
}) })
@ -29,33 +27,22 @@ if (requireNamespace("Hmisc", quietly = TRUE)) {
if (requireNamespace("readr", quietly = TRUE)) { if (requireNamespace("readr", quietly = TRUE)) {
context("Compatibility with readr") context("Compatibility with readr")
metadata <- readr::read_csv( metadata <- readr::read_csv(get_data_location("ExampleProject_DataDictionary_2018-06-07.csv"))
get_data_location(
"ExampleProject_DataDictionary_2018-06-07.csv"
)
)
records <- readr::read_csv( records <- readr::read_csv(get_data_location("ExampleProject_DATA_2018-06-07_1129.csv"))
get_data_location(
"ExampleProject_DATA_2018-06-07_1129.csv"
)
)
redcap_output_readr <- REDCap_split(records, metadata) redcap_output_readr <- REDCap_split(records, metadata)
expect_matching_elements <- function(FUN) { expect_matching_elements <- function(FUN) {
FUN <- match.fun(FUN) FUN <- match.fun(FUN)
expect_identical( expect_identical(lapply(redcap_output_readr, FUN),
lapply(redcap_output_readr, FUN), lapply(redcap_output_csv1, FUN))
lapply(redcap_output_csv1, FUN)
)
} }
test_that("Result of data read in with `readr` will match result with `read.csv`", { test_that("Result of data read in with `readr` will match result with `read.csv`",
{
# The list itself # The list itself
expect_identical(length(redcap_output_readr), length(redcap_output_csv1)) expect_identical(length(redcap_output_readr), length(redcap_output_csv1))
expect_identical(names(redcap_output_readr), names(redcap_output_csv1)) expect_identical(names(redcap_output_readr), names(redcap_output_csv1))

View File

@ -1,63 +1,57 @@
context("Using the `forms = 'all'` argument")
# Global variables -------------------------------------------------------- # Global variables --------------------------------------------------------
# Cars # Cars
metadata <- jsonlite::fromJSON( metadata <-
get_data_location( jsonlite::fromJSON(get_data_location("ExampleProject_metadata.json"))
"ExampleProject_metadata.json"
)
)
records <- jsonlite::fromJSON( records <-
get_data_location( jsonlite::fromJSON(get_data_location("ExampleProject_records.json"))
"ExampleProject_records.json"
)
)
redcap_output_json <- REDCap_split(records, metadata, forms = "all") redcap_output_json <- REDCap_split(records, metadata, forms = "all")
# Longitudinal # Longitudinal
file_paths <- sapply( file_paths <- sapply(
c( c(records = "WARRIORtestForSoftwa_DATA_2018-06-21_1431.csv",
records = "WARRIORtestForSoftwa_DATA_2018-06-21_1431.csv", metadata = "WARRIORtestForSoftwareUpgrades_DataDictionary_2018-06-21.csv"),
metadata = "WARRIORtestForSoftwareUpgrades_DataDictionary_2018-06-21.csv" get_data_location
), get_data_location
) )
redcap <- lapply(file_paths, read.csv, stringsAsFactors = FALSE) redcap <- lapply(file_paths, read.csv, stringsAsFactors = FALSE)
redcap[["metadata"]] <- with(redcap, metadata[metadata[, 1] > "",]) redcap[["metadata"]] <- with(redcap, metadata[metadata[, 1] > "",])
redcap_output_long <- with(redcap, REDCap_split(records, metadata, forms = "all")) redcap_output_long <-
with(redcap, REDCap_split(records, metadata, forms = "all"))
redcap_long_names <- names(redcap[[1]]) redcap_long_names <- names(redcap[[1]])
# Tests ------------------------------------------------------------------- # Tests -------------------------------------------------------------------
test_that("Each form is an element in the list", { test_that("Each form is an element in the list", {
expect_length(redcap_output_json, 3L) expect_length(redcap_output_json, 3L)
expect_identical(names(redcap_output_json), c("motor_trend_cars", "grouping", "sale")) expect_identical(names(redcap_output_json),
c("motor_trend_cars", "grouping", "sale"))
}) })
test_that("All variables land somewhere", { test_that("All variables land somewhere", {
expect_true(setequal(names(records), Reduce(
expect_true(setequal(names(records), Reduce("union", sapply(redcap_output_json, names)))) "union", sapply(redcap_output_json, names)
)))
}) })
test_that("Primary table name is ignored", { test_that("Primary table name is ignored", {
expect_identical( expect_identical(REDCap_split(records, metadata, "HELLO", "all"),
REDCap_split(records, metadata, "HELLO", "all"), redcap_output_json)
redcap_output_json
)
}) })
test_that("Supports longitudinal data", { test_that("Supports longitudinal data", {
# setdiff(redcap_long_names, Reduce("union", sapply(redcap_output_long, names))) # setdiff(redcap_long_names, Reduce("union", sapply(redcap_output_long, names)))
## [1] "informed_consent_and_addendum_timestamp" ## [1] "informed_consent_and_addendum_timestamp"
expect_true(setequal(redcap_long_names, Reduce("union", sapply(redcap_output_long, names)))) expect_true(setequal(redcap_long_names, Reduce(
"union", sapply(redcap_output_long, names)
)))
}) })

View File

@ -1,4 +1,4 @@
context("Longitudinal data") ## "Longitudinal data"
test_that("CSV export matches reference", { test_that("CSV export matches reference", {
file_paths <- sapply( file_paths <- sapply(

View File

@ -1,28 +1,17 @@
context("Primary table name processing") ## "Primary table name processing"
# Global variables ------------------------------------------------------- # Global variables -------------------------------------------------------
metadata <- jsonlite::fromJSON( metadata <- jsonlite::fromJSON(get_data_location("ExampleProject_metadata.json"))
get_data_location(
"ExampleProject_metadata.json"
)
)
records <- jsonlite::fromJSON( records <- jsonlite::fromJSON(get_data_location("ExampleProject_records.json"))
get_data_location(
"ExampleProject_records.json"
)
)
ref_hash <- "2c8b6531597182af1248f92124161e0c" ref_hash <- "2c8b6531597182af1248f92124161e0c"
# Tests ------------------------------------------------------------------- # Tests -------------------------------------------------------------------
test_that("Will not use a repeating instrument name for primary table", { test_that("Will not use a repeating instrument name for primary table", {
redcap_output_json1 <- expect_warning(REDCap_split(records, metadata, "sale"),
redcap_output_json1 <- expect_warning( "primary table")
REDCap_split(records, metadata, "sale"),
"primary table"
)
expect_known_hash(redcap_output_json1, ref_hash) expect_known_hash(redcap_output_json1, ref_hash)