Working to submit to CRAN

This commit is contained in:
AG Damsbo 2023-01-16 09:49:17 +01:00
parent 992091f94c
commit be3969d007
11 changed files with 202 additions and 267 deletions

View File

@ -1,13 +1,14 @@
Package: REDCapRITS
Title: REDCap Repeating Instrument Table Splitter
Title: REDCap Repeating Instrument Table Splitter Fork
Version: 0.2.2.1
Authors@R: c(
person("Paul", "Egeler", email = "paul.egeler@spectrumhealth.org", role = c("aut", "cre")),
person("Spectrum Health, Grand Rapids, MI", role = "cph"),
person("Andreas Gammelgaard", "Damsbo", , "agdamsbo@clin.au.dk", role = c("ctb"),
person("Paul", "Egeler", email = "paul.egeler@spectrumhealth.org", role = c("aut")),
person("Andreas Gammelgaard", "Damsbo", email = "agdamsbo@clin.au.dk", role = c("cre", "ctb","cph"),
comment = c(ORCID = "0000-0002-7559-1154")))
Copyright: Spectrum Health, Grand Rapids, MI
Description: Split REDCap repeating instruments output into multiple tables.
Description: This is a fork of REDCapRITS by Paul Egeler and Spectrum Health. See
<https://github.com/SpectrumHealthResearch/REDCapRITS>.
Split REDCap repeating instruments output into multiple tables.
This will take raw output from a REDCap export and split it into a base table
and child tables for each repeating instrument.
REDCap (Research Electronic Data Capture) is a secure, web-based software
@ -29,8 +30,8 @@ License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
URL: https://github.com/SpectrumHealthResearch/REDCapRITS
BugReports: https://github.com/SpectrumHealthResearch/REDCapRITS/issues
URL: https://github.com/agdamsbo/REDCapRITS
BugReports: https://github.com/agdamsbo/REDCapRITS/issues
Collate:
'utils.r'
'process_user_input.r'

View File

@ -82,12 +82,11 @@
REDCap_split <- function(records,
metadata,
primary_table_name = "",
forms = c("repeating", "all")
) {
forms = c("repeating", "all")) {
# Process user input
records <- process_user_input(records)
metadata <- as.data.frame(process_user_input(metadata)) # See issue #12
metadata <-
as.data.frame(process_user_input(metadata)) # See issue #12
# Get the variable names in the dataset
vars_in_data <- names(records)
@ -96,7 +95,8 @@ REDCap_split <- function(records,
forms <- match.arg(forms)
# Check to see if there were any repeating instruments
if (forms == "repeating" && !"redcap_repeat_instrument" %in% vars_in_data) {
if (forms == "repeating" &&
!"redcap_repeat_instrument" %in% vars_in_data) {
stop("There are no repeating instruments in this dataset.")
}
@ -113,7 +113,8 @@ REDCap_split <- function(records,
names(metadata) <- metadata_names
# Make sure that no metadata columns are factors
metadata <- rapply(metadata, as.character, classes = "factor", how = "replace")
metadata <-
rapply(metadata, as.character, classes = "factor", how = "replace")
# Find the fields and associated form
fields <- match_fields_to_form(metadata, vars_in_data)
@ -131,22 +132,23 @@ REDCap_split <- function(records,
if ("redcap_repeat_instrument" %in% vars_in_data) {
# Variables to be at the beginning of each repeating instrument
repeat_instrument_fields <- grep(
"^redcap_repeat.*",
repeat_instrument_fields <- grep("^redcap_repeat.*",
vars_in_data,
value = TRUE
)
value = TRUE)
# Identify the subtables in the data
subtables <- unique(records$redcap_repeat_instrument)
subtables <- subtables[subtables != ""]
# Split the table based on instrument
out <- split.data.frame(records, records$redcap_repeat_instrument)
out <-
split.data.frame(records, records$redcap_repeat_instrument)
primary_table_index <- which(names(out) == "")
if (forms == "repeating" && primary_table_name %in% subtables) {
warning("The label given to the primary table is already used by a repeating instrument. The primary table label will be left blank.")
warning(
"The label given to the primary table is already used by a repeating instrument. The primary table label will be left blank."
)
primary_table_name <- ""
} else if (primary_table_name > "") {
names(out)[[primary_table_index]] <- primary_table_name
@ -154,26 +156,16 @@ REDCap_split <- function(records,
# Delete the variables that are not relevant
for (i in names(out)) {
if (i == primary_table_name) {
out_fields <- which(
vars_in_data %in% c(
universal_fields,
fields[!fields[,2] %in% subtables, 1]
)
)
out[[primary_table_index]] <- out[[primary_table_index]][out_fields]
out_fields <- which(vars_in_data %in% c(universal_fields,
fields[!fields[, 2] %in% subtables, 1]))
out[[primary_table_index]] <-
out[[primary_table_index]][out_fields]
} else {
out_fields <- which(
vars_in_data %in% c(
universal_fields,
out_fields <- which(vars_in_data %in% c(universal_fields,
repeat_instrument_fields,
fields[fields[,2] == i, 1]
)
)
fields[fields[, 2] == i, 1]))
out[[i]] <- out[[i]][out_fields]
}
@ -181,20 +173,14 @@ REDCap_split <- function(records,
}
if (forms == "all") {
out <- c(
split_non_repeating_forms(
out[[primary_table_index]],
out <- c(split_non_repeating_forms(out[[primary_table_index]],
universal_fields,
fields[!fields[,2] %in% subtables,]
),
out[-primary_table_index]
)
fields[!fields[, 2] %in% subtables, ]),
out[-primary_table_index])
}
} else {
out <- split_non_repeating_forms(records, universal_fields, fields)
}

View File

@ -17,7 +17,6 @@ process_user_input.data.frame <- function(x, ...) {
}
process_user_input.character <- function(x, ...) {
if (!requireNamespace("jsonlite", quietly = TRUE)) {
stop(
"The package 'jsonlite' is needed to convert ",
@ -35,7 +34,6 @@ process_user_input.character <- function(x, ...) {
}
# Method for `response` inputs: decodes the raw bytes held in `x$content`
# into a single character string and hands that string back to
# `process_user_input()` for further processing.
# NOTE(review): presumably `x` is an HTTP response object (e.g. from httr)
# whose body is text -- confirm against callers.
#
# x   : object with a raw-vector `content` element.
# ... : accepted for signature compatibility; not forwarded.
process_user_input.response <- function(x, ...) {
  body_text <- rawToChar(x$content)
  process_user_input(body_text)
}

View File

@ -1,9 +1,6 @@
match_fields_to_form <- function(metadata, vars_in_data) {
fields <- metadata[
!metadata$field_type %in% c("descriptive", "checkbox"),
c("field_name", "form_name")
]
fields <- metadata[!metadata$field_type %in% c("descriptive", "checkbox"),
c("field_name", "form_name")]
# Process instrument status fields
form_names <- unique(metadata$form_name)
@ -16,9 +13,9 @@ match_fields_to_form <- function(metadata, vars_in_data) {
fields <- rbind(fields, form_complete_fields)
# Process survey timestamps
timestamps <- intersect(vars_in_data, paste0(form_names, "_timestamp"))
timestamps <-
intersect(vars_in_data, paste0(form_names, "_timestamp"))
if (length(timestamps)) {
timestamp_fields <- data.frame(
field_name = timestamps,
form_name = sub("_timestamp$", "", timestamps),
@ -31,17 +28,12 @@ match_fields_to_form <- function(metadata, vars_in_data) {
# Process checkbox fields
if (any(metadata$field_type == "checkbox")) {
checkbox_basenames <- metadata[
metadata$field_type == "checkbox",
c("field_name", "form_name")
]
checkbox_basenames <- metadata[metadata$field_type == "checkbox",
c("field_name", "form_name")]
checkbox_fields <-
do.call(
"rbind",
apply(
checkbox_basenames,
do.call("rbind",
apply(checkbox_basenames,
1,
function(x, y)
data.frame(
@ -50,9 +42,7 @@ match_fields_to_form <- function(metadata, vars_in_data) {
stringsAsFactors = FALSE,
row.names = NULL
),
y = vars_in_data
)
)
y = vars_in_data))
fields <- rbind(fields, checkbox_fields)
@ -60,12 +50,9 @@ match_fields_to_form <- function(metadata, vars_in_data) {
# Process ".*\\.factor" fields supplied by REDCap's export data R script
if (any(grepl("\\.factor$", vars_in_data))) {
factor_fields <-
do.call(
"rbind",
apply(
fields,
do.call("rbind",
apply(fields,
1,
function(x, y) {
field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y)
@ -77,9 +64,7 @@ match_fields_to_form <- function(metadata, vars_in_data) {
row.names = NULL
)
},
y = vars_in_data
)
)
y = vars_in_data))
fields <- rbind(fields, factor_fields)
@ -90,12 +75,11 @@ match_fields_to_form <- function(metadata, vars_in_data) {
}
split_non_repeating_forms <- function(table, universal_fields, fields) {
split_non_repeating_forms <-
function(table, universal_fields, fields) {
forms <- unique(fields[[2]])
x <- lapply(
forms,
x <- lapply(forms,
function (x) {
table[names(table) %in% union(universal_fields, fields[fields[, 2] == x, 1])]
})
@ -103,4 +87,3 @@ split_non_repeating_forms <- function(table, universal_fields, fields) {
structure(x, names = forms)
}

View File

@ -36,22 +36,32 @@ REDCap_process_csv <- function(data) {
# Setting factors (will create new variables for factors)
data$redcap_repeat_instrument.factor = factor(data$redcap_repeat_instrument,levels=c("sale"))
data$redcap_repeat_instrument.factor = factor(data$redcap_repeat_instrument, levels =
c("sale"))
data$cyl.factor = factor(data$cyl, levels = c("3", "4", "5", "6", "7", "8"))
data$vs.factor = factor(data$vs, levels = c("1", "0"))
data$am.factor = factor(data$am, levels = c("0", "1"))
data$gear.factor = factor(data$gear, levels = c("3", "4", "5"))
data$carb.factor = factor(data$carb, levels = c("1", "2", "3", "4", "5", "6", "7", "8"))
data$color_available___red.factor = factor(data$color_available___red,levels=c("0","1"))
data$color_available___green.factor = factor(data$color_available___green,levels=c("0","1"))
data$color_available___blue.factor = factor(data$color_available___blue,levels=c("0","1"))
data$color_available___black.factor = factor(data$color_available___black,levels=c("0","1"))
data$motor_trend_cars_complete.factor = factor(data$motor_trend_cars_complete,levels=c("0","1","2"))
data$letter_group___a.factor = factor(data$letter_group___a,levels=c("0","1"))
data$letter_group___b.factor = factor(data$letter_group___b,levels=c("0","1"))
data$letter_group___c.factor = factor(data$letter_group___c,levels=c("0","1"))
data$color_available___red.factor = factor(data$color_available___red, levels =
c("0", "1"))
data$color_available___green.factor = factor(data$color_available___green, levels =
c("0", "1"))
data$color_available___blue.factor = factor(data$color_available___blue, levels =
c("0", "1"))
data$color_available___black.factor = factor(data$color_available___black, levels =
c("0", "1"))
data$motor_trend_cars_complete.factor = factor(data$motor_trend_cars_complete, levels =
c("0", "1", "2"))
data$letter_group___a.factor = factor(data$letter_group___a, levels =
c("0", "1"))
data$letter_group___b.factor = factor(data$letter_group___b, levels =
c("0", "1"))
data$letter_group___c.factor = factor(data$letter_group___c, levels =
c("0", "1"))
data$choice.factor = factor(data$choice, levels = c("choice1", "choice2"))
data$grouping_complete.factor = factor(data$grouping_complete,levels=c("0","1","2"))
data$grouping_complete.factor = factor(data$grouping_complete, levels =
c("0", "1", "2"))
data$color.factor = factor(data$color, levels = c("1", "2", "3", "4"))
data$sale_complete.factor = factor(data$sale_complete, levels = c("0", "1", "2"))

View File

@ -6,4 +6,5 @@
# )
# }
get_data_location <- function(x) file.path("data", x)
# Build the path to a test fixture by prefixing the file name(s) in `x`
# with the "data" directory. Vectorised over `x` via file.path().
get_data_location <- function(x) {
  file.path("data", x)
}

View File

@ -1,26 +1,12 @@
context("Reading in JSON")
# Check the RCurl export ---------------------------------------------------
test_that("JSON character vector from RCurl matches reference", {
metadata <- jsonlite::fromJSON(get_data_location("ExampleProject_metadata.json"))
metadata <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_metadata.json"
)
)
records <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_records.json"
)
)
records <- jsonlite::fromJSON(get_data_location("ExampleProject_records.json"))
redcap_output_json1 <- REDCap_split(records, metadata)
expect_known_hash(redcap_output_json1, "2c8b6531597182af1248f92124161e0c")
})
# Check the httr export ---------------------------------------------------
# Something will go here.

View File

@ -1,4 +1,3 @@
context("CSV Exports")
# Set up the path and data -------------------------------------------------
metadata <- read.csv(
@ -6,10 +5,8 @@ metadata <- read.csv(
stringsAsFactors = TRUE
)
records <- read.csv(
get_data_location("ExampleProject_DATA_2018-06-07_1129.csv"),
stringsAsFactors = TRUE
)
records <- read.csv(get_data_location("ExampleProject_DATA_2018-06-07_1129.csv"),
stringsAsFactors = TRUE)
redcap_output_csv1 <- REDCap_split(records, metadata)
@ -21,7 +18,8 @@ test_that("CSV export matches reference", {
# Test that R code enhanced CSV export matches reference --------------------
if (requireNamespace("Hmisc", quietly = TRUE)) {
test_that("R code enhanced export matches reference", {
redcap_output_csv2 <- REDCap_split(REDCap_process_csv(records), metadata)
redcap_output_csv2 <-
REDCap_split(REDCap_process_csv(records), metadata)
expect_known_hash(redcap_output_csv2, "34f82cab35bf8aae47d08cd96f743e6b")
})
@ -29,33 +27,22 @@ if (requireNamespace("Hmisc", quietly = TRUE)) {
if (requireNamespace("readr", quietly = TRUE)) {
context("Compatibility with readr")
metadata <- readr::read_csv(
get_data_location(
"ExampleProject_DataDictionary_2018-06-07.csv"
)
)
metadata <- readr::read_csv(get_data_location("ExampleProject_DataDictionary_2018-06-07.csv"))
records <- readr::read_csv(
get_data_location(
"ExampleProject_DATA_2018-06-07_1129.csv"
)
)
records <- readr::read_csv(get_data_location("ExampleProject_DATA_2018-06-07_1129.csv"))
redcap_output_readr <- REDCap_split(records, metadata)
expect_matching_elements <- function(FUN) {
FUN <- match.fun(FUN)
expect_identical(
lapply(redcap_output_readr, FUN),
lapply(redcap_output_csv1, FUN)
)
expect_identical(lapply(redcap_output_readr, FUN),
lapply(redcap_output_csv1, FUN))
}
test_that("Result of data read in with `readr` will match result with `read.csv`", {
test_that("Result of data read in with `readr` will match result with `read.csv`",
{
# The list itself
expect_identical(length(redcap_output_readr), length(redcap_output_csv1))
expect_identical(names(redcap_output_readr), names(redcap_output_csv1))

View File

@ -1,63 +1,57 @@
context("Using the `forms = 'all'` argument")
# Global variables --------------------------------------------------------
# Cars
metadata <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_metadata.json"
)
)
metadata <-
jsonlite::fromJSON(get_data_location("ExampleProject_metadata.json"))
records <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_records.json"
)
)
records <-
jsonlite::fromJSON(get_data_location("ExampleProject_records.json"))
redcap_output_json <- REDCap_split(records, metadata, forms = "all")
# Longitudinal
file_paths <- sapply(
c(
records = "WARRIORtestForSoftwa_DATA_2018-06-21_1431.csv",
metadata = "WARRIORtestForSoftwareUpgrades_DataDictionary_2018-06-21.csv"
), get_data_location
c(records = "WARRIORtestForSoftwa_DATA_2018-06-21_1431.csv",
metadata = "WARRIORtestForSoftwareUpgrades_DataDictionary_2018-06-21.csv"),
get_data_location
)
redcap <- lapply(file_paths, read.csv, stringsAsFactors = FALSE)
redcap[["metadata"]] <- with(redcap, metadata[metadata[, 1] > "",])
redcap_output_long <- with(redcap, REDCap_split(records, metadata, forms = "all"))
redcap_output_long <-
with(redcap, REDCap_split(records, metadata, forms = "all"))
redcap_long_names <- names(redcap[[1]])
# Tests -------------------------------------------------------------------
test_that("Each form is an element in the list", {
expect_length(redcap_output_json, 3L)
expect_identical(names(redcap_output_json), c("motor_trend_cars", "grouping", "sale"))
expect_identical(names(redcap_output_json),
c("motor_trend_cars", "grouping", "sale"))
})
test_that("All variables land somewhere", {
expect_true(setequal(names(records), Reduce("union", sapply(redcap_output_json, names))))
expect_true(setequal(names(records), Reduce(
"union", sapply(redcap_output_json, names)
)))
})
test_that("Primary table name is ignored", {
expect_identical(
REDCap_split(records, metadata, "HELLO", "all"),
redcap_output_json
)
expect_identical(REDCap_split(records, metadata, "HELLO", "all"),
redcap_output_json)
})
test_that("Supports longitudinal data", {
# setdiff(redcap_long_names, Reduce("union", sapply(redcap_output_long, names)))
## [1] "informed_consent_and_addendum_timestamp"
expect_true(setequal(redcap_long_names, Reduce("union", sapply(redcap_output_long, names))))
expect_true(setequal(redcap_long_names, Reduce(
"union", sapply(redcap_output_long, names)
)))
})

View File

@ -1,4 +1,4 @@
context("Longitudinal data")
## "Longitudinal data"
test_that("CSV export matches reference", {
file_paths <- sapply(

View File

@ -1,28 +1,17 @@
context("Primary table name processing")
## "Primary table name processing"
# Global variables -------------------------------------------------------
metadata <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_metadata.json"
)
)
metadata <- jsonlite::fromJSON(get_data_location("ExampleProject_metadata.json"))
records <- jsonlite::fromJSON(
get_data_location(
"ExampleProject_records.json"
)
)
records <- jsonlite::fromJSON(get_data_location("ExampleProject_records.json"))
ref_hash <- "2c8b6531597182af1248f92124161e0c"
# Tests -------------------------------------------------------------------
test_that("Will not use a repeating instrument name for primary table", {
redcap_output_json1 <- expect_warning(
REDCap_split(records, metadata, "sale"),
"primary table"
)
redcap_output_json1 <- expect_warning(REDCap_split(records, metadata, "sale"),
"primary table")
expect_known_hash(redcap_output_json1, ref_hash)