diff --git a/DESCRIPTION b/DESCRIPTION index 66f6805..91e9f9d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: REDCapCAST Title: REDCap Castellated Data Handling -Version: 23.6.1 +Version: 23.6.2 Authors@R: c( person("Andreas Gammelgaard", "Damsbo", email = "agdamsbo@clin.au.dk", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7559-1154")), @@ -48,6 +48,7 @@ Collate: 'utils.r' 'process_user_input.r' 'REDCap_split.r' + 'ds2dd.R' 'read_redcap_tables.R' 'redcap_wider.R' 'redcapcast_data.R' diff --git a/NAMESPACE b/NAMESPACE index 6ea6314..4aa7269 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,12 +2,15 @@ export(REDCap_split) export(clean_redcap_name) +export(d2w) +export(ds2dd) export(focused_metadata) export(match_fields_to_form) export(read_redcap_tables) export(redcap_wider) export(sanitize_split) export(split_non_repeating_forms) +export(strsplitx) importFrom(REDCapR,redcap_event_instruments) importFrom(REDCapR,redcap_metadata_read) importFrom(REDCapR,redcap_read) diff --git a/NEWS.md b/NEWS.md index 716d640..655d8dd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,16 @@ +# REDCapCAST 23.6.2 + +This version marks the introduction of a few helper functions to handle database creation. + +### Functions + +* New: `ds2dd()` function migrated from the `stRoke`-package. Assists in building a data dictionary for REDCap from a dataset. + +* New: `strsplitx()` function to ease the string splitting as an extension of `base::strsplit()`. Inspiration from https://stackoverflow.com/a/11014253/21019325 and https://www.r-bloggers.com/2018/04/strsplit-but-keeping-the-delimiter/. + +* New: `d2w()` function converts single digits to written numbers. Used to sanitize variable and form names in REDCap database creation. For more universal number to word I would suggest `english::word()` or `xfun::numbers_to_words()`, though I have not tested these. 
+ + # REDCapCAST 23.6.1 ### Documentation: diff --git a/R/ds2dd.R b/R/ds2dd.R new file mode 100644 index 0000000..60ec378 --- /dev/null +++ b/R/ds2dd.R @@ -0,0 +1,84 @@ +utils::globalVariables(c("redcapcast_meta")) +#' Data set to data dictionary function +#' +#' Migrated from stRoke ds2dd(). Fits better with the functionality of +#' 'REDCapCAST' +#' @param ds data set +#' @param record.id name or column number of id variable, moved to first row of +#' data dictionary, character or integer. Default is "record_id". +#' @param form.name vector of form names, character string, length 1 or length +#' equal to number of variables. Default is "basis". +#' @param field.type vector of field types, character string, length 1 or length +#' equal to number of variables. Default is "text". +#' @param field.label vector of field labels, character string, length 1 or length +#' equal to number of variables. Default is NULL and is then identical to field +#' names. +#' @param include.column.names Flag to give detailed output including new +#' column names for original data set for upload. +#' @param metadata Metadata column names. Default is the names of the included +#' REDCapCAST::redcapcast_meta. 
+#' +#' @return data.frame or list of data.frame and vector +#' @export +#' +#' @examples +#' redcapcast_data$record_id <- seq_len(nrow(redcapcast_data)) +#' ds2dd(redcapcast_data, include.column.names=TRUE) + +ds2dd <- + function(ds, + record.id = "record_id", + form.name = "basis", + field.type = "text", + field.label = NULL, + include.column.names = FALSE, + metadata = names(redcapcast_meta)) { + dd <- data.frame(matrix(ncol = length(metadata), nrow = ncol(ds))) + colnames(dd) <- metadata + + if (is.character(record.id) && !record.id %in% colnames(ds)) { + stop("Provided record.id is not a variable name in provided data set.") + } + + # renaming to lower case and substitute spaces with underscore + field.name <- gsub(" ", "_", tolower(colnames(ds))) + + # handles both character and integer; exactly one column must match + colsel <- + colnames(ds) == colnames(ds[record.id]) + + if (sum(colsel) != 1) { + stop("Provided record.id has to be or refer to a uniquely named column.") + } + + dd[, "field_name"] <- + c(field.name[colsel], field.name[!colsel]) + + if (length(form.name) > 1 && length(form.name) != ncol(ds)) { + stop( + "Provided form.name should be of length 1 (value is reused) or equal + length as number of variables in data set." + ) + } + dd[, "form_name"] <- form.name + + if (length(field.type) > 1 && length(field.type) != ncol(ds)) { + stop( + "Provided field.type should be of length 1 (value is reused) or equal + length as number of variables in data set." 
+ ) + } + + dd[, "field_type"] <- field.type + + if (is.null(field.label)) { + dd[, "field_label"] <- dd[, "field_name"] + } else + dd[, "field_label"] <- field.label + + if (include.column.names){ + list("DataDictionary"=dd,"Column names"=field.name) + } else dd + } + + diff --git a/R/utils.r b/R/utils.r index 398c639..d26ba16 100644 --- a/R/utils.r +++ b/R/utils.r @@ -276,3 +276,141 @@ split_non_repeating_forms <- structure(x, names = forms) } + + +#' Extended string splitting +#' +#' Can be used as a substitute of the base function. Main claim to fame is +#' easing the split around the defined delimiter, see example. +#' @param x data +#' @param split delimiter +#' @param type Split type. Can be c("classic", "before", "after", "around") +#' @param perl perl param from strsplit() +#' @param ... additional parameters are passed to base strsplit handling splits +#' +#' @return list +#' @export +#' +#' @examples +#' test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now") +#' strsplitx(test,"[0-9]",type="around") +strsplitx <- function(x, + split, + type = "classic", + perl = FALSE, + ...) { + if (type == "classic") { + # use base::strsplit + out <- base::strsplit(x = x, split = split, perl = perl, ...) + } else if (type == "before") { + # split before the delimiter and keep it + out <- base::strsplit(x = x, + split = paste0("(?<=.)(?=", split, ")"), + perl = TRUE, + ...) + } else if (type == "after") { + # split after the delimiter and keep it + out <- base::strsplit(x = x, + split = paste0("(?<=", split, ")"), + perl = TRUE, + ...) 
+ } else if (type == "around") { + # split around the defined delimiter ("~" is used as a temporary marker) + + out <- base::strsplit(gsub("~~", "~", # Removes double ~ + gsub("^~", "", # Removes leading ~ + gsub( + # Splits and inserts ~ at all delimiters + paste0("(", split, ")"), "~\\1~", x + ))), "~") + + } else { + # wrong type input + stop("type must be 'classic', 'after', 'before' or 'around'!") + } + + out +} + +#' Convert single digits to words +#' +#' @param x data. Handle vectors, data.frames and lists +#' @param lang language. Danish (da) and English (en), Default is "en" +#' @param neutrum for numbers depending on counted word +#' @param everything flag to also split numbers >9 to single digits +#' +#' @return returns characters in same format as input +#' @export +#' +#' @examples +#' d2w(c(2:8,21)) +#' d2w(data.frame(2:7,3:8,1),lang="da",neutrum=TRUE) +#' +#' ## If everything=TRUE, also larger numbers are reduced. +#' ## Elements in the list are same length as input +#' d2w(list(2:8,c(2,6,4,23),2), everything=TRUE) +#' +d2w <- function(x, lang = "en", neutrum=FALSE, everything=FALSE) { + + # In Danish the written 1 depends on the counted word + if (neutrum) nt <- "t" else nt <- "n" + + # A sapply() call with nested lapply() to handle vectors, data.frames and lists + convert <- function(x, lang, neutrum) { + zero_nine <- data.frame( + num = 0:9, + en = c( + 'zero', + 'one', + 'two', + 'three', + 'four', + 'five', + 'six', + 'seven', + 'eight', + 'nine' + ), + da = c( + "nul", + paste0("e",nt), + "to", + "tre", + "fire", + "fem", + "seks", + "syv", + "otte", + "ni" + ) + ) + + wrd <- lapply(x, function(i) { + zero_nine[, tolower(lang)][zero_nine[, 1] == i] + }) + + sub <- lengths(wrd) == 1 + + x[sub] <- wrd[sub] + + unlist(x) + } + + # Also converts numbers >9 to single digits and writes out + # Uses strsplitx() + if (everything) { + out <- sapply(x,function(y){ + do.call(c,lapply(y,function(z){ + v <- strsplitx(z,"[0-9]",type="around") + Reduce(paste,sapply(v,convert,lang = lang, neutrum = 
neutrum)) + })) + + }) + } else { + out <- sapply(x,convert,lang = lang, neutrum = neutrum) + } + + if (is.data.frame(x)) out <- data.frame(out) + + out +} diff --git a/cran-comments.md b/cran-comments.md index 8d961b6..04d734b 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,54 +1,41 @@ -## Test environments -- R-hub windows-x86_64-devel (r-devel) -- R-hub ubuntu-gcc-release (r-release) -- R-hub fedora-clang-devel (r-devel) +## rhub::check_for_cran() results -## R CMD check results -❯ On windows-x86_64-devel (r-devel) - checking CRAN incoming feasibility ... [17s] NOTE - Maintainer: 'Andreas Gammelgaard Damsbo ' - - New submission - - Possibly misspelled words in DESCRIPTION: - Egeler (8:45) - REDCap (2:8, 10:39, 11:30, 14:5) - REDCapRITS (8:26) - interoperability (19:44) +── REDCapCAST 23.6.2: NOTE -❯ On windows-x86_64-devel (r-devel) - checking for non-standard things in the check directory ... NOTE - Found the following files/directories: + Build ID: REDCapCAST_23.6.2.tar.gz-a738190c0d8a4e76b9212e4915625f96 + Platform: Windows Server 2022, R-devel, 64 bit + Submitted: 56m 54.3s ago + Build time: 4m 25.1s -❯ On windows-x86_64-devel (r-devel) - checking for detritus in the temp directory ... NOTE +❯ checking for non-standard things in the check directory ... NOTE + ''NULL'' + +❯ checking for detritus in the temp directory ... NOTE Found the following files/directories: 'lastMiKTeXException' -❯ On ubuntu-gcc-release (r-release) - checking CRAN incoming feasibility ... [6s/24s] NOTE - Maintainer: ‘Andreas Gammelgaard Damsbo ’ - - New submission - - Possibly misspelled words in DESCRIPTION: - Egeler (8:45) - REDCap (2:8, 10:39, 11:30, 14:5) - REDCapRITS (8:26) +0 errors ✔ | 0 warnings ✔ | 2 notes ✖ -❯ On ubuntu-gcc-release (r-release), fedora-clang-devel (r-devel) - checking HTML version of manual ... 
NOTE +── REDCapCAST 23.6.2: NOTE + + Build ID: REDCapCAST_23.6.2.tar.gz-a9243a74abae4f04b2a0e29a2751c420 + Platform: Ubuntu Linux 20.04.1 LTS, R-release, GCC + Submitted: 56m 54.4s ago + Build time: 32m 6.5s + +❯ checking HTML version of manual ... NOTE Skipping checking HTML validation: no command 'tidy' found -❯ On fedora-clang-devel (r-devel) - checking CRAN incoming feasibility ... [7s/21s] NOTE - Maintainer: ‘Andreas Gammelgaard Damsbo ’ - - New submission - - Possibly misspelled words in DESCRIPTION: - Egeler (8:45) - REDCap (2:8, 10:39, 11:30, 14:5) - REDCapRITS (8:26) +0 errors ✔ | 0 warnings ✔ | 1 note ✖ -0 errors ✔ | 0 warnings ✔ | 6 notes ✖ +── REDCapCAST 23.6.2: NOTE + + Build ID: REDCapCAST_23.6.2.tar.gz-ba8ade3478c6494b8a8daee08a502f2b + Platform: Fedora Linux, R-devel, clang, gfortran + Submitted: 56m 54.4s ago + Build time: 29m 25.6s + +❯ checking HTML version of manual ... NOTE + Skipping checking HTML validation: no command 'tidy' found + +0 errors ✔ | 0 warnings ✔ | 1 note ✖ diff --git a/inst/WORDLIST b/inst/WORDLIST index 505c062..f606009 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,11 +1,13 @@ Assesment CMD Codecov +DOI DataDictionary GStat GithubActions JSON Lifecycle +METACRAN Pivotting README REDCap @@ -14,11 +16,13 @@ REDCapRITS SpectrumHealthResearch al api +da demonstrational descirption desireable doi dplyr +ds et immprovements jbi @@ -27,6 +31,11 @@ matadata md nad og +param +perl +redcapcast +stRoke +strsplit thorugh tidyverse uri diff --git a/man/d2w.Rd b/man/d2w.Rd new file mode 100644 index 0000000..7437a04 --- /dev/null +++ b/man/d2w.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.r +\name{d2w} +\alias{d2w} +\title{Convert single digits to words} +\usage{ +d2w(x, lang = "en", neutrum = FALSE, everything = FALSE) +} +\arguments{ +\item{x}{data. Handle vectors, data.frames and lists} + +\item{lang}{language. 
Danish (da) and English (en), Default is "en"} + +\item{neutrum}{for numbers depending on counted word} + +\item{everything}{flag to also split numbers >9 to single digits} +} +\value{ +returns characters in same format as input +} +\description{ +Convert single digits to words +} +\examples{ +d2w(c(2:8,21)) +d2w(data.frame(2:7,3:8,1),lang="da",neutrum=TRUE) + +## If everything=TRUE, also larger numbers are reduced. +## Elements in the list are same length as input +d2w(list(2:8,c(2,6,4,23),2), everything=TRUE) + +} diff --git a/man/ds2dd.Rd b/man/ds2dd.Rd new file mode 100644 index 0000000..c824fb5 --- /dev/null +++ b/man/ds2dd.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ds2dd.R +\name{ds2dd} +\alias{ds2dd} +\title{Data set to data dictionary function} +\usage{ +ds2dd( + ds, + record.id = "record_id", + form.name = "basis", + field.type = "text", + field.label = NULL, + include.column.names = FALSE, + metadata = names(redcapcast_meta) +) +} +\arguments{ +\item{ds}{data set} + +\item{record.id}{name or column number of id variable, moved to first row of +data dictionary, character or integer. Default is "record_id".} + +\item{form.name}{vector of form names, character string, length 1 or length +equal to number of variables. Default is "basis".} + +\item{field.type}{vector of field types, character string, length 1 or length +equal to number of variables. Default is "text".} + +\item{field.label}{vector of field labels, character string, length 1 or length +equal to number of variables. Default is NULL and is then identical to field +names.} + +\item{include.column.names}{Flag to give detailed output including new +column names for original data set for upload.} + +\item{metadata}{Metadata column names. Default is the names of the included +REDCapCAST::redcapcast_meta.} +} +\value{ +data.frame or list of data.frame and vector +} +\description{ +Migrated from stRoke ds2dd(). 
Fits better with the functionality of +'REDCapCAST' +} +\examples{ +redcapcast_data$record_id <- seq_len(nrow(redcapcast_data)) +ds2dd(redcapcast_data, include.column.names=TRUE) +} diff --git a/man/strsplitx.Rd b/man/strsplitx.Rd new file mode 100644 index 0000000..285c39e --- /dev/null +++ b/man/strsplitx.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.r +\name{strsplitx} +\alias{strsplitx} +\title{Extended string splitting} +\usage{ +strsplitx(x, split, type = "classic", perl = FALSE, ...) +} +\arguments{ +\item{x}{data} + +\item{split}{delimiter} + +\item{type}{Split type. Can be c("classic", "before", "after", "around")} + +\item{perl}{perl param from strsplit()} + +\item{...}{additional parameters are passed to base strsplit handling splits} +} +\value{ +list +} +\description{ +Can be used as a substitute of the base function. Main claim to fame is +easing the split around the defined delimiter, see example. +} +\examples{ +test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now") +strsplitx(test,"[0-9]",type="around") +} diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R new file mode 100644 index 0000000..e13ec83 --- /dev/null +++ b/tests/testthat/test-utils.R @@ -0,0 +1,20 @@ +test_that("strsplitx works", { + expect_error(strsplitx("a1","[0-9]",type="none"),"type must be") + test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now") + expect_length(strsplitx(test,"[0-9]",type="around")[[1]],3) + + expect_equal(strsplitx(test,"[0-9]",type="classic")[[2]][1],"") + expect_length(strsplitx(test,"[0-9]",type="classic")[[4]],4) + + expect_equal(strsplitx(test,"[0-9]",type="after")[[3]],c("mRS 6"," weeks")) +}) + +test_that("d2w works", { + + expect_length(d2w(c(2:8,21)),8) + + expect_equal(d2w(data.frame(2:7,3:8,1),lang="da", + neutrum=TRUE)[1,3],"et") + + expect_equal(d2w(list(2:8,c(2,6,4,23),2), everything=TRUE)[[2]][4],"two three") +})