three new functions and new version

2025-04-22 22:22:34 +02:00 · 2023-07-04 16:03:06 +02:00 · 2023-07-04 16:03:06 +02:00 · 8bd4d9ade7
commit 8bd4d9ade7
parent ee396fb811
11 changed files with 411 additions and 45 deletions
--- a/3
+++ b/3
@ -1,6 +1,6 @@
 Package: REDCapCAST
 Title: REDCap Castellated Data Handling
-Version: 23.6.1
+Version: 23.6.2
 Authors@R: c(
    person("Andreas Gammelgaard", "Damsbo", email = "agdamsbo@clin.au.dk", role = c("aut", "cre"),
           comment = c(ORCID = "0000-0002-7559-1154")),
@ -48,6 +48,7 @@ Collate:
    'utils.r'
    'process_user_input.r'
    'REDCap_split.r'
+    'ds2dd.R'
    'read_redcap_tables.R'
    'redcap_wider.R'
    'redcapcast_data.R'
--- a/3
+++ b/3
@ -2,12 +2,15 @@

 export(REDCap_split)
 export(clean_redcap_name)
+export(d2w)
+export(ds2dd)
 export(focused_metadata)
 export(match_fields_to_form)
 export(read_redcap_tables)
 export(redcap_wider)
 export(sanitize_split)
 export(split_non_repeating_forms)
+export(strsplitx)
 importFrom(REDCapR,redcap_event_instruments)
 importFrom(REDCapR,redcap_metadata_read)
 importFrom(REDCapR,redcap_read)
--- a/NEWS.md
+++ b/NEWS.md
@ -1,3 +1,16 @@
+# REDCapCAST 23.6.2
+
+This version marks the introduction of a few helper functions to handle database creation.
+
+### Functions
+
+* New: `ds2dd()` function migrating from the `stRoke`-package. Assists in building a data dictionary for REDCap from a dataset.
+
+* New: `strsplitx()` function to ease the string splitting as an extension of `base::strsplit()`. Inspiration from https://stackoverflow.com/a/11014253/21019325 and https://www.r-bloggers.com/2018/04/strsplit-but-keeping-the-delimiter/. 
+
+* New: `d2n()` function converts single digits to written numbers. Used to sanitize variable and form names in REDCap database creation. For more universal number to word I would suggest `english::word()` or `xfun::numbers_to_words()`, though I have not been testing these.
+
+
 # REDCapCAST 23.6.1

 ### Documentation:
--- a/R/ds2dd.R
+++ b/R/ds2dd.R
@ -0,0 +1,84 @@
+utils::globalVariables(c("redcapcast_meta"))
+#' Data set to data dictionary function
+#'
+#' Migrated from stRoke ds2dd(). Fits better with the functionality of
+#' 'REDCapCAST'
+#' @param ds data set
+#' @param record.id name or column number of id variable, moved to first row of
+#' data dictionary, character of integer. Default is "record_id".
+#' @param form.name vector of form names, character string, length 1 or length
+#' equal to number of variables. Default is "basis".
+#' @param field.type vector of field types, character string, length 1 or length
+#' equal to number of variables. Default is "text.
+#' @param field.label vector of form names, character string, length 1 or length
+#' equal to number of variables. Default is NULL and is then identical to field
+#' names.
+#' @param include.column.names Flag to give detailed output including new
+#' column names for original data set for upload.
+#' @param metadata Metadata column names. Default is the included
+#' REDCapCAST::redcapcast_data.
+#'
+#' @return data.frame or list of data.frame and vector
+#' @export
+#'
+#' @examples
+#' redcapcast_data$record_id <- seq_len(nrow(redcapcast_data))
+#' ds2dd(redcapcast_data, include.column.names=TRUE)
+
+ds2dd <-
+  function(ds,
+           record.id = "record_id",
+           form.name = "basis",
+           field.type = "text",
+           field.label = NULL,
+           include.column.names = FALSE,
+           metadata = names(redcapcast_meta)) {
+    dd <- data.frame(matrix(ncol = length(metadata), nrow = ncol(ds)))
+    colnames(dd) <- metadata
+
+    if (is.character(record.id) & !record.id %in% colnames(ds)) {
+      stop("Provided record.id is not a variable name in provided data set.")
+    }
+
+    # renaming to lower case and substitute spaces with underscore
+    field.name <- gsub(" ", "_", tolower(colnames(ds)))
+
+    # handles both character and integer
+    colsel <-
+      colnames(ds) == colnames(ds[record.id])
+
+    if (summary(colsel)[3] != 1) {
+      stop("Provided record.id has to be or refer to a uniquely named column.")
+    }
+
+    dd[, "field_name"] <-
+      c(field.name[colsel], field.name[!colsel])
+
+    if (length(form.name) > 1 & length(form.name) != ncol(ds)) {
+      stop(
+        "Provided form.name should be of length 1 (value is reused) or equal
+        length as number of variables in data set."
+      )
+    }
+    dd[, "form_name"] <- form.name
+
+    if (length(field.type) > 1 & length(field.type) != ncol(ds)) {
+      stop(
+        "Provided field.type should be of length 1 (value is reused) or equal
+        length as number of variables in data set."
+      )
+    }
+
+    dd[, "field_type"] <- field.type
+
+    if (is.null(field.label)) {
+      dd[, "field_label"] <- dd[, "field_name"]
+    } else
+      dd[, "field_label"] <- field.label
+
+    if (include.column.names){
+      list("DataDictionary"=dd,"Column names"=field.name)
+    } else dd
+  }
+
+
--- a/R/utils.r
+++ b/R/utils.r
@ -276,3 +276,141 @@ split_non_repeating_forms <-
    structure(x, names = forms)

  }
+
+
+#' Extended string splitting
+#'
+#' Can be used as a substitute of the base function. Main claim to fame is
+#' easing the split around the defined delimiter, see example.
+#' @param x data
+#' @param split delimiter
+#' @param type Split type. Can be c("classic", "before", "after", "around")
+#' @param perl perl param from strsplit()
+#' @param ... additional parameters are passed to base strsplit handling splits
+#'
+#' @return list
+#' @export
+#'
+#' @examples
+#' test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")
+#' strsplitx(test,"[0-9]",type="around")
+strsplitx <- function(x,
+                      split,
+                      type = "classic",
+                      perl = FALSE,
+                      ...) {
+  if (type == "classic") {
+    # use base::strsplit
+    out <- base::strsplit(x = x, split = split, perl = perl, ...)
+  } else if (type == "before") {
+    # split before the delimiter and keep it
+    out <- base::strsplit(x = x,
+                          split = paste0("(?<=.)(?=", split, ")"),
+                          perl = TRUE,
+                          ...)
+  } else if (type == "after") {
+    # split after the delimiter and keep it
+    out <- base::strsplit(x = x,
+                          split = paste0("(?<=", split, ")"),
+                          perl = TRUE,
+                          ...)
+  } else if (type == "around") {
+    # split around the defined delimiter
+
+    out <- base::strsplit(gsub("~~", "~", # Removes double ~
+                               gsub("^~", "", # Removes leading ~
+                                    gsub(
+                                      # Splits and inserts ~ at all delimiters
+                                      paste0("(", split, ")"), "~\\1~", x
+                                    ))), "~")
+
+  } else {
+    # wrong type input
+    stop("type must be 'classic', 'after', 'before' or 'around'!")
+  }
+
+  out
+}
+
+#' Convert single digits to words
+#'
+#' @param x data. Handle vectors, data.frames and lists
+#' @param lang language. Danish (da) and English (en), Default is "en"
+#' @param neutrum for numbers depending on counted word
+#' @param everything flag to also split numbers >9 to single digits
+#'
+#' @return returns characters in same format as input
+#' @export
+#'
+#' @examples
+#' d2w(c(2:8,21))
+#' d2w(data.frame(2:7,3:8,1),lang="da",neutrum=TRUE)
+#'
+#' ## If everything=T, also larger numbers are reduced.
+#' ## Elements in the list are same length as input
+#' d2w(list(2:8,c(2,6,4,23),2), everything=TRUE)
+#'
+d2w <- function(x, lang = "en", neutrum=FALSE, everything=FALSE) {
+
+  # In Danish the written 1 depends on the counted word
+  if (neutrum) nt <- "t" else nt <- "n"
+
+  # A sapply() call with nested lapply() to handle vectors, data.frames and lists
+  convert <- function(x, lang, neutrum) {
+    zero_nine = data.frame(
+      num = 0:9,
+      en = c(
+        'zero',
+        'one',
+        'two',
+        'three',
+        'four',
+        'five',
+        'six',
+        'seven',
+        'eight',
+        'nine'
+      ),
+      da = c(
+        "nul",
+        paste0("e",nt),
+        "to",
+        "tre",
+        "fire",
+        "fem",
+        "seks",
+        "syv",
+        "otte",
+        "ni"
+      )
+    )
+
+    wrd <- lapply(x, function(i) {
+      zero_nine[, tolower(lang)][zero_nine[, 1] == i]
+    })
+
+    sub <- lengths(wrd) == 1
+
+    x[sub] <- wrd[sub]
+
+    unlist(x)
+  }
+
+  # Also converts numbers >9 to single digits and writes out
+  # Uses strsplitx()
+  if (everything) {
+    out <- sapply(x,function(y){
+      do.call(c,lapply(y,function(z){
+        v <- strsplitx(z,"[0-9]",type="around")
+        Reduce(paste,sapply(v,convert,lang = lang, neutrum = neutrum))
+      }))
+
+    })
+  } else {
+  out <- sapply(x,convert,lang = lang, neutrum = neutrum)
+  }
+
+  if (is.data.frame(x)) out <- data.frame(out)
+
+  out
+}
--- a/cran-comments.md
+++ b/cran-comments.md
@ -1,54 +1,41 @@
-## Test environments
- R-hub windows-x86_64-devel (r-devel)
- R-hub ubuntu-gcc-release (r-release)
- R-hub fedora-clang-devel (r-devel)
+## rhub::check_for_cran() results

-## R CMD check results
-❯ On windows-x86_64-devel (r-devel)
-  checking CRAN incoming feasibility ... [17s] NOTE
-  Maintainer: 'Andreas Gammelgaard Damsbo <agdamsbo@clin.au.dk>'
-  
-  New submission
-  
-  Possibly misspelled words in DESCRIPTION:
-    Egeler (8:45)
-    REDCap (2:8, 10:39, 11:30, 14:5)
-    REDCapRITS (8:26)
-    interoperability (19:44)
+── REDCapCAST 23.6.2: NOTE

-❯ On windows-x86_64-devel (r-devel)
-  checking for non-standard things in the check directory ... NOTE
-  Found the following files/directories:
+  Build ID:   REDCapCAST_23.6.2.tar.gz-a738190c0d8a4e76b9212e4915625f96
+  Platform:   Windows Server 2022, R-devel, 64 bit
+  Submitted:  56m 54.3s ago
+  Build time: 4m 25.1s

-❯ On windows-x86_64-devel (r-devel)
-  checking for detritus in the temp directory ... NOTE
+❯ checking for non-standard things in the check directory ... NOTE
+    ''NULL''
+
+❯ checking for detritus in the temp directory ... NOTE
  Found the following files/directories:
    'lastMiKTeXException'

-❯ On ubuntu-gcc-release (r-release)
-  checking CRAN incoming feasibility ... [6s/24s] NOTE
-  Maintainer: ‘Andreas Gammelgaard Damsbo <agdamsbo@clin.au.dk>’
-  
-  New submission
-  
-  Possibly misspelled words in DESCRIPTION:
-    Egeler (8:45)
-    REDCap (2:8, 10:39, 11:30, 14:5)
-    REDCapRITS (8:26)
+0 errors ✔ | 0 warnings ✔ | 2 notes ✖

-❯ On ubuntu-gcc-release (r-release), fedora-clang-devel (r-devel)
-  checking HTML version of manual ... NOTE
+── REDCapCAST 23.6.2: NOTE
+
+  Build ID:   REDCapCAST_23.6.2.tar.gz-a9243a74abae4f04b2a0e29a2751c420
+  Platform:   Ubuntu Linux 20.04.1 LTS, R-release, GCC
+  Submitted:  56m 54.4s ago
+  Build time: 32m 6.5s
+
+❯ checking HTML version of manual ... NOTE
  Skipping checking HTML validation: no command 'tidy' found

-❯ On fedora-clang-devel (r-devel)
-  checking CRAN incoming feasibility ... [7s/21s] NOTE
-  Maintainer: ‘Andreas Gammelgaard Damsbo <agdamsbo@clin.au.dk>’
-  
-  New submission
-  
-  Possibly misspelled words in DESCRIPTION:
-    Egeler (8:45)
-    REDCap (2:8, 10:39, 11:30, 14:5)
-    REDCapRITS (8:26)
+0 errors ✔ | 0 warnings ✔ | 1 note ✖

-0 errors ✔ | 0 warnings ✔ | 6 notes ✖
+── REDCapCAST 23.6.2: NOTE
+
+  Build ID:   REDCapCAST_23.6.2.tar.gz-ba8ade3478c6494b8a8daee08a502f2b
+  Platform:   Fedora Linux, R-devel, clang, gfortran
+  Submitted:  56m 54.4s ago
+  Build time: 29m 25.6s
+
+❯ checking HTML version of manual ... NOTE
+  Skipping checking HTML validation: no command 'tidy' found
+
+0 errors ✔ | 0 warnings ✔ | 1 note ✖
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@ -1,11 +1,13 @@
 Assesment
 CMD
 Codecov
+DOI
 DataDictionary
 GStat
 GithubActions
 JSON
 Lifecycle
+METACRAN
 Pivotting
 README
 REDCap
@ -14,11 +16,13 @@ REDCapRITS
 SpectrumHealthResearch
 al
 api
+da
 demonstrational
 descirption
 desireable
 doi
 dplyr
+ds
 et
 immprovements
 jbi
@ -27,6 +31,11 @@ matadata
 md
 nad
 og
+param
+perl
+redcapcast
+stRoke
+strsplit
 thorugh
 tidyverse
 uri
--- a/man/d2w.Rd
+++ b/man/d2w.Rd
@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.r
+\name{d2w}
+\alias{d2w}
+\title{Convert single digits to words}
+\usage{
+d2w(x, lang = "en", neutrum = FALSE, everything = FALSE)
+}
+\arguments{
+\item{x}{data. Handle vectors, data.frames and lists}
+
+\item{lang}{language. Danish (da) and English (en), Default is "en"}
+
+\item{neutrum}{for numbers depending on counted word}
+
+\item{everything}{flag to also split numbers >9 to single digits}
+}
+\value{
+returns characters in same format as input
+}
+\description{
+Convert single digits to words
+}
+\examples{
+d2w(c(2:8,21))
+d2w(data.frame(2:7,3:8,1),lang="da",neutrum=TRUE)
+
+## If everything=T, also larger numbers are reduced.
+## Elements in the list are same length as input
+d2w(list(2:8,c(2,6,4,23),2), everything=TRUE)
+
+}
--- a/man/ds2dd.Rd
+++ b/man/ds2dd.Rd
@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ds2dd.R
+\name{ds2dd}
+\alias{ds2dd}
+\title{Data set to data dictionary function}
+\usage{
+ds2dd(
+  ds,
+  record.id = "record_id",
+  form.name = "basis",
+  field.type = "text",
+  field.label = NULL,
+  include.column.names = FALSE,
+  metadata = names(redcapcast_meta)
+)
+}
+\arguments{
+\item{ds}{data set}
+
+\item{record.id}{name or column number of id variable, moved to first row of
+data dictionary, character of integer. Default is "record_id".}
+
+\item{form.name}{vector of form names, character string, length 1 or length
+equal to number of variables. Default is "basis".}
+
+\item{field.type}{vector of field types, character string, length 1 or length
+equal to number of variables. Default is "text.}
+
+\item{field.label}{vector of form names, character string, length 1 or length
+equal to number of variables. Default is NULL and is then identical to field
+names.}
+
+\item{include.column.names}{Flag to give detailed output including new
+column names for original data set for upload.}
+
+\item{metadata}{Metadata column names. Default is the included
+REDCapCAST::redcapcast_data.}
+}
+\value{
+data.frame or list of data.frame and vector
+}
+\description{
+Migrated from stRoke ds2dd(). Fits better with the functionality of
+'REDCapCAST'
+}
+\examples{
+redcapcast_data$record_id <- seq_len(nrow(redcapcast_data))
+ds2dd(redcapcast_data, include.column.names=TRUE)
+}
--- a/man/strsplitx.Rd
+++ b/man/strsplitx.Rd
@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.r
+\name{strsplitx}
+\alias{strsplitx}
+\title{Extended string splitting}
+\usage{
+strsplitx(x, split, type = "classic", perl = FALSE, ...)
+}
+\arguments{
+\item{x}{data}
+
+\item{split}{delimiter}
+
+\item{type}{Split type. Can be c("classic", "before", "after", "around")}
+
+\item{perl}{perl param from strsplit()}
+
+\item{...}{additional parameters are passed to base strsplit handling splits}
+}
+\value{
+list
+}
+\description{
+Can be used as a substitute of the base function. Main claim to fame is
+easing the split around the defined delimiter, see example.
+}
+\examples{
+test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")
+strsplitx(test,"[0-9]",type="around")
+}
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@ -0,0 +1,20 @@
+test_that("strsplitx works", {
+  expect_equal(2 * 2, 4)
+  test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")
+  expect_length(strsplitx(test,"[0-9]",type="around")[[1]],3)
+
+  expect_equal(strsplitx(test,"[0-9]",type="classic")[[2]][1],"")
+  expect_length(strsplitx(test,"[0-9]",type="classic")[[4]],4)
+
+  expect_length(strsplitx(test,"[0-9]",type="classic")[[4]],4)
+})
+
+test_that("d2w works", {
+
+  expect_length(d2w(c(2:8,21)),8)
+
+  expect_equal(d2w(data.frame(2:7,3:8,1),lang="da",
+                              neutrum=TRUE)[1,3],"et")
+
+  expect_equal(d2w(list(2:8,c(2,6,4,23),2), everything=T)[[2]][4],"two three")
+})