three new functions and new version

This commit is contained in:
Andreas Gammelgaard Damsbo 2023-07-04 16:03:06 +02:00
parent ee396fb811
commit 8bd4d9ade7
11 changed files with 411 additions and 45 deletions

View File

@ -1,6 +1,6 @@
Package: REDCapCAST Package: REDCapCAST
Title: REDCap Castellated Data Handling Title: REDCap Castellated Data Handling
Version: 23.6.1 Version: 23.6.2
Authors@R: c( Authors@R: c(
person("Andreas Gammelgaard", "Damsbo", email = "agdamsbo@clin.au.dk", role = c("aut", "cre"), person("Andreas Gammelgaard", "Damsbo", email = "agdamsbo@clin.au.dk", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-7559-1154")), comment = c(ORCID = "0000-0002-7559-1154")),
@ -48,6 +48,7 @@ Collate:
'utils.r' 'utils.r'
'process_user_input.r' 'process_user_input.r'
'REDCap_split.r' 'REDCap_split.r'
'ds2dd.R'
'read_redcap_tables.R' 'read_redcap_tables.R'
'redcap_wider.R' 'redcap_wider.R'
'redcapcast_data.R' 'redcapcast_data.R'

View File

@ -2,12 +2,15 @@
export(REDCap_split) export(REDCap_split)
export(clean_redcap_name) export(clean_redcap_name)
export(d2w)
export(ds2dd)
export(focused_metadata) export(focused_metadata)
export(match_fields_to_form) export(match_fields_to_form)
export(read_redcap_tables) export(read_redcap_tables)
export(redcap_wider) export(redcap_wider)
export(sanitize_split) export(sanitize_split)
export(split_non_repeating_forms) export(split_non_repeating_forms)
export(strsplitx)
importFrom(REDCapR,redcap_event_instruments) importFrom(REDCapR,redcap_event_instruments)
importFrom(REDCapR,redcap_metadata_read) importFrom(REDCapR,redcap_metadata_read)
importFrom(REDCapR,redcap_read) importFrom(REDCapR,redcap_read)

13
NEWS.md
View File

@ -1,3 +1,16 @@
# REDCapCAST 23.6.2
This version marks the introduction of a few helper functions to handle database creation.
### Functions
* New: `ds2dd()` function migrating from the `stRoke`-package. Assists in building a data dictionary for REDCap from a dataset.
* New: `strsplitx()` function to ease the string splitting as an extension of `base::strsplit()`. Inspiration from https://stackoverflow.com/a/11014253/21019325 and https://www.r-bloggers.com/2018/04/strsplit-but-keeping-the-delimiter/.
* New: `d2w()` function converts single digits to written numbers. Used to sanitize variable and form names in REDCap database creation. For more universal number-to-word conversion I would suggest `english::words()` or `xfun::numbers_to_words()`, though I have not tested these.
# REDCapCAST 23.6.1 # REDCapCAST 23.6.1
### Documentation: ### Documentation:

84
R/ds2dd.R Normal file
View File

@ -0,0 +1,84 @@
utils::globalVariables(c("redcapcast_meta"))
#' Data set to data dictionary function
#'
#' Migrated from stRoke ds2dd(). Fits better with the functionality of
#' 'REDCapCAST'
#' @param ds data set
#' @param record.id name or column number of id variable, moved to first row of
#' data dictionary, character or integer of length 1. Default is "record_id".
#' @param form.name vector of form names, character string, length 1 or length
#' equal to number of variables. Default is "basis".
#' @param field.type vector of field types, character string, length 1 or length
#' equal to number of variables. Default is "text".
#' @param field.label vector of field labels, character string, length 1 or
#' length equal to number of variables. Default is NULL and is then identical
#' to field names.
#' @param include.column.names Flag to give detailed output including new
#' column names for original data set for upload.
#' @param metadata Metadata column names. Default is the column names of the
#' included REDCapCAST::redcapcast_meta.
#'
#' @return data.frame or list of data.frame and vector
#' @export
#'
#' @examples
#' redcapcast_data$record_id <- seq_len(nrow(redcapcast_data))
#' ds2dd(redcapcast_data, include.column.names=TRUE)
ds2dd <-
  function(ds,
           record.id = "record_id",
           form.name = "basis",
           field.type = "text",
           field.label = NULL,
           include.column.names = FALSE,
           metadata = names(redcapcast_meta)) {
    # Empty dictionary: one row per variable, one column per metadata field
    dd <- data.frame(matrix(ncol = length(metadata), nrow = ncol(ds)))
    colnames(dd) <- metadata

    # Exactly one id column can be moved to the top of the dictionary
    if (length(record.id) != 1) {
      stop("Provided record.id has to be or refer to a uniquely named column.")
    }

    if (is.character(record.id) && !record.id %in% colnames(ds)) {
      stop("Provided record.id is not a variable name in provided data set.")
    }

    # renaming to lower case and substitute spaces with underscore
    field.name <- gsub(" ", "_", tolower(colnames(ds)))

    # handles both character and integer
    colsel <- colnames(ds) == colnames(ds[record.id])

    # Count the matches directly: summary() of a logical vector has no third
    # element when every value is the same (e.g. a single-column data set)
    if (sum(colsel) != 1) {
      stop("Provided record.id has to be or refer to a uniquely named column.")
    }

    # The id variable goes first, the remaining variables keep their order
    dd[, "field_name"] <-
      c(field.name[colsel], field.name[!colsel])

    if (length(form.name) > 1 && length(form.name) != ncol(ds)) {
      stop(
        paste0(
          "Provided form.name should be of length 1 (value is reused) or equal\n",
          "length as number of variables in data set."
        )
      )
    }

    # NOTE(review): vectors of length ncol(ds) are assigned in the order given,
    # i.e. against the dictionary rows where the id variable is first
    dd[, "form_name"] <- form.name

    if (length(field.type) > 1 && length(field.type) != ncol(ds)) {
      stop(
        paste0(
          "Provided field.type should be of length 1 (value is reused) or equal\n",
          "length as number of variables in data set."
        )
      )
    }

    dd[, "field_type"] <- field.type

    if (is.null(field.label)) {
      dd[, "field_label"] <- dd[, "field_name"]
    } else {
      dd[, "field_label"] <- field.label
    }

    # The column names are returned in the original order of the data set
    if (include.column.names) {
      list("DataDictionary" = dd, "Column names" = field.name)
    } else {
      dd
    }
  }

138
R/utils.r
View File

@ -276,3 +276,141 @@ split_non_repeating_forms <-
structure(x, names = forms) structure(x, names = forms)
} }
#' Extended string splitting
#'
#' Can be used as a substitute of the base function. Main claim to fame is
#' easing the split around the defined delimiter, see example.
#' @param x data
#' @param split delimiter, interpreted as a regular expression
#' @param type Split type. Can be c("classic", "before", "after", "around")
#' @param perl perl param from strsplit(). Used for type "classic" and
#' "around"; "before" and "after" rely on lookarounds and always use
#' perl = TRUE.
#' @param ... additional parameters are passed to base strsplit handling splits
#' (types "classic", "before" and "after")
#'
#' @return list
#' @export
#'
#' @examples
#' test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")
#' strsplitx(test,"[0-9]",type="around")
strsplitx <- function(x,
                      split,
                      type = "classic",
                      perl = FALSE,
                      ...) {
  if (type == "classic") {
    # use base::strsplit
    out <- base::strsplit(x = x, split = split, perl = perl, ...)
  } else if (type == "before") {
    # split before the delimiter and keep it
    out <- base::strsplit(
      x = x,
      split = paste0("(?<=.)(?=", split, ")"),
      perl = TRUE,
      ...
    )
  } else if (type == "after") {
    # split after the delimiter and keep it
    out <- base::strsplit(
      x = x,
      split = paste0("(?<=", split, ")"),
      perl = TRUE,
      ...
    )
  } else if (type == "around") {
    # split around the defined delimiter, keeping each delimiter as its own
    # element. Matching on positions avoids inserting a sentinel character
    # into the text, so input that itself contains e.g. "~" is left intact.
    nms <- names(x)
    chr <- as.character(x)
    # invert = NA returns non-matched and matched pieces interleaved
    pieces <- regmatches(chr, gregexpr(split, chr, perl = perl), invert = NA)
    # Empty pieces arise at the ends and between adjacent delimiters
    out <- lapply(pieces, function(p) p[nzchar(p)])
    names(out) <- nms
  } else {
    # wrong type input
    stop("type must be 'classic', 'after', 'before' or 'around'!")
  }

  out
}
#' Convert single digits to words
#'
#' Elements equal to one of the digits 0-9 are replaced by the written word in
#' the chosen language. Other elements are left as they are, unless
#' `everything` is set, in which case each element is split around its digits
#' and every digit is written out.
#'
#' @param x data. Handle vectors, data.frames and lists
#' @param lang language. Danish (da) and English (en), Default is "en"
#' @param neutrum for numbers depending on counted word. Only affects the
#' Danish word for 1 ("et" instead of "en")
#' @param everything flag to also split numbers >9 to single digits
#'
#' @return returns characters in same format as input. A data.frame is
#' returned for data.frame input.
#' @export
#'
#' @examples
#' d2w(c(2:8,21))
#' d2w(data.frame(2:7,3:8,1),lang="da",neutrum=TRUE)
#'
#' ## If everything=TRUE, also larger numbers are reduced.
#' ## Elements in the list are same length as input
#' d2w(list(2:8,c(2,6,4,23),2), everything=TRUE)
#'
d2w <- function(x, lang = "en", neutrum = FALSE, everything = FALSE) {
  # In Danish the written 1 ends in "t" or "n" depending on the counted word
  one_suffix <- if (neutrum) "t" else "n"

  # Lookup table: the digit in the first column, one column per language
  digit_words <- data.frame(
    num = 0:9,
    en = c(
      "zero", "one", "two", "three", "four",
      "five", "six", "seven", "eight", "nine"
    ),
    da = c(
      "nul", paste0("e", one_suffix), "to", "tre", "fire",
      "fem", "seks", "syv", "otte", "ni"
    )
  )

  # Swaps every element that matches exactly one digit for its word; elements
  # without a match are kept
  to_words <- function(values, lang, neutrum) {
    matched <- lapply(values, function(v) {
      digit_words[, tolower(lang)][digit_words[, 1] == v]
    })
    hit <- lengths(matched) == 1
    values[hit] <- matched[hit]
    unlist(values)
  }

  # Splits one element around its digits with strsplitx(), converts the pieces
  # and pastes them back together separated by spaces
  spell_out <- function(z) {
    parts <- strsplitx(z, "[0-9]", type = "around")
    Reduce(paste, sapply(parts, to_words, lang = lang, neutrum = neutrum))
  }

  # The outer sapply() walks vector elements, data.frame columns or list items
  out <- if (everything) {
    sapply(x, function(y) {
      do.call(c, lapply(y, spell_out))
    })
  } else {
    sapply(x, to_words, lang = lang, neutrum = neutrum)
  }

  if (is.data.frame(x)) {
    out <- data.frame(out)
  }

  out
}

View File

@ -1,54 +1,41 @@
## Test environments ## rhub::check_for_cran() results
- R-hub windows-x86_64-devel (r-devel)
- R-hub ubuntu-gcc-release (r-release)
- R-hub fedora-clang-devel (r-devel)
## R CMD check results ── REDCapCAST 23.6.2: NOTE
On windows-x86_64-devel (r-devel)
checking CRAN incoming feasibility ... [17s] NOTE
Maintainer: 'Andreas Gammelgaard Damsbo <agdamsbo@clin.au.dk>'
New submission
Possibly misspelled words in DESCRIPTION:
Egeler (8:45)
REDCap (2:8, 10:39, 11:30, 14:5)
REDCapRITS (8:26)
interoperability (19:44)
On windows-x86_64-devel (r-devel) Build ID: REDCapCAST_23.6.2.tar.gz-a738190c0d8a4e76b9212e4915625f96
checking for non-standard things in the check directory ... NOTE Platform: Windows Server 2022, R-devel, 64 bit
Found the following files/directories: Submitted: 56m 54.3s ago
Build time: 4m 25.1s
On windows-x86_64-devel (r-devel) checking for non-standard things in the check directory ... NOTE
checking for detritus in the temp directory ... NOTE ''NULL''
checking for detritus in the temp directory ... NOTE
Found the following files/directories: Found the following files/directories:
'lastMiKTeXException' 'lastMiKTeXException'
On ubuntu-gcc-release (r-release) 0 errors ✔ | 0 warnings ✔ | 2 notes ✖
checking CRAN incoming feasibility ... [6s/24s] NOTE
Maintainer: Andreas Gammelgaard Damsbo <agdamsbo@clin.au.dk>
New submission
Possibly misspelled words in DESCRIPTION:
Egeler (8:45)
REDCap (2:8, 10:39, 11:30, 14:5)
REDCapRITS (8:26)
On ubuntu-gcc-release (r-release), fedora-clang-devel (r-devel) ── REDCapCAST 23.6.2: NOTE
checking HTML version of manual ... NOTE
Build ID: REDCapCAST_23.6.2.tar.gz-a9243a74abae4f04b2a0e29a2751c420
Platform: Ubuntu Linux 20.04.1 LTS, R-release, GCC
Submitted: 56m 54.4s ago
Build time: 32m 6.5s
checking HTML version of manual ... NOTE
Skipping checking HTML validation: no command 'tidy' found Skipping checking HTML validation: no command 'tidy' found
On fedora-clang-devel (r-devel) 0 errors ✔ | 0 warnings ✔ | 1 note ✖
checking CRAN incoming feasibility ... [7s/21s] NOTE
Maintainer: Andreas Gammelgaard Damsbo <agdamsbo@clin.au.dk>
New submission
Possibly misspelled words in DESCRIPTION:
Egeler (8:45)
REDCap (2:8, 10:39, 11:30, 14:5)
REDCapRITS (8:26)
0 errors ✔ | 0 warnings ✔ | 6 notes ✖ ── REDCapCAST 23.6.2: NOTE
Build ID: REDCapCAST_23.6.2.tar.gz-ba8ade3478c6494b8a8daee08a502f2b
Platform: Fedora Linux, R-devel, clang, gfortran
Submitted: 56m 54.4s ago
Build time: 29m 25.6s
checking HTML version of manual ... NOTE
Skipping checking HTML validation: no command 'tidy' found
0 errors ✔ | 0 warnings ✔ | 1 note ✖

View File

@ -1,11 +1,13 @@
Assesment Assesment
CMD CMD
Codecov Codecov
DOI
DataDictionary DataDictionary
GStat GStat
GithubActions GithubActions
JSON JSON
Lifecycle Lifecycle
METACRAN
Pivotting Pivotting
README README
REDCap REDCap
@ -14,11 +16,13 @@ REDCapRITS
SpectrumHealthResearch SpectrumHealthResearch
al al
api api
da
demonstrational demonstrational
descirption descirption
desireable desireable
doi doi
dplyr dplyr
ds
et et
immprovements immprovements
jbi jbi
@ -27,6 +31,11 @@ matadata
md md
nad nad
og og
param
perl
redcapcast
stRoke
strsplit
thorugh thorugh
tidyverse tidyverse
uri uri

32
man/d2w.Rd Normal file
View File

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.r
\name{d2w}
\alias{d2w}
\title{Convert single digits to words}
\usage{
d2w(x, lang = "en", neutrum = FALSE, everything = FALSE)
}
\arguments{
\item{x}{data. Handle vectors, data.frames and lists}
\item{lang}{language. Danish (da) and English (en), Default is "en"}
\item{neutrum}{for numbers depending on counted word}
\item{everything}{flag to also split numbers >9 to single digits}
}
\value{
returns characters in same format as input
}
\description{
Convert single digits to words
}
\examples{
d2w(c(2:8,21))
d2w(data.frame(2:7,3:8,1),lang="da",neutrum=TRUE)
## If everything=T, also larger numbers are reduced.
## Elements in the list are same length as input
d2w(list(2:8,c(2,6,4,23),2), everything=TRUE)
}

49
man/ds2dd.Rd Normal file
View File

@ -0,0 +1,49 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd.R
\name{ds2dd}
\alias{ds2dd}
\title{Data set to data dictionary function}
\usage{
ds2dd(
ds,
record.id = "record_id",
form.name = "basis",
field.type = "text",
field.label = NULL,
include.column.names = FALSE,
metadata = names(redcapcast_meta)
)
}
\arguments{
\item{ds}{data set}
\item{record.id}{name or column number of id variable, moved to first row of
data dictionary, character or integer. Default is "record_id".}
\item{form.name}{vector of form names, character string, length 1 or length
equal to number of variables. Default is "basis".}
\item{field.type}{vector of field types, character string, length 1 or length
equal to number of variables. Default is "text".}
\item{field.label}{vector of field labels, character string, length 1 or length
equal to number of variables. Default is NULL and is then identical to field
names.}
\item{include.column.names}{Flag to give detailed output including new
column names for original data set for upload.}
\item{metadata}{Metadata column names. Default is the column names of the
included REDCapCAST::redcapcast_meta.}
}
\value{
data.frame or list of data.frame and vector
}
\description{
Migrated from stRoke ds2dd(). Fits better with the functionality of
'REDCapCAST'
}
\examples{
redcapcast_data$record_id <- seq_len(nrow(redcapcast_data))
ds2dd(redcapcast_data, include.column.names=TRUE)
}

30
man/strsplitx.Rd Normal file
View File

@ -0,0 +1,30 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.r
\name{strsplitx}
\alias{strsplitx}
\title{Extended string splitting}
\usage{
strsplitx(x, split, type = "classic", perl = FALSE, ...)
}
\arguments{
\item{x}{data}
\item{split}{delimiter}
\item{type}{Split type. Can be c("classic", "before", "after", "around")}
\item{perl}{perl param from strsplit()}
\item{...}{additional parameters are passed to base strsplit handling splits}
}
\value{
list
}
\description{
Can be used as a substitute of the base function. Main claim to fame is
easing the split around the defined delimiter, see example.
}
\examples{
test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")
strsplitx(test,"[0-9]",type="around")
}

View File

@ -0,0 +1,20 @@
# Covers every split type of strsplitx() plus the invalid-type error
test_that("strsplitx works", {
  test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")
  # "around" keeps each digit as its own element
  expect_length(strsplitx(test, "[0-9]", type = "around")[[1]], 3)
  # "classic" behaves like base::strsplit(): a leading match gives ""
  expect_equal(strsplitx(test, "[0-9]", type = "classic")[[2]][1], "")
  expect_length(strsplitx(test, "[0-9]", type = "classic")[[4]], 4)
  # "before" and "after" keep the delimiter on either side of the split
  expect_equal(strsplitx("a1b", "1", type = "before")[[1]], c("a", "1b"))
  expect_equal(strsplitx("a1b", "1", type = "after")[[1]], c("a1", "b"))
  expect_error(strsplitx(test, "[0-9]", type = "unknown"))
})
# Covers vector, data.frame and list input of d2w()
test_that("d2w works", {
  # numbers above 9 are kept, so the length is unchanged
  expect_length(d2w(c(2:8, 21)), 8)
  # Danish neutrum form of 1
  expect_equal(
    d2w(data.frame(2:7, 3:8, 1), lang = "da", neutrum = TRUE)[1, 3],
    "et"
  )
  # everything = TRUE writes out each digit of larger numbers
  expect_equal(
    d2w(list(2:8, c(2, 6, 4, 23), 2), everything = TRUE)[[2]][4],
    "two three"
  )
})