mirror of
https://github.com/agdamsbo/REDCapCAST.git
synced 2024-11-22 05:20:23 +01:00
Compare commits
6 Commits
ee396fb811
...
085aa9de62
Author | SHA1 | Date | |
---|---|---|---|
085aa9de62 | |||
e0c1767c9a | |||
66bd7cb67d | |||
64410c910d | |||
cdb3375f6a | |||
8bd4d9ade7 |
@ -1,3 +0,0 @@
|
||||
Version: 23.6.1
|
||||
Date: 2023-06-05 06:44:45 UTC
|
||||
SHA: f2d5b8f6007b7f953c525e58e8b47ec8d702d9be
|
@ -1,6 +1,6 @@
|
||||
Package: REDCapCAST
|
||||
Title: REDCap Castellated Data Handling
|
||||
Version: 23.6.1
|
||||
Version: 23.6.2
|
||||
Authors@R: c(
|
||||
person("Andreas Gammelgaard", "Damsbo", email = "agdamsbo@clin.au.dk", role = c("aut", "cre"),
|
||||
comment = c(ORCID = "0000-0002-7559-1154")),
|
||||
@ -48,6 +48,7 @@ Collate:
|
||||
'utils.r'
|
||||
'process_user_input.r'
|
||||
'REDCap_split.r'
|
||||
'ds2dd.R'
|
||||
'read_redcap_tables.R'
|
||||
'redcap_wider.R'
|
||||
'redcapcast_data.R'
|
||||
|
@ -2,12 +2,15 @@
|
||||
|
||||
export(REDCap_split)
|
||||
export(clean_redcap_name)
|
||||
export(d2w)
|
||||
export(ds2dd)
|
||||
export(focused_metadata)
|
||||
export(match_fields_to_form)
|
||||
export(read_redcap_tables)
|
||||
export(redcap_wider)
|
||||
export(sanitize_split)
|
||||
export(split_non_repeating_forms)
|
||||
export(strsplitx)
|
||||
importFrom(REDCapR,redcap_event_instruments)
|
||||
importFrom(REDCapR,redcap_metadata_read)
|
||||
importFrom(REDCapR,redcap_read)
|
||||
|
13
NEWS.md
13
NEWS.md
@ -1,3 +1,16 @@
|
||||
# REDCapCAST 23.6.2
|
||||
|
||||
This version marks the introduction of a few helper functions to handle database creation.
|
||||
|
||||
### Functions
|
||||
|
||||
* New: `ds2dd()` function migrating from the `stRoke`-package. Assists in building a data dictionary for REDCap from a dataset.
|
||||
|
||||
* New: `strsplitx()` function to ease the string splitting as an extension of `base::strsplit()`. Inspiration from https://stackoverflow.com/a/11014253/21019325 and https://www.r-bloggers.com/2018/04/strsplit-but-keeping-the-delimiter/.
|
||||
|
||||
* New: `d2w()` function converts single digits to written numbers. Used to sanitize variable and form names in REDCap database creation. For more universal number-to-word conversion I would suggest `english::word()` or `xfun::numbers_to_words()`, though I have not tested these.
|
||||
|
||||
|
||||
# REDCapCAST 23.6.1
|
||||
|
||||
### Documentation:
|
||||
|
84
R/ds2dd.R
Normal file
84
R/ds2dd.R
Normal file
@ -0,0 +1,84 @@
|
||||
utils::globalVariables(c("redcapcast_meta"))
|
||||
#' Data set to data dictionary function
#'
#' Migrated from stRoke ds2dd(). Fits better with the functionality of
#' 'REDCapCAST'
#' @param ds data set
#' @param record.id name or column number of id variable, moved to first row of
#' data dictionary, character or integer. Default is "record_id".
#' @param form.name vector of form names, character string, length 1 or length
#' equal to number of variables. Default is "basis".
#' @param field.type vector of field types, character string, length 1 or length
#' equal to number of variables. Default is "text".
#' @param field.label vector of field labels, character string, length 1 or
#' length equal to number of variables. Default is NULL and is then identical
#' to field names.
#' @param include.column.names Flag to give detailed output including new
#' column names for original data set for upload.
#' @param metadata Metadata column names. Default is the names of the included
#' redcapcast_meta.
#'
#' @return data.frame or list of data.frame and vector
#' @export
#'
#' @examples
#' redcapcast_data$record_id <- seq_len(nrow(redcapcast_data))
#' ds2dd(redcapcast_data, include.column.names=TRUE)
ds2dd <- function(ds,
                  record.id = "record_id",
                  form.name = "basis",
                  field.type = "text",
                  field.label = NULL,
                  include.column.names = FALSE,
                  metadata = names(redcapcast_meta)) {
  # Per-variable arguments must be a single value (recycled) or hold one value
  # per column of the data set.
  check_length <- function(value, arg) {
    if (length(value) > 1 && length(value) != ncol(ds)) {
      stop(
        "Provided ", arg, " should be of length 1 (value is reused) or ",
        "equal length as number of variables in data set."
      )
    }
  }

  # Empty data dictionary: one row per variable, one column per metadata name
  dd <- data.frame(matrix(ncol = length(metadata), nrow = ncol(ds)))
  colnames(dd) <- metadata

  if (is.character(record.id) && !all(record.id %in% colnames(ds))) {
    stop("Provided record.id is not a variable name in provided data set.")
  }

  # renaming to lower case and substitute spaces with underscore
  field.name <- gsub(" ", "_", tolower(colnames(ds)))

  # handles both character and integer
  colsel <- colnames(ds) == colnames(ds[record.id])

  # sum() counts the matches directly; it also works when every column matches
  # (single-column data set), where summary() has no third element.
  if (sum(colsel) != 1) {
    stop("Provided record.id has to be or refer to a uniquely named column.")
  }

  # The id variable goes first, remaining variables keep their order
  dd[, "field_name"] <- c(field.name[colsel], field.name[!colsel])

  # NOTE(review): vectors supplied for form.name, field.type and field.label
  # are assigned in the order given, while field_name has the id variable moved
  # to the front - confirm the intended ordering when record.id is not the
  # first column.
  check_length(form.name, "form.name")
  dd[, "form_name"] <- form.name

  check_length(field.type, "field.type")
  dd[, "field_type"] <- field.type

  if (is.null(field.label)) {
    dd[, "field_label"] <- dd[, "field_name"]
  } else {
    check_length(field.label, "field.label")
    dd[, "field_label"] <- field.label
  }

  if (include.column.names) {
    # Column names are returned in the original column order of the data set
    list("DataDictionary" = dd, "Column names" = field.name)
  } else {
    dd
  }
}
|
||||
|
||||
|
138
R/utils.r
138
R/utils.r
@ -276,3 +276,141 @@ split_non_repeating_forms <-
|
||||
structure(x, names = forms)
|
||||
|
||||
}
|
||||
|
||||
|
||||
#' Extended string splitting
#'
#' Can be used as a substitute of the base function. Main claim to fame is
#' easing the split around the defined delimiter, see example.
#' @param x data. For type "around", input that is not character is coerced
#' with as.character().
#' @param split delimiter, a regular expression
#' @param type Split type. Can be c("classic", "before", "after", "around")
#' @param perl perl param from strsplit(). Used for types "classic" and
#' "around"; "before" and "after" always use perl = TRUE.
#' @param ... additional parameters are passed to base strsplit handling splits
#' (types "classic", "before" and "after")
#'
#' @return list
#' @export
#'
#' @examples
#' test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")
#' strsplitx(test,"[0-9]",type="around")
strsplitx <- function(x,
                      split,
                      type = "classic",
                      perl = FALSE,
                      ...) {
  if (type == "classic") {
    # use base::strsplit
    out <- base::strsplit(x = x, split = split, perl = perl, ...)
  } else if (type == "before") {
    # split before the delimiter and keep it
    out <- base::strsplit(
      x = x,
      split = paste0("(?<=.)(?=", split, ")"),
      perl = TRUE,
      ...
    )
  } else if (type == "after") {
    # split after the delimiter and keep it
    out <- base::strsplit(
      x = x,
      split = paste0("(?<=", split, ")"),
      perl = TRUE,
      ...
    )
  } else if (type == "around") {
    # split around the defined delimiter: every match becomes its own element
    # and the text between matches is kept. Match positions are used directly,
    # so no marker character has to be inserted into (and kept out of) the data.
    if (!is.character(x)) {
      x <- as.character(x)
    }

    out <- lapply(x, function(string) {
      if (is.na(string)) {
        return(NA_character_)
      }

      hits <- gregexpr(split, string, perl = perl)
      found <- regmatches(string, hits)[[1]]
      gaps <- regmatches(string, hits, invert = TRUE)[[1]]

      # Interleave gap, match, gap, ..., gap
      pieces <- character(length(gaps) + length(found))
      pieces[seq(1, by = 2, length.out = length(gaps))] <- gaps
      pieces[seq(2, by = 2, length.out = length(found))] <- found

      # Empty strings arise before a leading match, between adjacent matches
      # and after a trailing match; they are dropped.
      pieces[nzchar(pieces)]
    })
  } else {
    # wrong type input
    stop("type must be 'classic', 'after', 'before' or 'around'!")
  }

  out
}
|
||||
|
||||
#' Convert single digits to words
#'
#' @param x data. Handle vectors, data.frames and lists
#' @param lang language. Danish (da) and English (en), Default is "en"
#' @param neutrum for numbers depending on counted word
#' @param everything flag to also split numbers >9 to single digits
#'
#' @return returns characters in same format as input
#' @export
#'
#' @examples
#' d2w(c(2:8,21))
#' d2w(data.frame(2:7,3:8,1),lang="da",neutrum=TRUE)
#'
#' ## If everything=TRUE, also larger numbers are reduced.
#' ## Elements in the list are same length as input
#' d2w(list(2:8,c(2,6,4,23),2), everything=TRUE)
#'
d2w <- function(x, lang = "en", neutrum = FALSE, everything = FALSE) {
  # In Danish the written 1 depends on the counted word
  one_da <- if (neutrum) "et" else "en"

  # Written digits, position i + 1 holds the word for digit i
  words <- list(
    en = c(
      "zero", "one", "two", "three", "four",
      "five", "six", "seven", "eight", "nine"
    ),
    da = c(
      "nul", one_da, "to", "tre", "fire",
      "fem", "seks", "syv", "otte", "ni"
    )
  )

  lang <- tolower(lang)
  if (length(lang) != 1 || !lang %in% names(words)) {
    stop("lang must be one of 'en' or 'da'.")
  }

  digits <- 0:9
  lookup <- words[[lang]]

  # Replaces every element equal to a single digit with its written form;
  # all other elements are left untouched.
  convert <- function(v) {
    wrd <- lapply(v, function(i) {
      lookup[digits == i]
    })

    hit <- lengths(wrd) == 1

    v[hit] <- wrd[hit]

    unlist(v)
  }

  # Also converts numbers >9 to single digits and writes out
  # Uses strsplitx()
  spell_out <- function(y) {
    do.call(c, lapply(y, function(z) {
      pieces <- strsplitx(z, "[0-9]", type = "around")
      Reduce(paste, sapply(pieces, convert))
    }))
  }

  per_element <- if (everything) spell_out else convert

  if (is.data.frame(x)) {
    # Converting column by column keeps the shape of the input, also for a
    # single-row data.frame, which sapply() would collapse to a vector.
    out <- data.frame(lapply(x, per_element), stringsAsFactors = FALSE)
  } else {
    out <- sapply(x, per_element)
  }

  out
}
|
||||
|
@ -1,54 +1,22 @@
|
||||
results$cran_summary()
|
||||
For a CRAN submission we recommend that you fix all NOTEs, WARNINGs and ERRORs.
|
||||
## Test environments
|
||||
- R-hub windows-x86_64-devel (r-devel)
|
||||
- R-hub ubuntu-gcc-release (r-release)
|
||||
- R-hub fedora-clang-devel (r-devel)
|
||||
|
||||
## R CMD check results
|
||||
❯ On windows-x86_64-devel (r-devel)
|
||||
checking CRAN incoming feasibility ... [17s] NOTE
|
||||
Maintainer: 'Andreas Gammelgaard Damsbo <agdamsbo@clin.au.dk>'
|
||||
|
||||
New submission
|
||||
|
||||
Possibly misspelled words in DESCRIPTION:
|
||||
Egeler (8:45)
|
||||
REDCap (2:8, 10:39, 11:30, 14:5)
|
||||
REDCapRITS (8:26)
|
||||
interoperability (19:44)
|
||||
|
||||
❯ On windows-x86_64-devel (r-devel)
|
||||
checking for non-standard things in the check directory ... NOTE
|
||||
Found the following files/directories:
|
||||
''NULL''
|
||||
|
||||
❯ On windows-x86_64-devel (r-devel)
|
||||
checking for detritus in the temp directory ... NOTE
|
||||
Found the following files/directories:
|
||||
'lastMiKTeXException'
|
||||
|
||||
❯ On ubuntu-gcc-release (r-release)
|
||||
checking CRAN incoming feasibility ... [6s/24s] NOTE
|
||||
Maintainer: ‘Andreas Gammelgaard Damsbo <agdamsbo@clin.au.dk>’
|
||||
|
||||
New submission
|
||||
|
||||
Possibly misspelled words in DESCRIPTION:
|
||||
Egeler (8:45)
|
||||
REDCap (2:8, 10:39, 11:30, 14:5)
|
||||
REDCapRITS (8:26)
|
||||
|
||||
❯ On ubuntu-gcc-release (r-release), fedora-clang-devel (r-devel)
|
||||
checking HTML version of manual ... NOTE
|
||||
Skipping checking HTML validation: no command 'tidy' found
|
||||
|
||||
❯ On fedora-clang-devel (r-devel)
|
||||
checking CRAN incoming feasibility ... [7s/21s] NOTE
|
||||
Maintainer: ‘Andreas Gammelgaard Damsbo <agdamsbo@clin.au.dk>’
|
||||
|
||||
New submission
|
||||
|
||||
Possibly misspelled words in DESCRIPTION:
|
||||
Egeler (8:45)
|
||||
REDCap (2:8, 10:39, 11:30, 14:5)
|
||||
REDCapRITS (8:26)
|
||||
|
||||
0 errors ✔ | 0 warnings ✔ | 6 notes ✖
|
||||
0 errors ✔ | 0 warnings ✔ | 3 notes ✖
|
||||
|
@ -1,11 +1,13 @@
|
||||
Assesment
|
||||
CMD
|
||||
Codecov
|
||||
DOI
|
||||
DataDictionary
|
||||
GStat
|
||||
GithubActions
|
||||
JSON
|
||||
Lifecycle
|
||||
METACRAN
|
||||
Pivotting
|
||||
README
|
||||
REDCap
|
||||
@ -14,11 +16,13 @@ REDCapRITS
|
||||
SpectrumHealthResearch
|
||||
al
|
||||
api
|
||||
da
|
||||
demonstrational
|
||||
descirption
|
||||
desireable
|
||||
doi
|
||||
dplyr
|
||||
ds
|
||||
et
|
||||
immprovements
|
||||
jbi
|
||||
@ -27,6 +31,11 @@ matadata
|
||||
md
|
||||
nad
|
||||
og
|
||||
param
|
||||
perl
|
||||
redcapcast
|
||||
stRoke
|
||||
strsplit
|
||||
thorugh
|
||||
tidyverse
|
||||
uri
|
||||
|
32
man/d2w.Rd
Normal file
32
man/d2w.Rd
Normal file
@ -0,0 +1,32 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/utils.r
|
||||
\name{d2w}
|
||||
\alias{d2w}
|
||||
\title{Convert single digits to words}
|
||||
\usage{
|
||||
d2w(x, lang = "en", neutrum = FALSE, everything = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{data. Handle vectors, data.frames and lists}
|
||||
|
||||
\item{lang}{language. Danish (da) and English (en), Default is "en"}
|
||||
|
||||
\item{neutrum}{for numbers depending on counted word}
|
||||
|
||||
\item{everything}{flag to also split numbers >9 to single digits}
|
||||
}
|
||||
\value{
|
||||
returns characters in same format as input
|
||||
}
|
||||
\description{
|
||||
Convert single digits to words
|
||||
}
|
||||
\examples{
|
||||
d2w(c(2:8,21))
|
||||
d2w(data.frame(2:7,3:8,1),lang="da",neutrum=TRUE)
|
||||
|
||||
## If everything=T, also larger numbers are reduced.
|
||||
## Elements in the list are same length as input
|
||||
d2w(list(2:8,c(2,6,4,23),2), everything=TRUE)
|
||||
|
||||
}
|
49
man/ds2dd.Rd
Normal file
49
man/ds2dd.Rd
Normal file
@ -0,0 +1,49 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/ds2dd.R
|
||||
\name{ds2dd}
|
||||
\alias{ds2dd}
|
||||
\title{Data set to data dictionary function}
|
||||
\usage{
|
||||
ds2dd(
|
||||
ds,
|
||||
record.id = "record_id",
|
||||
form.name = "basis",
|
||||
field.type = "text",
|
||||
field.label = NULL,
|
||||
include.column.names = FALSE,
|
||||
metadata = names(redcapcast_meta)
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{ds}{data set}
|
||||
|
||||
\item{record.id}{name or column number of id variable, moved to first row of
|
||||
data dictionary, character or integer. Default is "record_id".}
|
||||
|
||||
\item{form.name}{vector of form names, character string, length 1 or length
|
||||
equal to number of variables. Default is "basis".}
|
||||
|
||||
\item{field.type}{vector of field types, character string, length 1 or length
|
||||
equal to number of variables. Default is "text".}
|
||||
|
||||
\item{field.label}{vector of field labels, character string, length 1 or length
|
||||
equal to number of variables. Default is NULL and is then identical to field
|
||||
names.}
|
||||
|
||||
\item{include.column.names}{Flag to give detailed output including new
|
||||
column names for original data set for upload.}
|
||||
|
||||
\item{metadata}{Metadata column names. Default is the included
|
||||
redcapcast_meta column names.}
|
||||
}
|
||||
\value{
|
||||
data.frame or list of data.frame and vector
|
||||
}
|
||||
\description{
|
||||
Migrated from stRoke ds2dd(). Fits better with the functionality of
|
||||
'REDCapCAST'
|
||||
}
|
||||
\examples{
|
||||
redcapcast_data$record_id <- seq_len(nrow(redcapcast_data))
|
||||
ds2dd(redcapcast_data, include.column.names=TRUE)
|
||||
}
|
30
man/strsplitx.Rd
Normal file
30
man/strsplitx.Rd
Normal file
@ -0,0 +1,30 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/utils.r
|
||||
\name{strsplitx}
|
||||
\alias{strsplitx}
|
||||
\title{Extended string splitting}
|
||||
\usage{
|
||||
strsplitx(x, split, type = "classic", perl = FALSE, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{data}
|
||||
|
||||
\item{split}{delimiter}
|
||||
|
||||
\item{type}{Split type. Can be c("classic", "before", "after", "around")}
|
||||
|
||||
\item{perl}{perl param from strsplit()}
|
||||
|
||||
\item{...}{additional parameters are passed to base strsplit handling splits}
|
||||
}
|
||||
\value{
|
||||
list
|
||||
}
|
||||
\description{
|
||||
Can be used as a substitute of the base function. Main claim to fame is
|
||||
easing the split around the defined delimiter, see example.
|
||||
}
|
||||
\examples{
|
||||
test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")
|
||||
strsplitx(test,"[0-9]",type="around")
|
||||
}
|
20
tests/testthat/test-utils.R
Normal file
20
tests/testthat/test-utils.R
Normal file
@ -0,0 +1,20 @@
|
||||
# Checks the "around" and "classic" split types of strsplitx() and that an
# unknown type is rejected.
test_that("strsplitx works", {
  test <- c("12 months follow-up", "3 steps", "mRS 6 weeks", "Counting to 231 now")

  # "around": every digit becomes its own element, surrounding text is kept
  expect_length(strsplitx(test, "[0-9]", type = "around")[[1]], 3)
  expect_equal(
    strsplitx(test, "[0-9]", type = "around")[[4]],
    c("Counting to ", "2", "3", "1", " now")
  )

  # "classic": identical to base::strsplit(), delimiters are dropped
  expect_equal(strsplitx(test, "[0-9]", type = "classic")[[2]][1], "")
  expect_length(strsplitx(test, "[0-9]", type = "classic")[[4]], 4)

  # unknown split type is an error
  expect_error(strsplitx(test, "[0-9]", type = "nope"))
})
|
||||
|
||||
# Checks d2w() on a vector, a data.frame (Danish, neutrum) and a list with
# multi-digit numbers split into single digits.
test_that("d2w works", {
  expect_length(d2w(c(2:8, 21)), 8)

  expect_equal(
    d2w(data.frame(2:7, 3:8, 1), lang = "da", neutrum = TRUE)[1, 3],
    "et"
  )

  expect_equal(
    d2w(list(2:8, c(2, 6, 4, 23), 2), everything = TRUE)[[2]][4],
    "two three"
  )
})
|
Loading…
Reference in New Issue
Block a user