Compare commits

...

5 Commits

14 changed files with 308 additions and 7 deletions

View File

@ -1,3 +0,0 @@
Version: 23.6.3
Date: 2023-07-03 12:02:38 UTC
SHA: ff9270d1e9493760f5aef123d52f48e0148f8da2

View File

@ -1,6 +1,6 @@
Package: stRoke
Title: Clinical Stroke Research
Version: 23.6.3
Version: 23.9.1
Authors@R:
person("Andreas Gammelgaard", "Damsbo", , "agdamsbo@clin.au.dk", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-7559-1154"))

View File

@ -2,6 +2,7 @@
S3method(print,win_Prob)
export(age_calc)
export(chunks_of_n)
export(ci_plot)
export(color_plot)
export(contrast_text)
@ -13,9 +14,11 @@ export(files_filter)
export(generic_stroke)
export(index_plot)
export(label_select)
export(n_chunks)
export(pase_calc)
export(quantile_cut)
export(source_lines)
export(str_extract)
export(win_prob)
export(write_ical)
import(ggplot2)

12
NEWS.md
View File

@ -1,3 +1,15 @@
# stRoke 23.9.1
### Functions:
* NEW: `chunks_of_n()` uses `split()` to separate supplied vector or data frame into chunks of n. Flags to set if all but the last chunks should be exactly size n, or if they should be evenly sized of max n. Labels can be provided including regex pattern for subject naming to include in chunk names.
* NEW: `n_chunks()` is the opposite of `chunks_of_n()` and is simply a wrapper for this function to create list of n chunks based of provided vector or data frame.
* NEW: `str_extract()` will extract the substring of a character string given by a regex pattern. Came to be as a helper function for labelling chunks in `chunks_of_n()`, but will be useful on its own. Other functions doing the same exists, but this is my take only using base _R_. Draws on `REDCapCAST::strsplitx()`, where splits can be performed around a pattern.
* Deprecation: `ds2dd()` moved to `REDCapCAST::ds2dd()` as this is where it belongs.
# stRoke 23.6.3
### Bug

84
R/chunks_of_n.R Normal file
View File

@ -0,0 +1,84 @@
#' Split to chunks of size n
#'
#' @param d data. Can be vector or data frame.
#' @param n number of chunks
#' @param label naming prefix for chunk names
#' @param even boolean to set if size of chunks should be evenly distributed.
#' @param pattern regex pattern to extract names from provided vector. If data
#' frame, will assume first column is name.
#'
#' @return List of length n
#' @export
#'
#' @examples
#' tail(chunks_of_n(seq_len(100),7),3)
#' tail(chunks_of_n(seq_len(100),7,even=TRUE),3)
#' ds <- data.frame(nm=paste0("Sub",
#' sprintf("%03s", rownames(stRoke::talos))),stRoke::talos)
#' head(chunks_of_n(ds,7,pattern="Sub[0-9]{3}",label="grp"),2)
#' ## Please notice that no sorting is performed. This is on purpose to preserve
#' ## original sorting. If sorting is intended, try something like this:
#' ds[order(ds$nm),] |> chunks_of_n(7,pattern="Sub[0-9]{3}",label="grp") |>
#' head(2)
chunks_of_n <- function(d,n,label=NULL, even=FALSE, pattern=NULL){
if (!(is.vector(d) |
is.data.frame(d)) |
inherits(d,"list")) {
stop("Provided data is not vector or data.frame.")
}
if (is.data.frame(d)) ns <- nrow(d) else ns <- length(d)
if (even) {
g <- sort(rep_len(seq_len(ceiling(ns / n)), ns))
} else {
g <- ceiling(seq_len(ns) / n)
}
ls <- split(d, g)
if (!is.null(pattern)) {
if(is.data.frame(d)) {
ns <- str_extract(d=d[[1]],pattern=pattern)
} else ns <- str_extract(d=d,pattern=pattern)
suffix <- do.call(c, lapply(split(ns, g), function(i) {
paste0(i[[1]], "-", i[[length(i)]])
}))
} else suffix <- names(ls)
if (is.character(label)){
names(ls) <- paste0(label,"-",suffix)
} else names(ls) <- suffix
ls
}
#' Splits in n chunks
#'
#' @param d data
#' @param n number of chunks
#' @param ... arguments passed to internal `chunks_of_n()`
#'
#' @return List of chunks
#' @export
#'
#' @examples
#' lengths(n_chunks(d=seq_len(100),n=7,even=TRUE))
#' lengths(n_chunks(d=seq_len(100),n=7,even=FALSE))
n_chunks <- function(d,n,...){
if (!(is.vector(d) |
is.data.frame(d)) |
inherits(d,"list")) {
stop("Provided data is not vector or data.frame.")
}
if (is.data.frame(d)) ns <- nrow(d) else ns <- length(d)
nn <- ceiling(ns/n)
chunks_of_n(d=d,n=nn,...)
}

View File

@ -1,5 +1,5 @@
utils::globalVariables(c("metadata_names"))
#' Data set to data dictionary function
#' *DEPRECATED* Moved to REDCapCAST::ds2dd() | Data set to data dictionary function
#'
#' @param ds data set
#' @param record.id name or column number of id variable, moved to first row of

37
R/str_extract.R Normal file
View File

@ -0,0 +1,37 @@
#' Extract string based on regex pattern
#'
#' Use base::strsplit to
#' @param d vector of character strings
#' @param pattern regex pattern to match
#'
#' @return vector of character strings
#' @export
#'
#' @examples
#' ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){
#' paste(sample(letters,i,TRUE),collapse = "")}))
#' ds <- do.call(c,lapply(1:20,function(i){
#' paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")}))
#' str_extract(ds,"([0-9]+)")
str_extract <- function(d,pattern){
if (!is.vector(d)) stop("Please provide a vector")
## Drawing on the solution in REDCapCAST::strsplitx to split around pattern
nl <- strsplit(gsub("~~", "~", # Removes double ~
gsub("^~", "", # Removes leading ~
gsub(
# Splits and inserts ~ at all delimiters
paste0("(", pattern, ")"), "~\\1~", d
))), "~")
## Reusing the pattern, to sub with "" and match on length 0 to index the
## element containing the pattern. Only first occurance included.
indx <- lapply(nl,function(i){
match(0,nchar(sub(pattern,"",i)))
})
## Using lapply to subsset the given index for each element in list
do.call(c,lapply(seq_along(nl), function(i){
nl[[i]][indx[[i]]]
} ))
}

View File

@ -16,6 +16,7 @@ ORCID
OpenAI's
PASE
REDCap
REDCapCAST
REDCapRITS
RStudio
Randomisation
@ -68,6 +69,7 @@ ics
inteRgrate
jan
jss
labelling
lm
lst
luminance
@ -78,6 +80,7 @@ rect
rgb
sapply
stackoverflow
strsplit
teppo
vapply
vec

37
man/chunks_of_n.Rd Normal file
View File

@ -0,0 +1,37 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/chunks_of_n.R
\name{chunks_of_n}
\alias{chunks_of_n}
\title{Split to chunks of size n}
\usage{
chunks_of_n(d, n, label = NULL, even = FALSE, pattern = NULL)
}
\arguments{
\item{d}{data. Can be vector or data frame.}
\item{n}{number of chunks}
\item{label}{naming prefix for chunk names}
\item{even}{boolean to set if size of chunks should be evenly distributed.}
\item{pattern}{regex pattern to extract names from provided vector. If data
frame, will assume first column is name.}
}
\value{
List of length n
}
\description{
Split to chunks of size n
}
\examples{
tail(chunks_of_n(seq_len(100),7),3)
tail(chunks_of_n(seq_len(100),7,even=TRUE),3)
ds <- data.frame(nm=paste0("Sub",
sprintf("\%03s", rownames(stRoke::talos))),stRoke::talos)
head(chunks_of_n(ds,7,pattern="Sub[0-9]{3}",label="grp"),2)
## Please notice that no sorting is performed. This is on purpose to preserve
## original sorting. If sorting is intended, try something like this:
ds[order(ds$nm),] |> chunks_of_n(7,pattern="Sub[0-9]{3}",label="grp") |>
head(2)
}

View File

@ -2,7 +2,7 @@
% Please edit documentation in R/ds2dd.R
\name{ds2dd}
\alias{ds2dd}
\title{Data set to data dictionary function}
\title{\emph{DEPRECATED} Moved to REDCapCAST::ds2dd() | Data set to data dictionary function}
\usage{
ds2dd(
ds,
@ -40,7 +40,7 @@ stRoke::metadata_names.}
data.frame or list of data.frame and vector
}
\description{
Data set to data dictionary function
\emph{DEPRECATED} Moved to REDCapCAST::ds2dd() | Data set to data dictionary function
}
\examples{
talos$id <- seq_len(nrow(talos))

25
man/n_chunks.Rd Normal file
View File

@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/chunks_of_n.R
\name{n_chunks}
\alias{n_chunks}
\title{Splits in n chunks}
\usage{
n_chunks(d, n, ...)
}
\arguments{
\item{d}{data}
\item{n}{number of chunks}
\item{...}{arguments passed to internal \code{chunks_of_n()}}
}
\value{
List of chunks
}
\description{
Splits in n chunks
}
\examples{
lengths(n_chunks(d=seq_len(100),n=7,even=TRUE))
lengths(n_chunks(d=seq_len(100),n=7,even=FALSE))
}

26
man/str_extract.Rd Normal file
View File

@ -0,0 +1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/str_extract.R
\name{str_extract}
\alias{str_extract}
\title{Extract string based on regex pattern}
\usage{
str_extract(d, pattern)
}
\arguments{
\item{d}{vector of character strings}
\item{pattern}{regex pattern to match}
}
\value{
vector of character strings
}
\description{
Use base::strsplit to
}
\examples{
ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){
paste(sample(letters,i,TRUE),collapse = "")}))
ds <- do.call(c,lapply(1:20,function(i){
paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")}))
str_extract(ds,"([0-9]+)")
}

View File

@ -0,0 +1,62 @@
library(testthat)
test_that("chunks_of_n returns correct", {
expect_length(chunks_of_n(seq_len(100), 7),15)
expect_equal(lengths(chunks_of_n(seq_len(30), 7, even = TRUE),
use.names = FALSE), c(6, 6, 6, 6, 6))
# This is the example from the function, but I believe it fails in GitHub testing
# ds <- data.frame(nm = paste0("Sub",
# sprintf("%03s", rownames(stRoke::talos))),
# stRoke::talos)
ds <- data.frame(nm = paste0("Sub",rownames(stRoke::talos)),
stRoke::talos)
expect_equal(head(names(chunks_of_n(ds, 7,
pattern = "Sub([0-9]+)", label = "grp")),
1),"grp-Sub38-Sub11")
expect_equal(
ds[order(as.numeric(rownames(stRoke::talos))), ] |>
chunks_of_n(7, pattern = "Sub([0-9]+)", label = "grp") |>
head(1) |> names(),
"grp-Sub1-Sub20"
)
## Errors
expect_error(chunks_of_n(list(ds), 7, pattern = "Sub[0-9]{3}", label = "grp"))
})
test_that("n_chunks returns correct", {
expect_length(n_chunks(seq_len(100), 7),7)
expect_equal(lengths(n_chunks(seq_len(30), 7, even = TRUE),
use.names = FALSE), rep(5,6))
# This is the example from the function, but I believe it fails in GitHub testing
# ds <- data.frame(nm = paste0("Sub",
# sprintf("%03s", rownames(stRoke::talos))),
# stRoke::talos)
ds <- data.frame(nm = paste0("Sub",rownames(stRoke::talos)),
stRoke::talos)
expect_equal(head(names(n_chunks(ds, 7,
pattern = "Sub([0-9]+)", label = "grp")),
1),"grp-Sub38-Sub603")
expect_equal(
ds[order(as.numeric(rownames(stRoke::talos))), ] |>
n_chunks(7, pattern = "Sub([0-9]+)", label = "grp") |>
head(1) |> names(),
"grp-Sub1-Sub72"
)
## Errors
expect_error(n_chunks(list(ds), 7, pattern = "Sub([0-9]+)", label = "grp"))
})

View File

@ -0,0 +1,15 @@
# library(testthat)
test_that("str_extract returns correct", {
ls <- do.call(c, lapply(sample(4:8, 20, T), function(i) {
paste(sample(letters, i, T), collapse = "")
}))
ds <- do.call(c, lapply(1:20, function(i) {
paste(sample(ls, 1), i, sample(ls, 1), "23", sep = "_")
}))
expect_equal(nchar(str_extract(ds, "([0-9]+)")),c(rep(1,9),rep(2,11)))
expect_error(str_extract(data.frame(ds), "([0-9]+)"))
})