Modified tests to avoid expected GHA test fail with sprintf()

New description and changes
New str_extract() function to extract substrings by regex pattern.
2025-02-20 18:26:53 +01:00 · 2023-09-05 10:56:43 -07:00 · 2023-09-05 10:08:52 -07:00 · 2023-09-05 10:08:26 -07:00 · 2023-09-05 10:07:33 -07:00 · 2023-09-05 10:05:32 -07:00
14 changed files with 308 additions and 7 deletions
--- a/3
+++ b/3
@ -1,3 +0,0 @@
-Version: 23.6.3
-Date: 2023-07-03 12:02:38 UTC
-SHA: ff9270d1e9493760f5aef123d52f48e0148f8da2
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 Package: stRoke
 Title: Clinical Stroke Research
-Version: 23.6.3
+Version: 23.9.1
 Authors@R: 
    person("Andreas Gammelgaard", "Damsbo", , "agdamsbo@clin.au.dk", role = c("aut", "cre"),
           comment = c(ORCID = "0000-0002-7559-1154"))
--- a/3
+++ b/3
@ -2,6 +2,7 @@

 S3method(print,win_Prob)
 export(age_calc)
+export(chunks_of_n)
 export(ci_plot)
 export(color_plot)
 export(contrast_text)
@ -13,9 +14,11 @@ export(files_filter)
 export(generic_stroke)
 export(index_plot)
 export(label_select)
+export(n_chunks)
 export(pase_calc)
 export(quantile_cut)
 export(source_lines)
+export(str_extract)
 export(win_prob)
 export(write_ical)
 import(ggplot2)
--- a/NEWS.md
+++ b/NEWS.md
@ -1,3 +1,15 @@
+# stRoke 23.9.1
+
+### Functions:
+
+* NEW: `chunks_of_n()` uses `split()` to separate supplied vector or data frame into chunks of n. Flags to set if all but the last chunks should be exactly size n, or if they should be evenly sized of max n. Labels can be provided including regex pattern for subject naming to include in chunk names.
+
+* NEW: `n_chunks()` is the opposite of `chunks_of_n()` and is simply a wrapper for this function to create list of n chunks based of provided vector or data frame.
+
+* NEW: `str_extract()` will extract the substring of a character string given by a regex pattern. Came to be as a helper function for labelling chunks in `chunks_of_n()`, but will be useful on its own. Other functions doing the same exists, but this is my take only using base _R_. Draws on `REDCapCAST::strsplitx()`, where splits can be performed around a pattern.
+
+* Deprecation: `ds2dd()` moved to `REDCapCAST::ds2dd()` as this is where it belongs.
+
 # stRoke 23.6.3

 ### Bug
--- a/R/chunks_of_n.R
+++ b/R/chunks_of_n.R
@ -0,0 +1,84 @@
+#' Split to chunks of size n
+#'
+#' @param d data. Can be vector or data frame.
+#' @param n number of chunks
+#' @param label naming prefix for chunk names
+#' @param even boolean to set if size of chunks should be evenly distributed.
+#' @param pattern regex pattern to extract names from provided vector. If data
+#' frame, will assume first column is name.
+#'
+#' @return List of length n
+#' @export
+#'
+#' @examples
+#' tail(chunks_of_n(seq_len(100),7),3)
+#' tail(chunks_of_n(seq_len(100),7,even=TRUE),3)
+#' ds <- data.frame(nm=paste0("Sub",
+#' sprintf("%03s", rownames(stRoke::talos))),stRoke::talos)
+#' head(chunks_of_n(ds,7,pattern="Sub[0-9]{3}",label="grp"),2)
+#' ## Please notice that no sorting is performed. This is on purpose to preserve
+#' ## original sorting. If sorting is intended, try something like this:
+#' ds[order(ds$nm),] |> chunks_of_n(7,pattern="Sub[0-9]{3}",label="grp") |> 
+#' head(2)
+
+chunks_of_n <- function(d,n,label=NULL, even=FALSE, pattern=NULL){
+  
+  if (!(is.vector(d) |
+        is.data.frame(d)) |
+      inherits(d,"list")) {
+    stop("Provided data is not vector or data.frame.")
+  }
+  
+  if (is.data.frame(d)) ns <- nrow(d) else ns <- length(d) 
+  
+  if (even) {
+    g <- sort(rep_len(seq_len(ceiling(ns / n)), ns))
+  } else {
+    g <- ceiling(seq_len(ns) / n)
+  }
+  
+  ls <- split(d, g)
+  
+  if (!is.null(pattern)) {
+    if(is.data.frame(d)) {
+      ns <- str_extract(d=d[[1]],pattern=pattern)
+      } else ns <- str_extract(d=d,pattern=pattern)
+    
+    
+    suffix <- do.call(c, lapply(split(ns, g), function(i) {
+      paste0(i[[1]], "-", i[[length(i)]])
+    }))
+  } else suffix <- names(ls)
+  
+  if (is.character(label)){
+    names(ls) <- paste0(label,"-",suffix)
+    } else names(ls) <- suffix
+  
+  ls
+}
+
+#' Splits in n chunks
+#'
+#' @param d data
+#' @param n number of chunks
+#' @param ... arguments passed to internal `chunks_of_n()`
+#'
+#' @return List of chunks
+#' @export
+#'
+#' @examples
+#' lengths(n_chunks(d=seq_len(100),n=7,even=TRUE))
+#' lengths(n_chunks(d=seq_len(100),n=7,even=FALSE))
+n_chunks <- function(d,n,...){
+  if (!(is.vector(d) |
+        is.data.frame(d)) |
+      inherits(d,"list")) {
+    stop("Provided data is not vector or data.frame.")
+  }
+  
+  if (is.data.frame(d)) ns <- nrow(d) else ns <- length(d) 
+  
+  nn <- ceiling(ns/n)
+  
+  chunks_of_n(d=d,n=nn,...)
+}
--- a/R/ds2dd.R
+++ b/R/ds2dd.R
@ -1,5 +1,5 @@
 utils::globalVariables(c("metadata_names"))
-#' Data set to data dictionary function
+#' *DEPRECATED* Moved to REDCapCAST::ds2dd() | Data set to data dictionary function
 #'
 #' @param ds data set
 #' @param record.id name or column number of id variable, moved to first row of 
--- a/R/str_extract.R
+++ b/R/str_extract.R
@ -0,0 +1,37 @@
+#' Extract string based on regex pattern
+#'
+#' Use base::strsplit to 
+#' @param d vector of character strings
+#' @param pattern regex pattern to match
+#'
+#' @return vector of character strings
+#' @export
+#'
+#' @examples
+#' ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){
+#' paste(sample(letters,i,TRUE),collapse = "")}))
+#' ds <- do.call(c,lapply(1:20,function(i){
+#' paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")}))
+#' str_extract(ds,"([0-9]+)")
+str_extract <- function(d,pattern){
+  if (!is.vector(d)) stop("Please provide a vector")
+  
+  ## Drawing on the solution in REDCapCAST::strsplitx to split around pattern
+  nl <- strsplit(gsub("~~", "~", # Removes double ~
+                      gsub("^~", "", # Removes leading ~
+                           gsub(
+                             # Splits and inserts ~ at all delimiters
+                             paste0("(", pattern, ")"), "~\\1~", d
+                           ))), "~")
+  
+  ## Reusing the pattern, to sub with "" and match on length 0 to index the
+  ## element containing the pattern. Only first occurance included.
+  indx <- lapply(nl,function(i){
+    match(0,nchar(sub(pattern,"",i)))
+    })
+  
+  ## Using lapply to subsset the given index for each element in list
+  do.call(c,lapply(seq_along(nl), function(i){
+    nl[[i]][indx[[i]]]
+  } ))
+}
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@ -16,6 +16,7 @@ ORCID
 OpenAI's
 PASE
 REDCap
+REDCapCAST
 REDCapRITS
 RStudio
 Randomisation
@ -68,6 +69,7 @@ ics
 inteRgrate
 jan
 jss
+labelling
 lm
 lst
 luminance
@ -78,6 +80,7 @@ rect
 rgb
 sapply
 stackoverflow
+strsplit
 teppo
 vapply
 vec
--- a/man/chunks_of_n.Rd
+++ b/man/chunks_of_n.Rd
@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/chunks_of_n.R
+\name{chunks_of_n}
+\alias{chunks_of_n}
+\title{Split to chunks of size n}
+\usage{
+chunks_of_n(d, n, label = NULL, even = FALSE, pattern = NULL)
+}
+\arguments{
+\item{d}{data. Can be vector or data frame.}
+
+\item{n}{number of chunks}
+
+\item{label}{naming prefix for chunk names}
+
+\item{even}{boolean to set if size of chunks should be evenly distributed.}
+
+\item{pattern}{regex pattern to extract names from provided vector. If data
+frame, will assume first column is name.}
+}
+\value{
+List of length n
+}
+\description{
+Split to chunks of size n
+}
+\examples{
+tail(chunks_of_n(seq_len(100),7),3)
+tail(chunks_of_n(seq_len(100),7,even=TRUE),3)
+ds <- data.frame(nm=paste0("Sub",
+sprintf("\%03s", rownames(stRoke::talos))),stRoke::talos)
+head(chunks_of_n(ds,7,pattern="Sub[0-9]{3}",label="grp"),2)
+## Please notice that no sorting is performed. This is on purpose to preserve
+## original sorting. If sorting is intended, try something like this:
+ds[order(ds$nm),] |> chunks_of_n(7,pattern="Sub[0-9]{3}",label="grp") |> 
+head(2)
+}
--- a/man/ds2dd.Rd
+++ b/man/ds2dd.Rd
@ -2,7 +2,7 @@
 % Please edit documentation in R/ds2dd.R
 \name{ds2dd}
 \alias{ds2dd}
-\title{Data set to data dictionary function}
+\title{\emph{DEPRECATED} Moved to REDCapCAST::ds2dd() | Data set to data dictionary function}
 \usage{
 ds2dd(
  ds,
@ -40,7 +40,7 @@ stRoke::metadata_names.}
 data.frame or list of data.frame and vector
 }
 \description{
-Data set to data dictionary function
+\emph{DEPRECATED} Moved to REDCapCAST::ds2dd() | Data set to data dictionary function
 }
 \examples{
 talos$id <- seq_len(nrow(talos))
--- a/man/n_chunks.Rd
+++ b/man/n_chunks.Rd
@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/chunks_of_n.R
+\name{n_chunks}
+\alias{n_chunks}
+\title{Splits in n chunks}
+\usage{
+n_chunks(d, n, ...)
+}
+\arguments{
+\item{d}{data}
+
+\item{n}{number of chunks}
+
+\item{...}{arguments passed to internal \code{chunks_of_n()}}
+}
+\value{
+List of chunks
+}
+\description{
+Splits in n chunks
+}
+\examples{
+lengths(n_chunks(d=seq_len(100),n=7,even=TRUE))
+lengths(n_chunks(d=seq_len(100),n=7,even=FALSE))
+}
--- a/man/str_extract.Rd
+++ b/man/str_extract.Rd
@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/str_extract.R
+\name{str_extract}
+\alias{str_extract}
+\title{Extract string based on regex pattern}
+\usage{
+str_extract(d, pattern)
+}
+\arguments{
+\item{d}{vector of character strings}
+
+\item{pattern}{regex pattern to match}
+}
+\value{
+vector of character strings
+}
+\description{
+Use base::strsplit to
+}
+\examples{
+ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){
+paste(sample(letters,i,TRUE),collapse = "")}))
+ds <- do.call(c,lapply(1:20,function(i){
+paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")}))
+str_extract(ds,"([0-9]+)")
+}
--- a/tests/testthat/test-chunks.R
+++ b/tests/testthat/test-chunks.R
@ -0,0 +1,62 @@
+library(testthat)
+test_that("chunks_of_n returns correct", {
+  expect_length(chunks_of_n(seq_len(100), 7),15)
+  
+  expect_equal(lengths(chunks_of_n(seq_len(30), 7, even = TRUE), 
+                       use.names = FALSE), c(6, 6, 6, 6, 6))
+  
+  # This is the example from the function, but I believe it fails in GitHub testing
+  # ds <- data.frame(nm = paste0("Sub",
+  #                              sprintf("%03s", rownames(stRoke::talos))),
+  #                  stRoke::talos)
+  
+  ds <- data.frame(nm = paste0("Sub",rownames(stRoke::talos)), 
+                   stRoke::talos)
+  
+  expect_equal(head(names(chunks_of_n(ds, 7, 
+                                      pattern = "Sub([0-9]+)", label = "grp")),
+                    1),"grp-Sub38-Sub11")
+  
+  expect_equal(
+    ds[order(as.numeric(rownames(stRoke::talos))), ] |> 
+      chunks_of_n(7, pattern = "Sub([0-9]+)", label = "grp") |>  
+      head(1) |> names(),
+    "grp-Sub1-Sub20"
+  )
+  
+  ## Errors
+  expect_error(chunks_of_n(list(ds), 7, pattern = "Sub[0-9]{3}", label = "grp"))
+  
+  
+})
+
+test_that("n_chunks returns correct", {
+  expect_length(n_chunks(seq_len(100), 7),7)
+  
+  expect_equal(lengths(n_chunks(seq_len(30), 7, even = TRUE), 
+                       use.names = FALSE), rep(5,6))
+  
+  # This is the example from the function, but I believe it fails in GitHub testing
+  # ds <- data.frame(nm = paste0("Sub",
+  #                              sprintf("%03s", rownames(stRoke::talos))),
+  #                  stRoke::talos)
+  
+  ds <- data.frame(nm = paste0("Sub",rownames(stRoke::talos)), 
+                   stRoke::talos)
+  
+  expect_equal(head(names(n_chunks(ds, 7, 
+                                      pattern = "Sub([0-9]+)", label = "grp")),
+                    1),"grp-Sub38-Sub603")
+  
+  expect_equal(
+    ds[order(as.numeric(rownames(stRoke::talos))), ] |> 
+      n_chunks(7, pattern = "Sub([0-9]+)", label = "grp") |>  
+      head(1) |> names(),
+    "grp-Sub1-Sub72"
+  )
+  
+  ## Errors
+  expect_error(n_chunks(list(ds), 7, pattern = "Sub([0-9]+)", label = "grp"))
+  
+  
+})
--- a/tests/testthat/test-str_extract.R
+++ b/tests/testthat/test-str_extract.R
@ -0,0 +1,15 @@
+# library(testthat)
+test_that("str_extract returns correct", {
+  ls <- do.call(c, lapply(sample(4:8, 20, T), function(i) {
+    paste(sample(letters, i, T), collapse = "")
+  }))
+  
+  ds <- do.call(c, lapply(1:20, function(i) {
+    paste(sample(ls, 1), i, sample(ls, 1), "23", sep = "_")
+  }))
+  
+  expect_equal(nchar(str_extract(ds, "([0-9]+)")),c(rep(1,9),rep(2,11)))
+  
+  expect_error(str_extract(data.frame(ds), "([0-9]+)"))
+  
+})
Author	SHA1	Message	Date
AG Damsbo	163e970865	Modified tests to avoid expected GHA test fail with sprintf()	2023-09-05 10:56:43 -07:00
AG Damsbo	aed6ef02b0	New description and changes	2023-09-05 10:08:52 -07:00
AG Damsbo	bd647a9acf	New str_extract() function to extract substrings by regex pattern.	2023-09-05 10:08:26 -07:00
AG Damsbo	193844c212	New chunks_of_n() and n_chunks() functions	2023-09-05 10:07:33 -07:00
AG Damsbo	f810a7af53	Moved to REDCapCAST::de2dd()	2023-09-05 10:05:32 -07:00