mirror of
https://github.com/agdamsbo/stRoke.git
synced 2024-12-03 17:21:53 +01:00
New chunks_of_n() and n_chunks() functions
This commit is contained in:
parent
f810a7af53
commit
193844c212
84
R/chunks_of_n.R
Normal file
84
R/chunks_of_n.R
Normal file
@ -0,0 +1,84 @@
|
||||
#' Split to chunks of size n
|
||||
#'
|
||||
#' @param d data. Can be vector or data frame.
|
||||
#' @param n number of chunks
|
||||
#' @param label naming prefix for chunk names
|
||||
#' @param even boolean to set if size of chunks should be evenly distributed.
|
||||
#' @param pattern regex pattern to extract names from provided vector. If data
|
||||
#' frame, will assume first column is name.
|
||||
#'
|
||||
#' @return List of length n
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' tail(chunks_of_n(seq_len(100),7),3)
|
||||
#' tail(chunks_of_n(seq_len(100),7,even=TRUE),3)
|
||||
#' ds <- data.frame(nm=paste0("Sub",
|
||||
#' sprintf("%03s", rownames(stRoke::talos))),stRoke::talos)
|
||||
#' head(chunks_of_n(ds,7,pattern="Sub[0-9]{3}",label="grp"),2)
|
||||
#' ## Please notice that no sorting is performed. This is on purpose to preserve
|
||||
#' ## original sorting. If sorting is intended, try something like this:
|
||||
#' ds[order(ds$nm),] |> chunks_of_n(7,pattern="Sub[0-9]{3}",label="grp") |>
|
||||
#' head(2)
|
||||
|
||||
chunks_of_n <- function(d,n,label=NULL, even=FALSE, pattern=NULL){
|
||||
|
||||
if (!(is.vector(d) |
|
||||
is.data.frame(d)) |
|
||||
inherits(d,"list")) {
|
||||
stop("Provided data is not vector or data.frame.")
|
||||
}
|
||||
|
||||
if (is.data.frame(d)) ns <- nrow(d) else ns <- length(d)
|
||||
|
||||
if (even) {
|
||||
g <- sort(rep_len(seq_len(ceiling(ns / n)), ns))
|
||||
} else {
|
||||
g <- ceiling(seq_len(ns) / n)
|
||||
}
|
||||
|
||||
ls <- split(d, g)
|
||||
|
||||
if (!is.null(pattern)) {
|
||||
if(is.data.frame(d)) {
|
||||
ns <- str_extract(d=d[[1]],pattern=pattern)
|
||||
} else ns <- str_extract(d=d,pattern=pattern)
|
||||
|
||||
|
||||
suffix <- do.call(c, lapply(split(ns, g), function(i) {
|
||||
paste0(i[[1]], "-", i[[length(i)]])
|
||||
}))
|
||||
} else suffix <- names(ls)
|
||||
|
||||
if (is.character(label)){
|
||||
names(ls) <- paste0(label,"-",suffix)
|
||||
} else names(ls) <- suffix
|
||||
|
||||
ls
|
||||
}
|
||||
|
||||
#' Splits in n chunks
|
||||
#'
|
||||
#' @param d data
|
||||
#' @param n number of chunks
|
||||
#' @param ... arguments passed to internal `chunks_of_n()`
|
||||
#'
|
||||
#' @return List of chunks
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' lengths(n_chunks(d=seq_len(100),n=7,even=TRUE))
|
||||
#' lengths(n_chunks(d=seq_len(100),n=7,even=FALSE))
|
||||
n_chunks <- function(d,n,...){
|
||||
if (!(is.vector(d) |
|
||||
is.data.frame(d)) |
|
||||
inherits(d,"list")) {
|
||||
stop("Provided data is not vector or data.frame.")
|
||||
}
|
||||
|
||||
if (is.data.frame(d)) ns <- nrow(d) else ns <- length(d)
|
||||
|
||||
nn <- ceiling(ns/n)
|
||||
|
||||
chunks_of_n(d=d,n=nn,...)
|
||||
}
|
37
man/chunks_of_n.Rd
Normal file
37
man/chunks_of_n.Rd
Normal file
@ -0,0 +1,37 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/chunks_of_n.R
|
||||
\name{chunks_of_n}
|
||||
\alias{chunks_of_n}
|
||||
\title{Split to chunks of size n}
|
||||
\usage{
|
||||
chunks_of_n(d, n, label = NULL, even = FALSE, pattern = NULL)
|
||||
}
|
||||
\arguments{
|
||||
\item{d}{data. Can be vector or data frame.}
|
||||
|
||||
\item{n}{number of chunks}
|
||||
|
||||
\item{label}{naming prefix for chunk names}
|
||||
|
||||
\item{even}{boolean to set if size of chunks should be evenly distributed.}
|
||||
|
||||
\item{pattern}{regex pattern to extract names from provided vector. If data
|
||||
frame, will assume first column is name.}
|
||||
}
|
||||
\value{
|
||||
List of length n
|
||||
}
|
||||
\description{
|
||||
Split to chunks of size n
|
||||
}
|
||||
\examples{
|
||||
tail(chunks_of_n(seq_len(100),7),3)
|
||||
tail(chunks_of_n(seq_len(100),7,even=TRUE),3)
|
||||
ds <- data.frame(nm=paste0("Sub",
|
||||
sprintf("\%03s", rownames(stRoke::talos))),stRoke::talos)
|
||||
head(chunks_of_n(ds,7,pattern="Sub[0-9]{3}",label="grp"),2)
|
||||
## Please notice that no sorting is performed. This is on purpose to preserve
|
||||
## original sorting. If sorting is intended, try something like this:
|
||||
ds[order(ds$nm),] |> chunks_of_n(7,pattern="Sub[0-9]{3}",label="grp") |>
|
||||
head(2)
|
||||
}
|
25
man/n_chunks.Rd
Normal file
25
man/n_chunks.Rd
Normal file
@ -0,0 +1,25 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/chunks_of_n.R
|
||||
\name{n_chunks}
|
||||
\alias{n_chunks}
|
||||
\title{Splits in n chunks}
|
||||
\usage{
|
||||
n_chunks(d, n, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{d}{data}
|
||||
|
||||
\item{n}{number of chunks}
|
||||
|
||||
\item{...}{arguments passed to internal \code{chunks_of_n()}}
|
||||
}
|
||||
\value{
|
||||
List of chunks
|
||||
}
|
||||
\description{
|
||||
Splits in n chunks
|
||||
}
|
||||
\examples{
|
||||
lengths(n_chunks(d=seq_len(100),n=7,even=TRUE))
|
||||
lengths(n_chunks(d=seq_len(100),n=7,even=FALSE))
|
||||
}
|
56
tests/testthat/test-chunks.R
Normal file
56
tests/testthat/test-chunks.R
Normal file
@ -0,0 +1,56 @@
|
||||
# library(testthat)
|
||||
test_that("chunks_of_n returns correct", {
|
||||
expect_length(chunks_of_n(seq_len(100), 7),15)
|
||||
|
||||
expect_equal(lengths(chunks_of_n(seq_len(30), 7, even = TRUE),
|
||||
use.names = FALSE), c(6, 6, 6, 6, 6))
|
||||
|
||||
ds <- data.frame(nm = paste0("Sub",
|
||||
sprintf("%03s", rownames(stRoke::talos))),
|
||||
stRoke::talos)
|
||||
|
||||
is.list(ds)
|
||||
|
||||
class(list(ds))
|
||||
|
||||
expect_equal(head(names(chunks_of_n(ds, 7,
|
||||
pattern = "Sub[0-9]{3}", label = "grp")),
|
||||
1),"grp-Sub038-Sub011")
|
||||
|
||||
expect_equal(
|
||||
ds[order(ds$nm),] |> chunks_of_n(7, pattern = "Sub[0-9]{3}", label =
|
||||
"grp") |> head(1) |> names(),
|
||||
"grp-Sub001-Sub020"
|
||||
)
|
||||
|
||||
## Errors
|
||||
expect_error(chunks_of_n(list(ds), 7, pattern = "Sub[0-9]{3}", label = "grp"))
|
||||
|
||||
|
||||
})
|
||||
|
||||
test_that("n_chunks returns correct", {
|
||||
expect_length(n_chunks(seq_len(100), 7),7)
|
||||
|
||||
expect_equal(lengths(n_chunks(seq_len(30), 7, even = TRUE),
|
||||
use.names = FALSE), rep(5,6))
|
||||
|
||||
ds <- data.frame(nm = paste0("Sub",
|
||||
sprintf("%03s", rownames(stRoke::talos))),
|
||||
stRoke::talos)
|
||||
|
||||
expect_equal(head(names(n_chunks(ds, 7,
|
||||
pattern = "Sub[0-9]{3}", label = "grp")),
|
||||
1),"grp-Sub038-Sub603")
|
||||
|
||||
expect_equal(
|
||||
ds[order(ds$nm),] |> n_chunks(7, pattern = "Sub[0-9]{3}", label =
|
||||
"grp") |> head(1) |> names(),
|
||||
"grp-Sub001-Sub072"
|
||||
)
|
||||
|
||||
## Errors
|
||||
expect_error(n_chunks(list(ds), 7, pattern = "Sub[0-9]{3}", label = "grp"))
|
||||
|
||||
|
||||
})
|
Loading…
Reference in New Issue
Block a user