New str_extract() function to extract substrings by regex pattern.

This commit is contained in:
Andreas Gammelgaard Damsbo 2023-09-05 10:08:26 -07:00
parent 193844c212
commit bd647a9acf
3 changed files with 78 additions and 0 deletions

37
R/str_extract.R Normal file
View File

@ -0,0 +1,37 @@
#' Extract string based on regex pattern
#'
#' Use base::strsplit to
#' @param d vector of character strings
#' @param pattern regex pattern to match
#'
#' @return vector of character strings
#' @export
#'
#' @examples
#' ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){
#' paste(sample(letters,i,TRUE),collapse = "")}))
#' ds <- do.call(c,lapply(1:20,function(i){
#' paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")}))
#' str_extract(ds,"([0-9]+)")
str_extract <- function(d,pattern){
if (!is.vector(d)) stop("Please provide a vector")
## Drawing on the solution in REDCapCAST::strsplitx to split around pattern
nl <- strsplit(gsub("~~", "~", # Removes double ~
gsub("^~", "", # Removes leading ~
gsub(
# Splits and inserts ~ at all delimiters
paste0("(", pattern, ")"), "~\\1~", d
))), "~")
## Reusing the pattern, to sub with "" and match on length 0 to index the
## element containing the pattern. Only first occurance included.
indx <- lapply(nl,function(i){
match(0,nchar(sub(pattern,"",i)))
})
## Using lapply to subsset the given index for each element in list
do.call(c,lapply(seq_along(nl), function(i){
nl[[i]][indx[[i]]]
} ))
}

26
man/str_extract.Rd Normal file
View File

@ -0,0 +1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/str_extract.R
\name{str_extract}
\alias{str_extract}
\title{Extract string based on regex pattern}
\usage{
str_extract(d, pattern)
}
\arguments{
\item{d}{vector of character strings}
\item{pattern}{regex pattern to match}
}
\value{
vector of character strings
}
\description{
Use base::strsplit to
}
\examples{
ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){
paste(sample(letters,i,TRUE),collapse = "")}))
ds <- do.call(c,lapply(1:20,function(i){
paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")}))
str_extract(ds,"([0-9]+)")
}

View File

@ -0,0 +1,15 @@
# library(testthat)
test_that("str_extract returns correct", {
ls <- do.call(c, lapply(sample(4:8, 20, T), function(i) {
paste(sample(letters, i, T), collapse = "")
}))
ds <- do.call(c, lapply(1:20, function(i) {
paste(sample(ls, 1), i, sample(ls, 1), "23", sep = "_")
}))
expect_equal(nchar(str_extract(ds, "([0-9]+)")),c(rep(1,9),rep(2,11)))
expect_error(str_extract(data.frame(ds), "([0-9]+)"))
})