mirror of
https://github.com/agdamsbo/stRoke.git
synced 2024-11-21 12:30:23 +01:00
New str_extract() function to extract substrings by regex pattern.
This commit is contained in:
parent
193844c212
commit
bd647a9acf
37
R/str_extract.R
Normal file
37
R/str_extract.R
Normal file
@ -0,0 +1,37 @@
|
||||
#' Extract string based on regex pattern
|
||||
#'
|
||||
#' Use base::strsplit to
|
||||
#' @param d vector of character strings
|
||||
#' @param pattern regex pattern to match
|
||||
#'
|
||||
#' @return vector of character strings
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){
|
||||
#' paste(sample(letters,i,TRUE),collapse = "")}))
|
||||
#' ds <- do.call(c,lapply(1:20,function(i){
|
||||
#' paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")}))
|
||||
#' str_extract(ds,"([0-9]+)")
|
||||
str_extract <- function(d,pattern){
|
||||
if (!is.vector(d)) stop("Please provide a vector")
|
||||
|
||||
## Drawing on the solution in REDCapCAST::strsplitx to split around pattern
|
||||
nl <- strsplit(gsub("~~", "~", # Removes double ~
|
||||
gsub("^~", "", # Removes leading ~
|
||||
gsub(
|
||||
# Splits and inserts ~ at all delimiters
|
||||
paste0("(", pattern, ")"), "~\\1~", d
|
||||
))), "~")
|
||||
|
||||
## Reusing the pattern, to sub with "" and match on length 0 to index the
|
||||
## element containing the pattern. Only first occurance included.
|
||||
indx <- lapply(nl,function(i){
|
||||
match(0,nchar(sub(pattern,"",i)))
|
||||
})
|
||||
|
||||
## Using lapply to subsset the given index for each element in list
|
||||
do.call(c,lapply(seq_along(nl), function(i){
|
||||
nl[[i]][indx[[i]]]
|
||||
} ))
|
||||
}
|
26
man/str_extract.Rd
Normal file
26
man/str_extract.Rd
Normal file
@ -0,0 +1,26 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/str_extract.R
|
||||
\name{str_extract}
|
||||
\alias{str_extract}
|
||||
\title{Extract string based on regex pattern}
|
||||
\usage{
|
||||
str_extract(d, pattern)
|
||||
}
|
||||
\arguments{
|
||||
\item{d}{vector of character strings}
|
||||
|
||||
\item{pattern}{regex pattern to match}
|
||||
}
|
||||
\value{
|
||||
vector of character strings
|
||||
}
|
||||
\description{
|
||||
Use base::strsplit to
|
||||
}
|
||||
\examples{
|
||||
ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){
|
||||
paste(sample(letters,i,TRUE),collapse = "")}))
|
||||
ds <- do.call(c,lapply(1:20,function(i){
|
||||
paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")}))
|
||||
str_extract(ds,"([0-9]+)")
|
||||
}
|
15
tests/testthat/test-str_extract.R
Normal file
15
tests/testthat/test-str_extract.R
Normal file
@ -0,0 +1,15 @@
|
||||
# library(testthat)
|
||||
test_that("str_extract returns correct", {
|
||||
ls <- do.call(c, lapply(sample(4:8, 20, T), function(i) {
|
||||
paste(sample(letters, i, T), collapse = "")
|
||||
}))
|
||||
|
||||
ds <- do.call(c, lapply(1:20, function(i) {
|
||||
paste(sample(ls, 1), i, sample(ls, 1), "23", sep = "_")
|
||||
}))
|
||||
|
||||
expect_equal(nchar(str_extract(ds, "([0-9]+)")),c(rep(1,9),rep(2,11)))
|
||||
|
||||
expect_error(str_extract(data.frame(ds), "([0-9]+)"))
|
||||
|
||||
})
|
Loading…
Reference in New Issue
Block a user