# stRoke/R/quantile_cut.R
# (source listing: 71 lines, 1.8 KiB, R)

#' Easy function for splitting a numeric variable into quantile groups
#'
#' Uses the base/stats functions cut() and quantile(): the quantile borders
#' are computed from x (or from y, when supplied) and x is then cut at
#' those borders.
#'
#' @param x Variable to cut.
#' @param groups Number of groups. Has to be a single positive whole number.
#' @param y Alternative vector to draw the quantile cuts from. Its limits
#' have to be within those of x. Default is NULL, in which case the cuts
#' are drawn from x itself.
#' @param na.rm Remove NA's when computing the quantiles (passed on to
#' quantile()). Default is TRUE.
#' @param group.names Names of the groups to split to. Default is NULL,
#' giving intervals as names.
#' @param ordered.f Set resulting vector as ordered. Default is FALSE.
#' @param inc.outs Flag to use min(x) and max(x) as the outer borders
#' in case of y!=NULL. Default is FALSE.
#' @param detail.list Flag to include the quantile borders in the output.
#' Default is FALSE.
#'
#' @return A factor with one level per group or, if detail.list is TRUE,
#' a list of length 2 holding that factor and the vector of cut borders.
#'
#' @keywords quantile
#' @export
#' @examples
#' aa <- as.numeric(sample(1:1000,2000,replace = TRUE))
#' x <- 1:450
#' y <- 6:750
#' summary(quantile_cut(aa,groups=4,detail.list=FALSE)) ## Cuts quartiles
quantile_cut <- function(x,
                         groups,
                         y = NULL,
                         na.rm = TRUE,
                         group.names = NULL,
                         ordered.f = FALSE,
                         inc.outs = FALSE,
                         detail.list = FALSE) {
  # Fail early with a clear message instead of an obscure error from seq()
  # or cut() further down.
  if (!is.numeric(groups) || length(groups) != 1L || !is.finite(groups) ||
      groups < 1 || groups != round(groups)) {
    stop("'groups' has to be a single positive whole number.", call. = FALSE)
  }

  # Evenly spaced probabilities. length.out guarantees that the last value
  # is exactly 1; seq(0, 1, 1 / groups) can end just below 1 (for example
  # with groups = 49), which would leave max(x) outside the last interval.
  probs <- seq(0, 1, length.out = groups + 1)

  # Quantile borders come from y when it is supplied, otherwise from x.
  q_source <- if (is.null(y)) x else y
  q <- quantile(
    q_source,
    probs = probs,
    na.rm = na.rm,
    names = TRUE,
    type = 7
  )

  if (!is.null(y) && inc.outs) {
    # Setting cut borders to include outliers in x compared to y.
    q[1] <- min(x, na.rm = TRUE)
    q[length(q)] <- max(x, na.rm = TRUE)
  }

  d <- cut(
    x,
    q,
    include.lowest = TRUE,
    labels = group.names,
    ordered_result = ordered.f
  )

  if (detail.list) {
    list(d, q)
  } else {
    d
  }
}