mirror of
https://github.com/agdamsbo/REDCapCAST.git
synced 2024-11-22 13:30:23 +01:00
255 lines
6.0 KiB
R
255 lines
6.0 KiB
R
#' Convert labelled vectors to factors while preserving attributes
|
|
#'
|
|
#' This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending
|
|
#' original attributes except for "class" after converting to factor to avoid
|
|
#' data loss in case of rich formatted and labelled data.
|
|
#'
|
|
#' Please refer to parent functions for extended documentation.
|
|
#'
|
|
#' @param x Object to coerce to a factor.
|
|
#' @param ... Other arguments passed down to method.
|
|
#' @export
|
|
#' @examples
|
|
#' # will preserve all attributes but class
|
|
#' \dontrun{
|
|
#' c(1, 4, 3, "A", 7, 8, 1) |> as_factor()
|
|
#' structure(c(1, 2, 3, 2, 10, 9),
|
|
#' labels = c(Unknown = 9, Refused = 10)
|
|
#' ) |>
|
|
#' as_factor()
|
|
#'
|
|
#' structure(c(1, 2, 3, 2, 10, 9),
|
|
#' labels = c(Unknown = 9, Refused = 10),
|
|
#' class = "haven_labelled"
|
|
#' ) |>
|
|
#' as_factor()
|
|
#' }
|
|
#' @importFrom forcats as_factor
|
|
#' @importFrom rlang check_dots_used
|
|
#' @export
|
|
#' @name as_factor
|
|
as_factor <- function(x, ...) {
  # Validate that arguments supplied through `...` are actually used.
  rlang::check_dots_used()
  # Dispatch on the class of `x` (stated explicitly; this is also the default).
  UseMethod("as_factor", x)
}
|
|
|
|
#' @rdname as_factor
|
|
#' @export
|
|
as_factor.logical <- function(x, ...) {
  # Capture the attributes before conversion. get_attr() yields NULL for a
  # bare vector, so coerce to a (possibly empty) list that set_attr() accepts.
  original <- as.list(get_attr(x))
  out <- forcats::as_factor(x, ...)
  # Drop "class" by name instead of a negated match(): when "class" is absent
  # match() returns NA and `[-NA]` would throw away every attribute.
  set_attr(out, original[!(names(original) %in% "class")])
}
|
|
|
|
#' @rdname as_factor
|
|
#' @export
|
|
as_factor.numeric <- function(x, ...) {
  # Capture the attributes before conversion. get_attr() yields NULL for a
  # bare vector, so coerce to a (possibly empty) list that set_attr() accepts.
  original <- as.list(get_attr(x))
  out <- forcats::as_factor(x, ...)
  # Drop "class" by name instead of a negated match(): a numeric vector that
  # carries e.g. `labels` but no "class" would otherwise lose all attributes,
  # because match() returns NA and `[-NA]` selects nothing useful.
  set_attr(out, original[!(names(original) %in% "class")])
}
|
|
|
|
#' @rdname as_factor
|
|
#' @export
|
|
as_factor.character <- function(x, ...) {
  # Capture the attributes before conversion. get_attr() yields NULL for a
  # bare vector, so coerce to a (possibly empty) list that set_attr() accepts.
  original <- as.list(get_attr(x))
  out <- forcats::as_factor(x, ...)
  # Drop "class" by name instead of a negated match(): when "class" is absent
  # match() returns NA and `[-NA]` would throw away every attribute.
  set_attr(out, original[!(names(original) %in% "class")])
}
|
|
|
|
#' @rdname as_factor
|
|
#' @export
|
|
as_factor.haven_labelled <- function(x, ...) {
  # Capture the attributes before haven converts the vector; as.list() keeps
  # the value a list even if the method is called on an attribute-free object.
  original <- as.list(get_attr(x))
  out <- haven::as_factor(x, ...)
  # Drop "class" by name (same idiom as the sibling methods) so a missing
  # "class" entry cannot turn the subset into an NA index.
  set_attr(out, original[!(names(original) %in% "class")])
}
|
|
|
|
#' @export
|
|
#' @rdname as_factor
|
|
as_factor.labelled <- as_factor.haven_labelled # "labelled" reuses this method
|
|
|
|
|
|
|
|
#' Get named vector of factor levels and values
#'
#' Returns a named vector whose names are the levels observed in `data` and
#' whose values are the corresponding integer codes. Where the attribute named
#' by `label` holds a named vector, values of matching level names are replaced
#' by the stored values. Entries of that attribute naming a level that does not
#' occur in `data` are ignored.
#'
#' @param data factor
#' @param label character string of attribute with named vector of factor labels
#' @param na.label character string to refactor NA values. Default is NULL.
#' @param na.value new value for NA strings. Ignored if na.label is NULL.
#' Default is 99.
#'
#' @return named vector
#' @export
#'
#' @examples
#' \dontrun{
#' structure(c(1, 2, 3, 2, 10, 9),
#'   labels = c(Unknown = 9, Refused = 10),
#'   class = "haven_labelled"
#' ) |> as_factor() |> named_levels()
#' }
named_levels <- function(data,
                         label = "labels",
                         na.label = NULL,
                         na.value = 99) {
  stopifnot(is.factor(data))

  if (!is.null(na.label)) {
    ## Recode NA as an explicit level (na.label) with its own value (na.value)
    attrs <- attributes(data)
    lvls <- as.character(data)
    lvls[is.na(lvls)] <- na.label
    vals <- as.numeric(data)
    vals[is.na(vals)] <- na.value

    pairs <- unique(data.frame(name = lvls, value = vals))
    lbls <- sort(stats::setNames(pairs$value, pairs$name))

    ## Rebuild the factor with the new levels, keeping all other attributes
    data <- do.call(
      structure,
      c(
        list(.Data = match(vals, lbls)),
        attrs[-match("levels", names(attrs))],
        list(
          levels = names(lbls),
          labels = lbls
        )
      )
    )
  }

  ## One row per observed level with its integer code
  observed <- unique(data.frame(
    name = levels(data)[data],
    value = as.numeric(data)
  ))

  ## Applying labels
  attr_l <- attr(x = data, which = label, exact = TRUE)
  if (length(attr_l) != 0) {
    ## A label naming a level that is absent from the data has no row to
    ## update. Its NA index would abort the subassignment, so only labels
    ## that match an observed level are applied.
    idx <- match(names(attr_l), observed$name)
    hit <- !is.na(idx)
    observed$value[idx[hit]] <- unname(attr_l)[hit]
  }

  out <- stats::setNames(observed$value, observed$name)

  ## Sort if levels are numeric
  ## Else, they appear in order of appearance
  lv <- levels(data)
  if (identical(lv, suppressWarnings(as.character(as.numeric(lv))))) {
    out <- sort(out)
  }
  out
}
|
|
|
|
|
|
#' Convert a factor to numeric values preserving original levels
#'
#' Looks up each element of `data` in the named vector returned by
#' [named_levels()] and returns the matching values.
#'
#' @param data factor
#'
#' @return numeric vector
#' @export
#'
#' @examples
#' \dontrun{
#' c(1, 4, 3, "A", 7, 8, 1) |>
#'   as_factor() |> fct2num()
#'
#' structure(c(1, 2, 3, 2, 10, 9),
#'   labels = c(Unknown = 9, Refused = 10),
#'   class = "haven_labelled"
#' ) |>
#'   as_factor() |>
#'   fct2num()
#'
#' structure(c(1, 2, 3, 2, 10, 9),
#'   labels = c(Unknown = 9, Refused = 10)
#' ) |>
#'   as_factor() |>
#'   fct2num()
#' }
fct2num <- function(data) {
  stopifnot(is.factor(data))
  # Named vector: names are level labels, values are the numeric codes.
  lookup <- named_levels(data)
  # Position of each observation's level within the lookup names.
  positions <- match(data, names(lookup))
  as.numeric(lookup)[positions]
}
|
|
|
|
#' Extract attributes from an object
#'
#' With `attr = NULL` all attributes are returned as given by [attributes()].
#' Otherwise the single attribute named `attr` is returned (exact name
#' matching), or `NA` if the object has no such attribute.
#'
#' @param data vector
#' @param attr attribute name
#'
#' @return list of all attributes (or `NULL` if there are none) when `attr` is
#' `NULL`; otherwise the value of the named attribute, or `NA` if it is not set
#' @export
#'
#' @examples
#' attr(mtcars$mpg, "label") <- "testing"
#' do.call(c,sapply(mtcars, get_attr))
#' \dontrun{
#' mtcars |>
#'   numchar2fct(numeric.threshold = 6) |>
#'   ds2dd_detailed()
#' }
get_attr <- function(data, attr = NULL) {
  # No name requested: hand back everything.
  if (is.null(attr)) {
    return(attributes(data))
  }

  value <- base::attr(data, which = attr, exact = TRUE)
  # A missing attribute is reported as NA rather than NULL.
  if (is.null(value)) {
    return(NA)
  }
  value
}
|
|
|
|
|
|
#' Set one named attribute, or append a list of attributes
#'
#' If `attr` is given, `label` is assigned to that attribute. If `attr` is
#' `NULL`, `label` must be a fully named list, which is appended to the
#' existing attributes of `data`.
#'
#' @param data vector
#' @param label value for the attribute, or a named list of attributes when
#' `attr` is `NULL`
#' @param attr attribute name
#' @param overwrite overwrite existing attributes. Default is FALSE. Only
#' consulted when `attr` is `NULL`.
#'
#' @return vector with attribute
#' @export
#'
set_attr <- function(data, label, attr = NULL, overwrite = FALSE) {
  # Single named attribute: assign and finish.
  if (!is.null(attr)) {
    attr(data, attr) <- label
    return(data)
  }

  ## Has to be list...
  stopifnot(is.list(label))
  ## ... with names
  stopifnot(length(label) == length(names(label)))

  existing <- attributes(data)
  if (!overwrite) {
    # Keep only the entries whose names are not already set on `data`.
    label <- label[!(names(label) %in% names(existing))]
  }
  attributes(data) <- c(existing, label)
  data
}
|
|
|
|
#' Finish incomplete haven attributes substituting missings with values
#'
#' Returns the `labels` attribute of a haven labelled vector, extended with
#' every value present in `data` that has no label. Such values are named by
#' the value itself.
#'
#' @param data haven labelled variable
#'
#' @return named vector
#' @export
#'
#' @examples
#' ds <- structure(c(1, 2, 3, 2, 10, 9),
#'   labels = c(Unknown = 9, Refused = 10),
#'   class = "haven_labelled"
#' )
#' haven::is.labelled(ds)
#' attributes(ds)
#' ds |> haven_all_levels()
haven_all_levels <- function(data) {
  stopifnot(haven::is.labelled(data))
  att <- attributes(data)$labels

  ## Equal counts alone do not prove that every value is labelled: two labels
  ## and two *other* unique values also have equal counts. Take the shortcut
  ## only when every non-missing value is covered by a label.
  covered <- all(data[!is.na(data)] %in% att)
  if (length(att) == length(unique(data)) && covered) {
    out <- att
  } else {
    ## Unlabelled values are prepended and named by their own value
    unlabelled <- unique(data[!data %in% att])
    out <- c(unlabelled, att) |>
      stats::setNames(c(unlabelled, names(att)))
  }
  out
}
|
|
|