mirror of
https://github.com/agdamsbo/REDCapCAST.git
synced 2024-11-21 13:00:23 +01:00
as_factor functions to preserve attributes
This commit is contained in:
parent
42efec437a
commit
c3b54b0860
249
R/as_factor.R
Normal file
249
R/as_factor.R
Normal file
@ -0,0 +1,249 @@
|
||||
#' Convert labelled vectors to factors while preserving attributes
|
||||
#'
|
||||
#' This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending
|
||||
#' original attributes except for "class" after converting to factor to avoid
|
||||
#' ta loss in case of rich formatted and labelled data.
|
||||
#'
|
||||
#' Please refer to parent functions for extended documentation.
|
||||
#'
|
||||
#' @param x Object to coerce to a factor.
|
||||
#' @param ... Other arguments passed down to method.
|
||||
#' @export
|
||||
#' @examples
|
||||
#' # will preserve all attributes but class
|
||||
#' c(1, 4, 3, "A", 7, 8, 1) |> as_factor()
|
||||
#' structure(c(1, 2, 3, 2, 10, 9),
|
||||
#' labels = c(Unknown = 9, Refused = 10)
|
||||
#' ) |>
|
||||
#' as_factor()
|
||||
#'
|
||||
#' structure(c(1, 2, 3, 2, 10, 9),
|
||||
#' labels = c(Unknown = 9, Refused = 10),
|
||||
#' class = "haven_labelled"
|
||||
#' ) |>
|
||||
#' as_factor()
|
||||
#'
|
||||
#' @importFrom forcats as_factor
|
||||
#' @importFrom rlang check_dots_used
|
||||
#' @export
|
||||
#' @name as_factor
|
||||
as_factor <- function(x, ...) {
|
||||
rlang::check_dots_used()
|
||||
UseMethod("as_factor")
|
||||
}
|
||||
|
||||
#' @rdname as_factor
|
||||
#' @export
|
||||
as_factor.logical <- function(x, ...) {
|
||||
labels <- get_attr(x)
|
||||
x <- forcats::as_factor(x, ...)
|
||||
set_attr(x, labels[-match("class", names(labels))])
|
||||
}
|
||||
|
||||
#' @rdname as_factor
|
||||
#' @export
|
||||
as_factor.numeric <- function(x, ...) {
|
||||
labels <- get_attr(x)
|
||||
x <- forcats::as_factor(x, ...)
|
||||
set_attr(x, labels[-match("class", names(labels))])
|
||||
}
|
||||
|
||||
#' @rdname as_factor
|
||||
#' @export
|
||||
as_factor.character <- function(x, ...) {
|
||||
labels <- get_attr(x)
|
||||
x <- forcats::as_factor(x, ...)
|
||||
set_attr(x, labels[-match("class", names(labels))])
|
||||
}
|
||||
|
||||
#' @rdname as_factor
|
||||
#' @export
|
||||
as_factor.haven_labelled <- function(x, ...) {
|
||||
labels <- get_attr(x)
|
||||
x <- haven::as_factor(x, ...)
|
||||
set_attr(x, labels[-match("class", names(labels))])
|
||||
}
|
||||
|
||||
#' @export
|
||||
#' @rdname as_factor
|
||||
as_factor.labelled <- as_factor.haven_labelled
|
||||
|
||||
|
||||
|
||||
#' Get named vector of factor levels and values
|
||||
#'
|
||||
#' @param data factor
|
||||
#' @param label character string of attribute with named vector of factor labels
|
||||
#'
|
||||
#' @return named vector
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' structure(c(1, 2, 3, 2, 10, 9),
|
||||
#' labels = c(Unknown = 9, Refused = 10),
|
||||
#' class = "haven_labelled"
|
||||
#' ) |> as_factor() |> named_levels()
|
||||
named_levels <- function(data, label = "labels",na.label=NULL, na.value=99) {
|
||||
stopifnot(is.factor(data))
|
||||
if (!is.null(na.label)){
|
||||
attrs <- attributes(data)
|
||||
lvls <- as.character(data)
|
||||
lvls[is.na(lvls)] <- na.label
|
||||
vals <- as.numeric(data)
|
||||
vals[is.na(vals)] <- na.value
|
||||
|
||||
lbls <- data.frame(
|
||||
name = lvls,
|
||||
value = vals
|
||||
) |> unique() |>
|
||||
(\(d){
|
||||
stats::setNames(d$value, d$name)
|
||||
})() |>
|
||||
sort()
|
||||
|
||||
data <- do.call(structure,
|
||||
c(list(.Data=match(vals,lbls)),
|
||||
attrs[-match("levels", names(attrs))],
|
||||
list(levels=names(lbls),
|
||||
labels=lbls)))
|
||||
}
|
||||
|
||||
d <- data.frame(
|
||||
name = levels(data)[data],
|
||||
value = as.numeric(data)
|
||||
) |>
|
||||
unique()
|
||||
|
||||
## Applying labels
|
||||
attr_l <- attr(x = data, which = label, exact = TRUE)
|
||||
if (length(attr_l) != 0) {
|
||||
d$value[match(names(attr_l), d$name)] <- unname(attr_l)
|
||||
}
|
||||
|
||||
out <- stats::setNames(d$value, d$name)
|
||||
## Sort if levels are numeric
|
||||
## Else, they appear in order of appearance
|
||||
if (identical(
|
||||
levels(data),
|
||||
suppressWarnings(as.character(as.numeric(levels(data))))
|
||||
)) {
|
||||
out <- out |> sort()
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
|
||||
#' Allows conversion of factor to numeric values preserving original levels
|
||||
#'
|
||||
#' @param data vector
|
||||
#'
|
||||
#' @return numeric vector
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' c(1, 4, 3, "A", 7, 8, 1) |>
|
||||
#' as_factor() |> fct2num()
|
||||
#'
|
||||
#' structure(c(1, 2, 3, 2, 10, 9),
|
||||
#' labels = c(Unknown = 9, Refused = 10),
|
||||
#' class = "haven_labelled"
|
||||
#' ) |>
|
||||
#' as_factor() |>
|
||||
#' fct2num()
|
||||
#'
|
||||
#' structure(c(1, 2, 3, 2, 10, 9),
|
||||
#' labels = c(Unknown = 9, Refused = 10)
|
||||
#' ) |>
|
||||
#' as_factor() |>
|
||||
#' fct2num()
|
||||
fct2num <- function(data) {
|
||||
stopifnot(is.factor(data))
|
||||
as.numeric(named_levels(data))[match(data, names(named_levels(data)))]
|
||||
}
|
||||
|
||||
#' Extract attribute. Returns NA if none
|
||||
#'
|
||||
#' @param data vector
|
||||
#' @param attr attribute name
|
||||
#'
|
||||
#' @return character vector
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' attr(mtcars$mpg, "label") <- "testing"
|
||||
#' sapply(mtcars, get_attr)
|
||||
#' lapply(mtcars, \(.x)get_attr(.x, NULL))
|
||||
#' mtcars |>
|
||||
#' numchar2fct(numeric.threshold = 6) |>
|
||||
#' ds2dd_detailed()
|
||||
get_attr <- function(data, attr = NULL) {
|
||||
if (is.null(attr)) {
|
||||
attributes(data)
|
||||
} else {
|
||||
a <- attr(data, attr, exact = TRUE)
|
||||
if (is.null(a)) {
|
||||
NA
|
||||
} else {
|
||||
a
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#' Set attributes for named attribute. Appends if attr is NULL
|
||||
#'
|
||||
#' @param data vector
|
||||
#' @param label label
|
||||
#' @param attr attribute name
|
||||
#'
|
||||
#' @return vector with attribute
|
||||
#' @export
|
||||
#'
|
||||
set_attr <- function(data, label, attr = NULL) {
|
||||
if (is.null(attr)) {
|
||||
## Has to be list...
|
||||
stopifnot(is.list(label))
|
||||
## ... with names
|
||||
stopifnot(length(label)==length(names(label)))
|
||||
attributes(data) <- c(attributes(data),label)
|
||||
} else {
|
||||
attr(data, attr) <- label
|
||||
}
|
||||
data
|
||||
}
|
||||
|
||||
#' Finish incomplete haven attributes substituting missings with values
|
||||
#'
|
||||
#' @param data haven labelled variable
|
||||
#'
|
||||
#' @return named vector
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' ds <- structure(c(1, 2, 3, 2, 10, 9),
|
||||
#' labels = c(Unknown = 9, Refused = 10),
|
||||
#' class = "haven_labelled"
|
||||
#' )
|
||||
#' haven::is.labelled(ds)
|
||||
#' attributes(ds)
|
||||
#' ds |> haven_all_levels()
|
||||
haven_all_levels <- function(data) {
|
||||
stopifnot(haven::is.labelled(data))
|
||||
if (length(attributes(data)$labels) == length(unique(data))) {
|
||||
out <- attributes(data)$labels
|
||||
} else {
|
||||
att <- attributes(data)$labels
|
||||
out <- c(unique(data[!data %in% att]), att) |>
|
||||
stats::setNames(c(unique(data[!data %in% att]), names(att)))
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
# readr::read_rds("/Users/au301842/PAaSO/labelled_test.rds") |> ds2dd_detailed()
|
||||
#' sample(c(TRUE,FALSE,NA),20,TRUE) |> set_attr("hidden","status") |> trial_fct() |> named_levels(na.label = "Missing") |> sort()
|
||||
# trial_fct <- function(x){
|
||||
# labels <- get_attr(x)
|
||||
# x <- factor(x, levels = c("FALSE", "TRUE"))
|
||||
# set_attr(x, labels[-match("class", names(labels))])
|
||||
# }
|
||||
|
@ -172,20 +172,20 @@ ds2dd_detailed <- function(data,
|
||||
|
||||
if (convert.logicals) {
|
||||
# Labels/attributes are saved
|
||||
labels <- lapply(data, \(.x){
|
||||
get_attr(.x, attr = NULL)
|
||||
})
|
||||
# labels <- lapply(data, \(.x){
|
||||
# get_attr(.x, attr = NULL)
|
||||
# })
|
||||
|
||||
no_attr <- data |>
|
||||
data <- data |>
|
||||
## Converts logical to factor, which overwrites attributes
|
||||
dplyr::mutate(dplyr::across(dplyr::where(is.logical), forcats::as_factor))
|
||||
dplyr::mutate(dplyr::across(dplyr::where(is.logical), as_factor))
|
||||
|
||||
# Old attributes are appended
|
||||
data <- purrr::imap(no_attr,\(.x,.i){
|
||||
attributes(.x) <- c(attributes(.x),labels[[.i]])
|
||||
.x
|
||||
}) |>
|
||||
dplyr::bind_cols()
|
||||
# data <- purrr::imap(no_attr,\(.x,.i){
|
||||
# attributes(.x) <- c(attributes(.x),labels[[.i]])
|
||||
# .x
|
||||
# }) |>
|
||||
# dplyr::bind_cols()
|
||||
|
||||
}
|
||||
|
||||
@ -262,7 +262,6 @@ ds2dd_detailed <- function(data,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
data_classes <- do.call(c, lapply(data, \(.x)class(.x)[1]))
|
||||
|
||||
## field_type
|
||||
@ -308,27 +307,15 @@ ds2dd_detailed <- function(data,
|
||||
|
||||
## choices
|
||||
|
||||
if (any(do.call(c, lapply(data, haven::is.labelled)))) {
|
||||
factor_levels <- data |>
|
||||
lapply(function(x) {
|
||||
if (haven::is.labelled(x)) {
|
||||
att <- haven_all_levels(x)
|
||||
paste(paste(att, names(att), sep = ", "), collapse = " | ")
|
||||
} else {
|
||||
NA
|
||||
}
|
||||
}) |>
|
||||
(\(x)do.call(c, x))()
|
||||
} else {
|
||||
factor_levels <- data |>
|
||||
factor_levels <- data |>
|
||||
lapply(function(x) {
|
||||
if (is.factor(x)) {
|
||||
## Re-factors to avoid confusion with missing levels
|
||||
## Assumes all relevant levels are represented in the data
|
||||
re_fac <- factor(x)
|
||||
## Custom function to ensure factor order and keep original values
|
||||
## Avoiding refactoring to keep as much information as possible
|
||||
lvls <- sort(named_levels(x))
|
||||
paste(
|
||||
paste(seq_along(levels(re_fac)),
|
||||
levels(re_fac),
|
||||
paste(lvls,
|
||||
names(lvls),
|
||||
sep = ", "
|
||||
),
|
||||
collapse = " | "
|
||||
@ -338,7 +325,6 @@ ds2dd_detailed <- function(data,
|
||||
}
|
||||
}) |>
|
||||
(\(x)do.call(c, x))()
|
||||
}
|
||||
|
||||
dd <-
|
||||
dd |> dplyr::mutate(
|
||||
@ -357,33 +343,6 @@ ds2dd_detailed <- function(data,
|
||||
)
|
||||
}
|
||||
|
||||
#' Finish incomplete haven attributes substituting missings with values
|
||||
#'
|
||||
#' @param data haven labelled variable
|
||||
#'
|
||||
#' @return named vector
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' ds <- structure(c(1, 2, 3, 2, 10, 9),
|
||||
#' labels = c(Unknown = 9, Refused = 10),
|
||||
#' class = "haven_labelled"
|
||||
#' )
|
||||
#' haven::is.labelled(ds)
|
||||
#' attributes(ds)
|
||||
#' ds |> haven_all_levels()
|
||||
haven_all_levels <- function(data) {
|
||||
stopifnot(haven::is.labelled(data))
|
||||
if (length(attributes(data)$labels) == length(unique(data))) {
|
||||
out <- attributes(data)$labels
|
||||
} else {
|
||||
att <- attributes(data)$labels
|
||||
out <- c(unique(data[!data %in% att]), att) |>
|
||||
stats::setNames(c(unique(data[!data %in% att]), names(att)))
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
|
||||
#' Guess time variables based on naming pattern
|
||||
#'
|
||||
@ -567,50 +526,6 @@ numchar2fct <- function(data, numeric.threshold = 6, character.throshold = 6) {
|
||||
)
|
||||
}
|
||||
|
||||
#' Extract attribute. Returns NA if none
|
||||
#'
|
||||
#' @param data vector
|
||||
#' @param attr attribute name
|
||||
#'
|
||||
#' @return character vector
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
#' attr(mtcars$mpg, "label") <- "testing"
|
||||
#' sapply(mtcars, get_attr)
|
||||
#' lapply(mtcars, \(.x)get_attr(.x, NULL))
|
||||
#' mtcars |>
|
||||
#' numchar2fct(numeric.threshold = 6) |>
|
||||
#' ds2dd_detailed()
|
||||
get_attr <- function(data, attr = NULL) {
|
||||
if (is.null(attr)) {
|
||||
attributes(data)
|
||||
} else {
|
||||
a <- attr(data, attr, exact = TRUE)
|
||||
if (is.null(a)) {
|
||||
NA
|
||||
} else {
|
||||
a
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#' Set attributes for named attribute. Appends if attr is NULL
|
||||
#'
|
||||
#' @param data vector
|
||||
#' @param label label
|
||||
#' @param attr attribute name
|
||||
#'
|
||||
#' @return vector with attribute
|
||||
#' @export
|
||||
#'
|
||||
set_attr <- function(data, label, attr = NULL) {
|
||||
if (is.null(attr)) {
|
||||
attributes(data) <- c(attributes(data),label)
|
||||
} else {
|
||||
attr(data, attr) <- label
|
||||
}
|
||||
data
|
||||
}
|
||||
|
||||
|
51
man/as_factor.Rd
Normal file
51
man/as_factor.Rd
Normal file
@ -0,0 +1,51 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/as_factor.R
|
||||
\name{as_factor}
|
||||
\alias{as_factor}
|
||||
\alias{as_factor.logical}
|
||||
\alias{as_factor.numeric}
|
||||
\alias{as_factor.character}
|
||||
\alias{as_factor.haven_labelled}
|
||||
\alias{as_factor.labelled}
|
||||
\title{Convert labelled vectors to factors while preserving attributes}
|
||||
\usage{
|
||||
as_factor(x, ...)
|
||||
|
||||
\method{as_factor}{logical}(x, ...)
|
||||
|
||||
\method{as_factor}{numeric}(x, ...)
|
||||
|
||||
\method{as_factor}{character}(x, ...)
|
||||
|
||||
\method{as_factor}{haven_labelled}(x, ...)
|
||||
|
||||
\method{as_factor}{labelled}(x, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Object to coerce to a factor.}
|
||||
|
||||
\item{...}{Other arguments passed down to method.}
|
||||
}
|
||||
\description{
|
||||
This extends [forcats::as_factor()] as well as [haven::as_factor()], by appending
|
||||
original attributes except for "class" after converting to factor to avoid
|
||||
ta loss in case of rich formatted and labelled data.
|
||||
}
|
||||
\details{
|
||||
Please refer to parent functions for extended documentation.
|
||||
}
|
||||
\examples{
|
||||
# will preserve all attributes but class
|
||||
c(1, 4, 3, "A", 7, 8, 1) |> as_factor()
|
||||
structure(c(1, 2, 3, 2, 10, 9),
|
||||
labels = c(Unknown = 9, Refused = 10)
|
||||
) |>
|
||||
as_factor()
|
||||
|
||||
structure(c(1, 2, 3, 2, 10, 9),
|
||||
labels = c(Unknown = 9, Refused = 10),
|
||||
class = "haven_labelled"
|
||||
) |>
|
||||
as_factor()
|
||||
|
||||
}
|
34
man/fct2num.Rd
Normal file
34
man/fct2num.Rd
Normal file
@ -0,0 +1,34 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/as_factor.R
|
||||
\name{fct2num}
|
||||
\alias{fct2num}
|
||||
\title{Allows conversion of factor to numeric values preserving original levels}
|
||||
\usage{
|
||||
fct2num(data)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{vector}
|
||||
}
|
||||
\value{
|
||||
numeric vector
|
||||
}
|
||||
\description{
|
||||
Allows conversion of factor to numeric values preserving original levels
|
||||
}
|
||||
\examples{
|
||||
c(1, 4, 3, "A", 7, 8, 1) |>
|
||||
as_factor() |> fct2num()
|
||||
|
||||
structure(c(1, 2, 3, 2, 10, 9),
|
||||
labels = c(Unknown = 9, Refused = 10),
|
||||
class = "haven_labelled"
|
||||
) |>
|
||||
as_factor() |>
|
||||
fct2num()
|
||||
|
||||
structure(c(1, 2, 3, 2, 10, 9),
|
||||
labels = c(Unknown = 9, Refused = 10)
|
||||
) |>
|
||||
as_factor() |>
|
||||
fct2num()
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/ds2dd_detailed.R
|
||||
% Please edit documentation in R/as_factor.R
|
||||
\name{get_attr}
|
||||
\alias{get_attr}
|
||||
\title{Extract attribute. Returns NA if none}
|
||||
|
@ -1,5 +1,5 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/ds2dd_detailed.R
|
||||
% Please edit documentation in R/as_factor.R
|
||||
\name{haven_all_levels}
|
||||
\alias{haven_all_levels}
|
||||
\title{Finish incomplete haven attributes substituting missings with values}
|
||||
|
25
man/named_levels.Rd
Normal file
25
man/named_levels.Rd
Normal file
@ -0,0 +1,25 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/as_factor.R
|
||||
\name{named_levels}
|
||||
\alias{named_levels}
|
||||
\title{Get named vector of factor levels and values}
|
||||
\usage{
|
||||
named_levels(data, label = "labels", na.label = NULL, na.value = 99)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{factor}
|
||||
|
||||
\item{label}{character string of attribute with named vector of factor labels}
|
||||
}
|
||||
\value{
|
||||
named vector
|
||||
}
|
||||
\description{
|
||||
Get named vector of factor levels and values
|
||||
}
|
||||
\examples{
|
||||
structure(c(1, 2, 3, 2, 10, 9),
|
||||
labels = c(Unknown = 9, Refused = 10),
|
||||
class = "haven_labelled"
|
||||
) |> as_factor() |> named_levels()
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/ds2dd_detailed.R
|
||||
% Please edit documentation in R/as_factor.R
|
||||
\name{set_attr}
|
||||
\alias{set_attr}
|
||||
\title{Set attributes for named attribute. Appends if attr is NULL}
|
||||
|
Loading…
Reference in New Issue
Block a user