new as_logical function to ease binary data interpretation - version bump. Hi March!

2025-04-01 21:52:32 +02:00 · 2025-03-04 13:00:49 +01:00 · 2025-03-04 13:00:49 +01:00 · bb24a7d7bd
commit bb24a7d7bd
parent f91aed0948
5 changed files with 162 additions and 2 deletions
--- a/3
+++ b/3
@ -1,6 +1,6 @@
 Package: REDCapCAST
 Title: REDCap Metadata Casting and Castellated Data Handling
-Version: 25.2.1
+Version: 25.3.1
 Authors@R: c(
    person("Andreas Gammelgaard", "Damsbo", email = "agdamsbo@clin.au.dk", 
    role = c("aut", "cre"),comment = c(ORCID = "0000-0002-7559-1154")), 
@ -72,6 +72,7 @@ Collate:
    'process_user_input.r'
    'REDCap_split.r'
    'as_factor.R'
+    'as_logical.R'
    'doc2dd.R'
    'ds2dd_detailed.R'
    'easy_redcap.R'
--- a/3
+++ b/3
@ -7,6 +7,8 @@ S3method(as_factor,haven_labelled)
 S3method(as_factor,labelled)
 S3method(as_factor,logical)
 S3method(as_factor,numeric)
+S3method(as_logical,data.frame)
+S3method(as_logical,default)
 S3method(fct_drop,data.frame)
 S3method(fct_drop,factor)
 S3method(process_user_input,character)
@ -18,6 +20,7 @@ export(all_na)
 export(apply_factor_labels)
 export(apply_field_label)
 export(as_factor)
+export(as_logical)
 export(case_match_regex_list)
 export(cast_data_overview)
 export(cast_meta_overview)
--- a/NEWS.md
+++ b/NEWS.md
@ -1,7 +1,9 @@
-# REDCapCAST 25.2.1
+# REDCapCAST 25.3.1

 * FIX: `as_factor()` now interprets empty variables with empty levels attribute as logicals to avoid returning factors with empty levels.

+* NEW: `as_logical()`: interprets vectors with two levels as logical if values matches supplied list of logical pairs like "TRUE"/"FALSE", "Yes"/"No" or 1/2. Eases interpretation of data from databases with minimal metadata.
+
 # REDCapCAST 25.1.1

 The newly introduced extension of `forcats::fct_drop()` has been corrected to work as intended as a method.
--- a/R/as_logical.R
+++ b/R/as_logical.R
@ -0,0 +1,99 @@
+#' Interpret specific binary values as logicals
+#'
+#' @param x vector or data.frame
+#' @param values list of values to interpret as logicals. First value is
+#' @param ... ignored
+#' interpreted as TRUE.
+#'
+#' @returns vector
+#' @export
+#'
+#' @examples
+#' c(sample(c("TRUE", "FALSE"), 20, TRUE), NA) |>
+#'   as_logical() |>
+#'   class()
+#' ds <- dplyr::tibble(
+#'   B = factor(sample(c(1, 2), 20, TRUE)),
+#'   A = factor(sample(c("TRUE", "FALSE"), 20, TRUE)),
+#'   C = sample(c(3, 4), 20, TRUE),
+#'   D = factor(sample(c("In", "Out"), 20, TRUE))
+#' )
+#' ds |>
+#'   as_logical() |>
+#'   sapply(class)
+#' ds$A |> class()
+#' @name as_logical
+as_logical <- function(x,
+                       values = list(
+                         c("TRUE", "FALSE"),
+                         c("Yes", "No"),
+                         c(1, 0),
+                         c(1, 2)
+                       ),
+                       ...) {
+  UseMethod("as_logical")
+}
+
+#' @rdname as_logical
+#' @export
+as_logical.data.frame <- function(x,
+                                  values = list(
+                                    c("TRUE", "FALSE"),
+                                    c("Yes", "No"),
+                                    c(1, 0),
+                                    c(1, 2)
+                                  ),
+                                  ...) {
+  as.data.frame(lapply(x, \(.x){
+    as_logical.default(x = .x, values = values)
+  }))
+}
+
+#' @rdname as_logical
+#' @export
+as_logical.default <- function(x,
+                               values = list(
+                                 c("TRUE", "FALSE"),
+                                 c("Yes", "No"),
+                                 c(1, 0),
+                                 c(1, 2)
+                               ),
+                               ...) {
+  label <- REDCapCAST::get_attr(x, "label")
+
+  # browser()
+  out <- c()
+  if (any(
+    c(
+      "character",
+      "factor",
+      "numeric"
+    ) %in% class(x)
+  ) &&
+    length(unique(x[!is.na(x)])) == 2) {
+    if (is.factor(x)) {
+      match_index <- which(sapply(values, \(.x){
+        all(.x %in% levels(x))
+      }))
+    } else {
+      match_index <- which(sapply(values, \(.x){
+        all(.x %in% x)
+      }))
+    }
+    if (length(match_index) == 1) {
+      out <- x == values[[match_index]][1]
+    } else if (length(match_index) > 1) {
+      # If matching several, the first match is used.
+      out <- x == values[[match_index[1]]][1]
+    }
+  }
+
+  if (length(out) == 0) {
+    out <- x
+  }
+
+  if (!is.na(label)) {
+    out <- REDCapCAST::set_attr(out, label = label, attr = "label")
+  }
+  out
+}
--- a/man/as_logical.Rd
+++ b/man/as_logical.Rd
@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/as_logical.R
+\name{as_logical}
+\alias{as_logical}
+\alias{as_logical.data.frame}
+\alias{as_logical.default}
+\title{Interpret specific binary values as logicals}
+\usage{
+as_logical(
+  x,
+  values = list(c("TRUE", "FALSE"), c("Yes", "No"), c(1, 0), c(1, 2)),
+  ...
+)
+
+\method{as_logical}{data.frame}(
+  x,
+  values = list(c("TRUE", "FALSE"), c("Yes", "No"), c(1, 0), c(1, 2)),
+  ...
+)
+
+\method{as_logical}{default}(
+  x,
+  values = list(c("TRUE", "FALSE"), c("Yes", "No"), c(1, 0), c(1, 2)),
+  ...
+)
+}
+\arguments{
+\item{x}{vector or data.frame}
+
+\item{values}{list of values to interpret as logicals. First value is}
+
+\item{...}{ignored
+interpreted as TRUE.}
+}
+\value{
+vector
+}
+\description{
+Interpret specific binary values as logicals
+}
+\examples{
+c(sample(c("TRUE", "FALSE"), 20, TRUE), NA) |>
+  as_logical() |>
+  class()
+ds <- dplyr::tibble(
+  B = factor(sample(c(1, 2), 20, TRUE)),
+  A = factor(sample(c("TRUE", "FALSE"), 20, TRUE)),
+  C = sample(c(3, 4), 20, TRUE),
+  D = factor(sample(c("In", "Out"), 20, TRUE))
+)
+ds |>
+  as_logical() |>
+  sapply(class)
+ds$A |> class()
+}