utils::globalVariables(c("calculations", "choices")) #' Doc table to data dictionary - EARLY, DOCS MISSING #' #' @description #' Works well with `project.aid::docx2list()`. #' Allows defining a database in a text document (see provided template) for #' an easier to use data base creation. This approach allows easier #' collaboration when defining the database. The generic case is a data frame #' with variable names as values in a column. This is a format like the REDCap #' data dictionary, but gives a few options for formatting. #' #' @param data tibble or data.frame with all variable names in one column #' @param instrument.name character vector length one. Instrument name. #' @param col.variables variable names column (default = 1), allows dplyr #' subsetting #' @param list.datetime.format formatting for date/time detection. #' See `case_match_regex_list()` #' @param col.description descriptions column, allows dplyr #' subsetting. If empty, variable names will be used. #' @param col.condition conditions for branching column, allows dplyr #' subsetting. See `char2cond()`. #' @param col.subheader sub-header column, allows dplyr subsetting. #' See `format_subheader()`. #' @param subheader.tag formatting tag. Default is "h2" #' @param condition.minor.sep condition split minor. See `char2cond()`. #' Default is ",". #' @param condition.major.sep condition split major. See `char2cond()`. #' Default is ";". #' @param col.calculation calculations column. Has to be written exact. #' Character vector. #' @param col.choices choices column. See `char2choice()`. #' @param choices.char.sep choices split. See `char2choice()`. Default is "/". #' @param missing.default value for missing fields. Default is NA. #' #' @return tibble or data.frame (same as data) #' @export #' #' @examples #' # data <- dd_inst #' # data |> doc2dd(instrument.name = "evt", #' # col.description = 3, #' # col.condition = 4, #' # col.subheader = 2, #' # col.calculation = 5, #' # col.choices = 6) doc2dd <- function(data, instrument.name, col.variables = 1, list.datetime.format = list( date_dmy = "_dat[eo]$", time_hh_mm_ss = "_ti[md]e?$" ), col.description = NULL, col.condition = NULL, col.subheader = NULL, subheader.tag = "h2", condition.minor.sep = ",", condition.major.sep = ";", col.calculation = NULL, col.choices = NULL, choices.char.sep = "/", missing.default = NA) { data <- data |> dplyr::mutate(dplyr::across(dplyr::everything(), ~ dplyr::na_if(.x, c("")))) ## Defining the field name out <- data |> dplyr::mutate( field_name = dplyr::pick(col.variables) |> unlist() ) ## Defining the field label. Field name is used if no label is provided. if (is_missing(col.description)) { out <- out |> dplyr::mutate( field_label = field_name ) } else { out <- out |> dplyr::mutate( field_label = dplyr::pick(col.description) |> unlist() ) } ## Defining the sub-header if (!is_missing(col.subheader)) { out <- out |> dplyr::mutate( section_header = dplyr::pick(col.subheader) |> unlist() |> format_subheader(tag = subheader.tag) ) } ## Defining the choices if (is_missing(col.choices)) { out <- out |> dplyr::mutate( choices = missing.default ) } else { out <- out |> dplyr::mutate( choices = dplyr::pick(col.choices) |> unlist() |> char2choice(char.split = choices.char.sep) ) } ## Defining the calculations if (is_missing(col.calculation)) { out <- out |> dplyr::mutate( calculations = missing.default ) } else { # With inspiration from textclean package, curly apostrophe is replaced out <- out |> dplyr::mutate( calculations = dplyr::pick(col.calculation) |> unlist() |> tolower() |> replace_curly_quote() ) } ## Merging choices and calculations, defining field type and setting form name out <- out |> dplyr::mutate( select_choices_or_calculations = dplyr::coalesce(calculations, choices), field_type = dplyr::case_when(!is.na(choices) ~ "radio", !is.na(calculations) ~ "calc", .default = "text" ), form_name = instrument.name ) ## Defining branching logic from conditions if (is_missing(col.condition)) { out <- out |> dplyr::mutate( branching_logic = missing.default ) } else { out <- out |> dplyr::mutate( branching_logic = dplyr::pick(col.condition) |> unlist() |> char2cond(minor.split = condition.minor.sep, major.split = condition.major.sep) ) } ## Detecting data/time formatting from systematic field names if (is.null(list.datetime.format)) { out <- out |> dplyr::mutate( text_validation_type_or_show_slider_number = missing.default ) } else { out <- out |> dplyr::mutate( text_validation_type_or_show_slider_number = case_match_regex_list( field_name, list.datetime.format ) ) } ## Selecting relevant columns out <- out |> dplyr::select(dplyr::any_of(names(REDCapCAST::redcapcast_meta))) ## Merging and ordering columns for upload out |> list(REDCapCAST::redcapcast_meta |> dplyr::slice(0)) |> dplyr::bind_rows() |> dplyr::select(names(REDCapCAST::redcapcast_meta)) } #' Simple function to generate REDCap choices from character vector #' #' @param data vector #' @param char.split splitting character(s) #' @param raw specific values. Can be used for options of same length. #' @param .default default value for missing. Default is NA. #' #' @return vector #' @export #' #' @examples #' char2choice(c("yes/no"," yep. / nope ","",NA,"what"),.default=NA) char2choice <- function(data, char.split = "/", raw = NULL,.default=NA) { ls <- strsplit(x = data, split = char.split) ls |> purrr::map(function(.x) { if (is.null(raw)) { raw <- seq_len(length(.x)) } if (length(.x) == 0 | all(is.na(.x))) { .default } else { paste(paste0(raw, ", ",trimws(.x)), collapse = " | ") } }) |> purrr::list_c() } #' Simple function to generate REDCap branching logic from character vector #' #' @param data vector #' @param .default default value for missing. Default is NA. #' @param minor.split minor split #' @param major.split major split #' @param major.sep argument separation. Default is " or ". #' #' @return vector #' @export #' #' @examples #' #data <- dd_inst$betingelse #' #c("Extubation_novent, 2; Pacu_delay, 1") |> char2cond() char2cond <- function(data, minor.split = ",", major.split = ";", major.sep = " or ", .default = NA) { strsplit(x = data, split = major.split) |> purrr::map(function(.y) { strsplit(x = .y, split = minor.split) |> purrr::map(function(.x) { if (length(.x) == 0 | all(is.na(.x))) { .default } else { glue::glue("[{trimws(tolower(.x[1]))}]='{trimws(.x[2])}'") } }) |> purrr::list_c() |> glue::glue_collapse(sep = major.sep) }) |> purrr::list_c() } #' List-base regex case_when #' #' @description #' Mimics case_when for list of regex patterns and values. Used for date/time #' validation generation from name vector. Like case_when, the matches are in #' order of priority. #' Primarily used in REDCapCAST to do data type coding from systematic variable #' naming. #' #' @param data vector #' @param match.list list of case matches #' @param .default Default value for non-matches. Default is NA. #' #' @return vector #' @export #' #' @examples #' case_match_regex_list( #' c("test_date", "test_time", "test_tida", "test_tid"), #' list(date_dmy = "_dat[eo]$", time_hh_mm_ss = "_ti[md]e?$") #' ) case_match_regex_list <- function(data, match.list, .default = NA) { match.list |> purrr::imap(function(.z, .i) { dplyr::if_else(grepl(.z, data), .i, NA) }) |> (\(.x){ dplyr::coalesce(!!!.x) })() |> (\(.x){ dplyr::if_else(is.na(.x), .default, .x) })() } #' Multi missing check #' #' @param data character vector #' @param nas character vector of strings considered as NA #' #' @return logical vector is_missing <- function(data,nas=c("", "NA")) { if (is.null(data)) { TRUE } else { is.na(data) | data %in% nas } } #' Replace curly apostrophes and quotes from word #' #' @description #' Copied from textclean, which has not been updated since 2018 and is not #' on CRAN. Github:https://github.com/trinker/textclean #' #' @param x character vector #' #' @return character vector replace_curly_quote <- function(x){ replaces <- c('\x91', '\x92', '\x93', '\x94') Encoding(replaces) <- "latin1" for (i in 1:4) { x <- gsub(replaces[i], c("'", "'", "\"", "\"")[i], x, fixed = TRUE) } x }