REDCapCAST/R/ds2dd_detailed.R

# Register the column names that are referenced unquoted inside the
# dplyr::mutate() calls further down in this file (field_name, field_type,
# select_choices_or_calculations, field_label), so that they are declared as
# known global names.
# NOTE(review): "stats::setNames" is a namespaced function reference rather
# than a variable name; this entry looks unnecessary - confirm before removing.
utils::globalVariables(c(
  "stats::setNames",
  "field_name",
  "field_type",
  "select_choices_or_calculations",
  "field_label"
))
#' Guess which date-time columns really hold time-of-day values only
#'
#' @description
#' Heuristic helper. Columns whose class includes "POSIXct" or "hms" are
#' considered date-time candidates. Among those, a column is flagged as
#' "time only" when its name matches `sel.pos` and does not match `sel.neg`.
#' The guess relies entirely on column naming, so the result should always be
#' inspected. R has no native time-only class; time-only values are
#' represented with the "hms" class and have to be converted to character
#' before REDCap upload.
#'
#' @param data data set (data.frame or tibble)
#' @param validate flag. If `TRUE`, a list of data subsets is returned instead
#' of column names, to ease manual inspection.
#' @param sel.pos Positive selection regex string, matched against the names
#' of the candidate columns. Note that the default uses a bracket expression:
#' it matches "Ti"/"ti" followed by any single one of the characters
#' `d`, `(`, `m`, `e` or `)`.
#' @param sel.neg Negative selection regex string. Candidate columns whose
#' name matches this pattern are never flagged as time only.
#'
#' @return If `validate = FALSE`, a character vector with the names of the
#' guessed time-only columns. If `validate = TRUE`, a list with the elements
#' `is.POSIX` (all candidate columns), `is.datetime` (candidates not flagged)
#' and `is.time_only` (candidates flagged as time only).
#' @export
#'
#' @examples
#' data <- redcapcast_data
#' data |> guess_time_only_filter()
#' data |>
#'   guess_time_only_filter(validate = TRUE) |>
#'   lapply(head)
guess_time_only_filter <- function(data,
                                   validate = FALSE,
                                   sel.pos = "[Tt]i[d(me)]",
                                   sel.neg = "[Dd]at[eo]") {
  ## Candidate columns: anything carrying a POSIXct or hms class
  is_candidate <- vapply(
    data,
    \(col) inherits(col, c("POSIXct", "hms")),
    logical(1)
  )
  datetime_nms <- names(data)[is_candidate]

  ## Name based guess: positive pattern hit without a negative pattern hit.
  ## Inspection of the result is necessary.
  hit_pos <- grepl(pattern = sel.pos, x = datetime_nms)
  hit_neg <- grepl(pattern = sel.neg, x = datetime_nms)
  time_only_log <- hit_pos & !hit_neg

  if (!validate) {
    return(datetime_nms[time_only_log])
  }

  list(
    "is.POSIX" = data[datetime_nms],
    "is.datetime" = data[datetime_nms[!time_only_log]],
    "is.time_only" = data[datetime_nms[time_only_log]]
  )
}

#' Correction based on time_only_filter function
#'
#' Columns guessed by `guess_time_only_filter()` to hold time-of-day values
#' are formatted as "%H:%M:%S" and re-parsed with `readr::parse_time()`, so
#' that any date part is dropped. All other columns are left untouched.
#'
#' @param data data set
#' @param ... arguments passed on to `guess_time_only_filter()`
#'
#' @return `data` with the guessed time-only columns replaced by the parsed
#' time values. If no column is selected, `data` is returned unchanged.
#' @importFrom readr parse_time
#'
#' @examples
#' data <- redcapcast_data
#' ## data |> time_only_correction()
time_only_correction <- function(data, ...) {
  nms <- guess_time_only_filter(data, ...)

  ## Nothing to correct
  if (length(nms) == 0) {
    return(data)
  }

  ## Named list (one element per selected column) used directly as the
  ## replacement value; no column binding is needed for the assignment below.
  data[nms] <- lapply(
    stats::setNames(nms, nms),
    \(nm) readr::parse_time(format(data[[nm]], format = "%H:%M:%S"))
  )
  data
}

#' Change "hms" to "character" for REDCap upload.
#'
#' Every column carrying the "hms" class is converted with `as.character()`;
#' all other columns are passed through as they are. The columns are then
#' combined again with `dplyr::bind_cols()`.
#'
#' @param data data set
#'
#' @return data.frame or tibble
#'
#' @examples
#' data <- redcapcast_data
#' ## data |> time_only_correction() |> hms2character()
hms2character <- function(data) {
  ## Column-wise converter: only "hms" columns are touched
  to_text_if_hms <- \(col) {
    if (inherits(col, "hms")) {
      return(as.character(col))
    }
    col
  }

  converted <- lapply(data, to_text_if_hms)
  dplyr::bind_cols(converted)
}

#' Extract data from stata file for data dictionary
#'
#' @details
#' This function is a natural development of the ds2dd() function. It assumes
#' that the first column is the ID-column. No checks.
#' Please, do always inspect the data dictionary before upload.
#'
#' Ensure, that the data set is formatted with as much information as possible.
#'
#' If any column is of class haven_labelled, the data set is treated as
#' imported from a Stata (.dta) file: field labels and choices are then taken
#' from the column attributes instead of from factor levels.
#'
#' `field.type` can be supplied
#'
#' @param data data frame
#' @param date.format date format, character string. ymd/dmy/mdy. Default is
#' dmy.
#' @param add.auto.id flag to add id column
#' @param form.name manually specify form name(s). Vector of length 1 or
#' ncol(data). Default is NULL and "data" is used.
#' @param form.sep If supplied dataset has form names as suffix or prefix to the
#' column/variable names, the separator can be specified. If supplied, the
#' form.name is ignored. Default is NULL. The value is passed to `strsplit()`
#' as `split`, so regular expression metacharacters are interpreted as such.
#' @param form.prefix Flag to set if form is prefix (TRUE) or suffix (FALSE) to
#' the column names. Assumes all columns have pre- or suffix if specified.
#' @param field.type manually specify field type(s). Vector of length 1 or
#' ncol(data). Default is NULL and "text" is used for everything but factors,
#' which will get "radio".
#' @param field.label manually specify field label(s). Vector of length 1 or
#' ncol(data). Default is NULL and colnames(data) is used or attribute
#' `field.label.attr` for haven_labelled data set (imported .dta file with
#' `haven::read_dta()`).
#' @param field.label.attr attribute name for named labels for haven_labelled
#' data set (imported .dta file with `haven::read_dta()`). Default is "label"
#' @param field.validation manually specify field validation(s). Vector of
#' length 1 or ncol(data). Default is NULL, in which case the validation is
#' derived from the column class: "Date" gives `date_<date.format>`, "hms"
#' gives `time_hh_mm_ss`, "POSIXct" gives `datetime_<date.format>` and
#' "numeric" gives `number`. All other classes get no validation.
#' @param metadata redcap metadata headings. Default is
#' names(REDCapCAST::redcapcast_meta).
#' @param validate.time Flag to validate guessed time columns. If TRUE, the
#' output of `guess_time_only_filter(validate = TRUE)` is returned and no data
#' dictionary is built.
#' @param time.var.sel.pos Positive selection regex string passed to
#' `guess_time_only_filter()` as sel.pos.
#' @param time.var.sel.neg Negative selection regex string passed to
#' `guess_time_only_filter()` as sel.neg.
#'
#' @return list of length 2 with the elements `data` (the data set with
#' time-only columns converted to character and columns named by field name)
#' and `meta` (the data dictionary). If `validate.time = TRUE`, the validation
#' list from `guess_time_only_filter()` is returned instead.
#' @export
#'
#' @examples
#' data <- REDCapCAST::redcapcast_data
#' data |> ds2dd_detailed(validate.time = TRUE)
#' data |> ds2dd_detailed()
#' iris |> ds2dd_detailed(add.auto.id = TRUE)
#' iris |>
#'   ds2dd_detailed(
#'     add.auto.id = TRUE,
#'     form.name = sample(c("b", "c"), size = 6, replace = TRUE, prob = rep(.5, 2))
#'   ) |>
#'   purrr::pluck("meta")
#' mtcars |> ds2dd_detailed(add.auto.id = TRUE)
#' data <- iris |>
#'   ds2dd_detailed(add.auto.id = TRUE) |>
#'   purrr::pluck("data")
#' names(data) <- glue::glue("{sample(x = c('a','b'),size = length(names(data)),
#' replace=TRUE,prob = rep(x=.5,2))}__{names(data)}")
#' data |> ds2dd_detailed(form.sep = "__")
ds2dd_detailed <- function(data,
                           add.auto.id = FALSE,
                           date.format = "dmy",
                           form.name = NULL,
                           form.sep = NULL,
                           form.prefix = TRUE,
                           field.type = NULL,
                           field.label = NULL,
                           field.label.attr = "label",
                           field.validation = NULL,
                           metadata = names(REDCapCAST::redcapcast_meta),
                           validate.time = FALSE,
                           time.var.sel.pos = "[Tt]i[d(me)]",
                           time.var.sel.neg = "[Dd]at[eo]") {
  ## Handles the odd case of no id column present
  if (add.auto.id) {
    data <- dplyr::tibble(
      record_id = seq_len(nrow(data)),
      data
    )
    message("A default id column has been added")
  }

  ## Validation uses the same selection patterns as the actual correction,
  ## so the inspected guess matches what would be applied.
  if (validate.time) {
    return(
      data |>
        guess_time_only_filter(
          validate = TRUE,
          sel.pos = time.var.sel.pos,
          sel.neg = time.var.sel.neg
        )
    )
  }

  if (lapply(data, haven::is.labelled) |> (\(x)do.call(c, x))() |> any()) {
    message("Data seems to be imported with haven from a Stata (.dta) file and
            will be treated as such.")
    data.source <- "dta"
  } else {
    data.source <- ""
  }

  ## data classes

  ### For labelled (.dta) data the classes are read after conversion to
  ### factors, so labelled columns are handled as factors below.
  class_source <- if (data.source == "dta") haven::as_factor(data) else data

  ### Only keeps the first class, as time fields (POSIXct/POSIXt) has two
  ### classes
  data_classes <-
    class_source |>
    time_only_correction(
      sel.pos = time.var.sel.pos,
      sel.neg = time.var.sel.neg
    ) |>
    lapply(\(x)class(x)[1]) |>
    (\(x)do.call(c, x))()

  ## ---------------------------------------
  ## Building the data dictionary
  ## ---------------------------------------

  ## skeleton: one row per column in data, one (empty) column per heading

  dd <- data.frame(matrix(ncol = length(metadata), nrow = ncol(data))) |>
    stats::setNames(metadata) |>
    dplyr::tibble()

  ## form_name and field_name

  if (!is.null(form.sep)) {
    if (form.sep != "") {
      parts <- strsplit(names(data), split = form.sep)

      ## form.sep should be unique, but handles re-occurring pattern (by only
      ## considering first or last) and form.prefix defines if form is prefix
      ## or suffix.
      ## The other split part is used as field names
      if (form.prefix) {
        dd$form_name <- clean_redcap_name(
          Reduce(c, lapply(parts, \(.x) .x[[1]]))
        )
        dd$field_name <- Reduce(c, lapply(parts, \(.x) {
          paste(.x[seq_len(length(.x))[-1]], collapse = form.sep)
        }))
      } else {
        dd$form_name <- clean_redcap_name(
          Reduce(c, lapply(parts, \(.x) .x[[length(.x)]]))
        )
        dd$field_name <- Reduce(c, lapply(parts, \(.x) {
          paste(.x[seq_len(length(.x) - 1)], collapse = form.sep)
        }))
      }
    } else {
      dd$form_name <- "data"
      dd$field_name <- gsub(" ", "_", tolower(colnames(data)))
    }
  } else {
    ## if no form name prefix, the colnames are used as field_names
    dd$field_name <- gsub(" ", "_", tolower(colnames(data)))

    if (is.null(form.name)) {
      dd$form_name <- "data"
    } else {
      if (length(form.name) == 1 || length(form.name) == nrow(dd)) {
        dd$form_name <- form.name
      } else {
        stop("Length of supplied 'form.name' has to be one (1) or ncol(data).")
      }
    }
  }

  ## field_label

  if (is.null(field.label)) {
    if (data.source == "dta") {
      ## One label per column is required. Labelled columns lacking the
      ## attribute (or carrying a non-scalar one) yield NA, so the result
      ## always has one element per column and falls back to the field name.
      dd$field_label <- vapply(data, function(x) {
        lbl <- if (haven::is.labelled(x)) {
          attributes(x)[[field.label.attr]]
        } else {
          NULL
        }
        if (is.null(lbl) || length(lbl) != 1) {
          NA_character_
        } else {
          as.character(lbl)
        }
      }, character(1))
    }

    ## Missing labels default to the field name
    dd <-
      dd |> dplyr::mutate(field_label = dplyr::if_else(is.na(field_label),
        field_name, field_label
      ))
  } else {
    if (length(field.label) == 1 || length(field.label) == nrow(dd)) {
      dd$field_label <- field.label
    } else {
      stop("Length of supplied 'field.label' has to be one (1) or ncol(data).")
    }
  }


  ## field_type

  if (is.null(field.type)) {
    dd$field_type <- "text"

    dd <-
      dd |> dplyr::mutate(field_type = dplyr::if_else(data_classes == "factor",
        "radio", field_type
      ))
  } else {
    if (length(field.type) == 1 || length(field.type) == nrow(dd)) {
      dd$field_type <- field.type
    } else {
      stop("Length of supplied 'field.type' has to be one (1) or ncol(data).")
    }
  }

  ## validation

  if (is.null(field.validation)) {
    dd <-
      dd |> dplyr::mutate(
        text_validation_type_or_show_slider_number = dplyr::case_when(
          data_classes == "Date" ~ paste0("date_", date.format),
          data_classes ==
            "hms" ~ "time_hh_mm_ss",
          ## Self invented format after filtering
          data_classes ==
            "POSIXct" ~ paste0("datetime_", date.format),
          data_classes ==
            "numeric" ~ "number"
        )
      )
  } else {
    if (length(field.validation) == 1 || length(field.validation) == nrow(dd)) {
      dd$text_validation_type_or_show_slider_number <- field.validation
    } else {
      stop("Length of supplied 'field.validation'
           has to be one (1) or ncol(data).")
    }
  }


  ## choices

  if (data.source == "dta") {
    ## Labelled columns: "value, label" pairs from the "labels" attribute
    factor_levels <- data |>
      lapply(function(x) {
        if (haven::is.labelled(x)) {
          att <- attributes(x)$labels
          paste(paste(att, names(att), sep = ", "), collapse = " | ")
        } else {
          NA
        }
      }) |>
      (\(x)do.call(c, x))()
  } else {
    ## Factor columns: levels are numbered consecutively from 1
    factor_levels <- data |>
      lapply(function(x) {
        if (is.factor(x)) {
          ## Re-factors to avoid confusion with missing levels
          ## Assumes all relevant levels are represented in the data
          re_fac <- factor(x)
          paste(
            paste(seq_along(levels(re_fac)),
              levels(re_fac),
              sep = ", "
            ),
            collapse = " | "
          )
        } else {
          NA
        }
      }) |>
      (\(x)do.call(c, x))()
  }

  dd <-
    dd |> dplyr::mutate(
      select_choices_or_calculations = dplyr::if_else(
        is.na(factor_levels),
        select_choices_or_calculations,
        factor_levels
      )
    )

  ## Output data: time-only columns corrected and stored as character,
  ## columns renamed to the field names of the dictionary
  list(
    data = data |>
      time_only_correction(
        sel.pos = time.var.sel.pos,
        sel.neg = time.var.sel.neg
      ) |>
      hms2character() |>
      stats::setNames(dd$field_name),
    meta = dd
  )
}

### Completion
#' Completion marking based on completed upload
#'
#' Builds a table with the IDs (first column of `ls$data`) that are listed in
#' `upload$affected_ids`, followed by one `<form>_complete` column per unique
#' form name in `ls$meta`, each filled with the value 2.
#'
#' @param upload output list from `REDCapR::redcap_write()`. The elements
#' `affected_ids` and `records_affected_count` are used.
#' @param ls output list from `ds2dd_detailed()`
#'
#' @return data.frame with the ID column and one `<form>_complete` column per
#' form
mark_complete <- function(upload, ls) {
  id_values <- ls$data[[1]]
  id_name <- names(ls$data)[1]
  form_names <- unique(ls$meta$form_name)

  ## IDs present in the upload result
  uploaded_ids <- id_values[id_values %in% upload$affected_ids]

  ## One column per form, all set to 2
  complete_flags <- data.frame(
    matrix(2, ncol = length(form_names), nrow = upload$records_affected_count)
  )

  out <- cbind(uploaded_ids, complete_flags)
  stats::setNames(out, c(id_name, paste0(form_names, "_complete")))
}
linting 2024-02-27 13:20:21 +01:00			`utils::globalVariables(c(`
			`"stats::setNames",`
			`"field_name",`
			`"field_type",`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`"select_choices_or_calculations",`
			`"field_label"`
linting 2024-02-27 13:20:21 +01:00			`))`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`#' Try at determining which are true time only variables`
			`#'`
			`#' @description`
			`#' This is just a try at guessing data type based on data class and column names`
			`#' hoping for a tiny bit of naming consistency. R does not include a time-only`
			#' data format natively, so the "hms" class from `readr` is used. This
			`#' has to be converted to character class before REDCap upload.`
			`#'`
			`#' @param data data set`
			`#' @param validate flag to output validation data. Will output list.`
			`#' @param sel.pos Positive selection regex string`
			`#' @param sel.neg Negative selection regex string`
			`#'`
			#' @return character vector or list depending on `validate` flag.
			`#' @export`
			`#'`
			`#' @examples`
			`#' data <- redcapcast_data`
			`#' data \|> guess_time_only_filter()`
linting 2024-02-27 13:20:21 +01:00			`#' data \|>`
			`#' guess_time_only_filter(validate = TRUE) \|>`
			`#' lapply(head)`
			`guess_time_only_filter <- function(data,`
			`validate = FALSE,`
			`sel.pos = "[Tt]i[d(me)]",`
			`sel.neg = "[Dd]at[eo]") {`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`datetime_nms <- data \|>`
linting 2024-02-27 13:20:21 +01:00			`lapply(\(x) any(c("POSIXct", "hms") %in% class(x))) \|>`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`(\(x) names(data)[do.call(c, x)])()`

			`time_only_log <- datetime_nms \|> (\(x) {`
			`## Detects which are determined true Time only variables`
			`## Inspection is necessary`
			`grepl(pattern = sel.pos, x = x) &`
			`!grepl(pattern = sel.neg, x = x)`
			`})()`

			`if (validate) {`
			`list(`
			`"is.POSIX" = data[datetime_nms],`
			`"is.datetime" = data[datetime_nms[!time_only_log]],`
			`"is.time_only" = data[datetime_nms[time_only_log]]`
			`)`
			`} else {`
			`datetime_nms[time_only_log]`
			`}`
			`}`

linting 2024-02-27 13:20:21 +01:00			`#' Correction based on time_only_filter function`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`#'`
			`#'`
			`#' @param data data set`
			#' @param ... arguments passed on to `guess_time_only_filter()`
			`#'`
			`#' @return tibble`
			`#' @importFrom readr parse_time`
			`#'`
			`#' @examples`
			`#' data <- redcapcast_data`
			`#' ## data \|> time_only_correction()`
			`time_only_correction <- function(data, ...) {`
			`nms <- guess_time_only_filter(data, ...)`
			`z <- nms \|>`
			`lapply(\(y) {`
			`readr::parse_time(format(data[[y]], format = "%H:%M:%S"))`
			`}) \|>`
			`suppressMessages(dplyr::bind_cols()) \|>`
			`stats::setNames(nm = nms)`
			`data[nms] <- z`
			`data`
			`}`

			`#' Change "hms" to "character" for REDCap upload.`
			`#'`
			`#' @param data data set`
			`#'`
			`#' @return data.frame or tibble`
			`#'`
			`#' @examples`
			`#' data <- redcapcast_data`
			`#' ## data \|> time_only_correction() \|> hms2character()`
			`hms2character <- function(data) {`
			`data \|>`
			`lapply(function(x) {`
			`if ("hms" %in% class(x)) {`
			`as.character(x)`
			`} else {`
			`x`
			`}`
			`}) \|>`
			`dplyr::bind_cols()`
			`}`

			`#' Extract data from stata file for data dictionary`
			`#'`
			`#' @details`
			`#' This function is a natural development of the ds2dd() function. It assumes`
			`#' that the first column is the ID-column. No checks.`
			`#' Please, do always inspect the data dictionary before upload.`
			`#'`
			`#' Ensure, that the data set is formatted with as much information as possible.`
			`#'`
			#' `field.type` can be supplied
			`#'`
			`#' @param data data frame`
			`#' @param date.format date format, character string. ymd/dmy/mdy. dafault is`
			`#' dmy.`
			`#' @param add.auto.id flag to add id column`
			`#' @param form.name manually specify form name(s). Vector of length 1 or`
			`#' ncol(data). Default is NULL and "data" is used.`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`#' @param form.sep If supplied dataset has form names as suffix or prefix to the`
			`#' column/variable names, the seperator can be specified. If supplied, the`
preparing for next version 2024-10-24 11:41:48 +02:00			`#' form.name is ignored. Default is NULL.`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`#' @param form.prefix Flag to set if form is prefix (TRUE) or suffix (FALSE) to`
			`#' the column names. Assumes all columns have pre- or suffix if specified.`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`#' @param field.type manually specify field type(s). Vector of length 1 or`
			`#' ncol(data). Default is NULL and "text" is used for everything but factors,`
			`#' which wil get "radio".`
			`#' @param field.label manually specify field label(s). Vector of length 1 or`
			`#' ncol(data). Default is NULL and colnames(data) is used or attribute`
			#' `field.label.attr` for haven_labelled data set (imported .dta file with
			#' `haven::read_dta()`).
			`#' @param field.label.attr attribute name for named labels for haven_labelled`
			#' data set (imported .dta file with `haven::read_dta()`. Default is "label"
			`#' @param field.validation manually specify field validation(s). Vector of`
			#' length 1 or ncol(data). Default is NULL and `levels()` are used for factors
linting 2024-02-27 13:20:21 +01:00			#' or attribute `factor.labels.attr` for haven_labelled data set (imported .dta
			#' file with `haven::read_dta()`).
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`#' @param metadata redcap metadata headings. Default is`
			`#' REDCapCAST:::metadata_names.`
			`#' @param validate.time Flag to validate guessed time columns`
			`#' @param time.var.sel.pos Positive selection regex string passed to`
			#' `gues_time_only_filter()` as sel.pos.
			`#' @param time.var.sel.neg Negative selection regex string passed to`
			#' `gues_time_only_filter()` as sel.neg.
			`#'`
			`#' @return list of length 2`
			`#' @export`
			`#'`
			`#' @examples`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`#' data <- REDCapCAST::redcapcast_data`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`#' data \|> ds2dd_detailed(validate.time = TRUE)`
			`#' data \|> ds2dd_detailed()`
			`#' iris \|> ds2dd_detailed(add.auto.id = TRUE)`
preparing for next version 2024-10-24 11:41:48 +02:00			`#' iris \|>`
			`#' ds2dd_detailed(`
			`#' add.auto.id = TRUE,`
			`#' form.name = sample(c("b", "c"), size = 6, replace = TRUE, prob = rep(.5, 2))`
			`#' ) \|>`
			`#' purrr::pluck("meta")`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`#' mtcars \|> ds2dd_detailed(add.auto.id = TRUE)`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`#' data <- iris \|>`
			`#' ds2dd_detailed(add.auto.id = TRUE) \|>`
			`#' purrr::pluck("data")`
			`#' names(data) <- glue::glue("{sample(x = c('a','b'),size = length(names(data)),`
			`#' replace=TRUE,prob = rep(x=.5,2))}__{names(data)}")`
preparing for next version 2024-10-24 11:41:48 +02:00			`#' data \|> ds2dd_detailed(form.sep = "__")`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`ds2dd_detailed <- function(data,`
			`add.auto.id = FALSE,`
			`date.format = "dmy",`
			`form.name = NULL,`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`form.sep = NULL,`
			`form.prefix = TRUE,`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`field.type = NULL,`
			`field.label = NULL,`
linting 2024-02-27 13:20:21 +01:00			`field.label.attr = "label",`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`field.validation = NULL,`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`metadata = names(REDCapCAST::redcapcast_meta),`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`validate.time = FALSE,`
			`time.var.sel.pos = "[Tt]i[d(me)]",`
			`time.var.sel.neg = "[Dd]at[eo]") {`
			`## Handles the odd case of no id column present`
			`if (add.auto.id) {`
			`data <- dplyr::tibble(`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`record_id = seq_len(nrow(data)),`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`data`
			`)`
			`message("A default id column has been added")`
			`}`

			`if (validate.time) {`
			`return(data \|> guess_time_only_filter(validate = TRUE))`
			`}`

			`if (lapply(data, haven::is.labelled) \|> (\(x)do.call(c, x))() \|> any()) {`
linting 2024-02-27 13:20:21 +01:00			`message("Data seems to be imported with haven from a Stata (.dta) file and`
			`will be treated as such.")`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`data.source <- "dta"`
			`} else {`
			`data.source <- ""`
			`}`

			`## data classes`

linting 2024-02-27 13:20:21 +01:00			`### Only keeps the first class, as time fields (POSIXct/POSIXt) has two`
			`### classes`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`if (data.source == "dta") {`
			`data_classes <-`
			`data \|>`
			`haven::as_factor() \|>`
linting 2024-02-27 13:20:21 +01:00			`time_only_correction(`
			`sel.pos = time.var.sel.pos,`
			`sel.neg = time.var.sel.neg`
			`) \|>`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`lapply(\(x)class(x)[1]) \|>`
			`(\(x)do.call(c, x))()`
			`} else {`
			`data_classes <-`
			`data \|>`
linting 2024-02-27 13:20:21 +01:00			`time_only_correction(`
			`sel.pos = time.var.sel.pos,`
			`sel.neg = time.var.sel.neg`
			`) \|>`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`lapply(\(x)class(x)[1]) \|>`
			`(\(x)do.call(c, x))()`
			`}`

			`## ---------------------------------------`
			`## Building the data dictionary`
			`## ---------------------------------------`

			`## skeleton`

			`dd <- data.frame(matrix(ncol = length(metadata), nrow = ncol(data))) \|>`
			`stats::setNames(metadata) \|>`
			`dplyr::tibble()`

updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`## form_name and field_name`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`if (!is.null(form.sep)) {`
preparing for next version 2024-10-24 11:41:48 +02:00			`if (form.sep != "") {`
			`parts <- strsplit(names(data), split = form.sep)`

			`## form.sep should be unique, but handles re-occuring pattern (by only considering first or last) and form.prefix defines if form is prefix or suffix`
			`## The other split part is used as field names`
			`if (form.prefix){`
			`dd$form_name <- clean_redcap_name(Reduce(c,lapply(parts,\(.x) .x[[1]])))`
			`dd$field_name <- Reduce(c,lapply(parts,\(.x) paste(.x[seq_len(length(.x))[-1]],collapse=form.sep)))`
			`} else {`
			`dd$form_name <- clean_redcap_name(Reduce(c,lapply(parts,\(.x) .x[[length(.x)]])))`
			`dd$field_name <- Reduce(c,lapply(parts,\(.x) paste(.x[seq_len(length(.x)-1)],collapse=form.sep)))`
			`}`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`} else {`
			`dd$form_name <- "data"`
			`dd$field_name <- gsub(" ", "_", tolower(colnames(data)))`
			`}`
preparing for next version 2024-10-24 11:41:48 +02:00			`} else {`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`## if no form name prefix, the colnames are used as field_names`
			`dd$field_name <- gsub(" ", "_", tolower(colnames(data)))`
preparing for next version 2024-10-24 11:41:48 +02:00
			`if (is.null(form.name)) {`
			`dd$form_name <- "data"`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`} else {`
preparing for next version 2024-10-24 11:41:48 +02:00			`if (length(form.name) == 1 \|\| length(form.name) == nrow(dd)) {`
			`dd$form_name <- form.name`
			`} else {`
			`stop("Length of supplied 'form.name' has to be one (1) or ncol(data).")`
			`}`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`}`
			`}`

			`## field_label`

			`if (is.null(field.label)) {`
			`if (data.source == "dta") {`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`dd$field_label <- data \|>`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`lapply(function(x) {`
			`if (haven::is.labelled(x)) {`
			`attributes(x)[[field.label.attr]]`
			`} else {`
			`NA`
			`}`
			`}) \|>`
			`(\(x)do.call(c, x))()`
			`}`

			`dd <-`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`dd \|> dplyr::mutate(field_label = dplyr::if_else(is.na(field_label),`
			`field_name, field_label`
linting 2024-02-27 13:20:21 +01:00			`))`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`} else {`
linting 2024-02-27 13:20:21 +01:00			`if (length(field.label) == 1 \|\| length(field.label) == nrow(dd)) {`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`dd$field_label <- field.label`
			`} else {`
			`stop("Length of supplied 'field.label' has to be one (1) or ncol(data).")`
			`}`
			`}`


			`## field_type`

			`if (is.null(field.type)) {`
			`dd$field_type <- "text"`

			`dd <-`
linting 2024-02-27 13:20:21 +01:00			`dd \|> dplyr::mutate(field_type = dplyr::if_else(data_classes == "factor",`
			`"radio", field_type`
			`))`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`} else {`
linting 2024-02-27 13:20:21 +01:00			`if (length(field.type) == 1 \|\| length(field.type) == nrow(dd)) {`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`dd$field_type <- field.type`
			`} else {`
			`stop("Length of supplied 'field.type' has to be one (1) or ncol(data).")`
			`}`
			`}`

			`## validation`

			`if (is.null(field.validation)) {`
			`dd <-`
			`dd \|> dplyr::mutate(`
			`text_validation_type_or_show_slider_number = dplyr::case_when(`
			`data_classes == "Date" ~ paste0("date_", date.format),`
			`data_classes ==`
			`"hms" ~ "time_hh_mm_ss",`
			`## Self invented format after filtering`
			`data_classes ==`
			`"POSIXct" ~ paste0("datetime_", date.format),`
			`data_classes ==`
			`"numeric" ~ "number"`
			`)`
			`)`
			`} else {`
linting 2024-02-27 13:20:21 +01:00			`if (length(field.validation) == 1 \|\| length(field.validation) == nrow(dd)) {`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`dd$text_validation_type_or_show_slider_number <- field.validation`
			`} else {`
linting 2024-02-27 13:20:21 +01:00			`stop("Length of supplied 'field.validation'`
			`has to be one (1) or ncol(data).")`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`}`
			`}`



			`## choices`

			`if (data.source == "dta") {`
			`factor_levels <- data \|>`
			`lapply(function(x) {`
			`if (haven::is.labelled(x)) {`
			`att <- attributes(x)$labels`
			`paste(paste(att, names(att), sep = ", "), collapse = " \| ")`
			`} else {`
			`NA`
			`}`
			`}) \|>`
			`(\(x)do.call(c, x))()`
			`} else {`
			`factor_levels <- data \|>`
			`lapply(function(x) {`
			`if (is.factor(x)) {`
			`## Re-factors to avoid confusion with missing levels`
major update with new functions and renv is out! see NEWS section 2024-06-07 10:35:16 +02:00			`## Assumes all relevant levels are represented in the data`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`re_fac <- factor(x)`
linting 2024-02-27 13:20:21 +01:00			`paste(`
major update with new functions and renv is out! see NEWS section 2024-06-07 10:35:16 +02:00			`paste(seq_along(levels(re_fac)),`
linting 2024-02-27 13:20:21 +01:00			`levels(re_fac),`
			`sep = ", "`
			`),`
			`collapse = " \| "`
			`)`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`} else {`
			`NA`
			`}`
			`}) \|>`
			`(\(x)do.call(c, x))()`
			`}`

			`dd <-`
			`dd \|> dplyr::mutate(`
			`select_choices_or_calculations = dplyr::if_else(`
			`is.na(factor_levels),`
			`select_choices_or_calculations,`
			`factor_levels`
			`)`
			`)`

			`list(`
			`data = data \|>`
linting 2024-02-27 13:20:21 +01:00			`time_only_correction(`
			`sel.pos = time.var.sel.pos,`
			`sel.neg = time.var.sel.neg`
			`) \|>`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`hms2character() \|>`
updated to handle form names as variable name pre or suffix. prepared for shiny app extension 2024-04-12 12:19:56 +02:00			`stats::setNames(dd$field_name),`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`meta = dd`
			`)`
			`}`

			`### Completion`
			`#' Completion marking based on completed upload`
			`#'`
			#' @param upload output list from `REDCapR::redcap_write()`
			#' @param ls output list from `ds2dd_detailed()`
			`#'`
			#' @return list with `REDCapR::redcap_write()` results
linting 2024-02-27 13:20:21 +01:00			`mark_complete <- function(upload, ls) {`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`data <- ls$data`
			`meta <- ls$meta`
			`forms <- unique(meta$form_name)`
linting 2024-02-27 13:20:21 +01:00			`cbind(`
			`data[[1]][data[[1]] %in% upload$affected_ids],`
			`data.frame(matrix(2,`
			`ncol = length(forms),`
			`nrow = upload$records_affected_count`
			`))`
			`) \|>`
			`stats::setNames(c(names(data)[1], paste0(forms, "_complete")))`
new function`ds2dd_detailed()`which includes more details than the old `ds2dd()`. 2024-01-18 14:57:12 +01:00			`}`