Compare commits

...

10 Commits

25 changed files with 540 additions and 179 deletions

1
.gitignore vendored

@ -12,3 +12,4 @@ drafting
.DS_Store
cran-comments.md
~/REDCapCAST/inst/shiny-examples/casting/rsconnect
~/REDCapCAST/inst/shiny-examples/casting/rsconnect/


@ -58,7 +58,8 @@ Imports:
zip,
assertthat,
openxlsx2,
readODS
readODS,
forcats
Collate:
'REDCapCAST-package.R'
'utils.r'


@ -16,14 +16,19 @@ export(doc2dd)
export(ds2dd)
export(ds2dd_detailed)
export(easy_redcap)
export(export_redcap_instrument)
export(file_extension)
export(focused_metadata)
export(format_subheader)
export(get_api_key)
export(guess_time_only)
export(guess_time_only_filter)
export(haven_all_levels)
export(html_tag_wrap)
export(is_repeated_longitudinal)
export(match_fields_to_form)
export(numchar2fct)
export(parse_data)
export(process_user_input)
export(read_input)
export(read_redcap_instrument)
@ -33,9 +38,11 @@ export(sanitize_split)
export(shiny_cast)
export(split_non_repeating_forms)
export(strsplitx)
export(var2fct)
importFrom(REDCapR,redcap_event_instruments)
importFrom(REDCapR,redcap_metadata_read)
importFrom(REDCapR,redcap_read)
importFrom(forcats,as_factor)
importFrom(keyring,key_get)
importFrom(keyring,key_list)
importFrom(keyring,key_set)

10
NEWS.md

@ -1,3 +1,13 @@
# REDCapCAST 24.11.2
24.11.1 was rejected on CRAN due to wrong title capitalisation. This was an opportunity to extend the package overhaul.
Documentation has been updated. Data parser functions (based on readr) have been added and separated from `ds2dd_detailed()`.
Vignettes and documentation have been restructured.
This package has been detached from REDCapRITS, which it was originally forked from. The data split function will be kept, while the tests will be rewritten. This project has evolved away from the original fork, so I think this detachment is fair.
# REDCapCAST 24.11.1
Revised tests.


@ -135,18 +135,12 @@ hms2character <- function(data) {
#' file with `haven::read_dta()`).
#' @param metadata redcap metadata headings. Default is
#' REDCapCAST:::metadata_names.
#' @param validate.time Flag to validate guessed time columns
#' @param time.var.sel.pos Positive selection regex string passed to
#' `guess_time_only_filter()` as sel.pos.
#' @param time.var.sel.neg Negative selection regex string passed to
#' `guess_time_only_filter()` as sel.neg.
#'
#' @return list of length 2
#' @export
#'
#' @examples
#' data <- REDCapCAST::redcapcast_data
#' data |> ds2dd_detailed(validate.time = TRUE)
#' data |> ds2dd_detailed()
#' iris |> ds2dd_detailed(add.auto.id = TRUE)
#' iris |>
@ -172,10 +166,7 @@ ds2dd_detailed <- function(data,
field.label = NULL,
field.label.attr = "label",
field.validation = NULL,
metadata = names(REDCapCAST::redcapcast_meta),
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]") {
metadata = names(REDCapCAST::redcapcast_meta)) {
## Handles the odd case of no id column present
if (add.auto.id) {
data <- dplyr::tibble(
@ -185,43 +176,6 @@ ds2dd_detailed <- function(data,
message("A default id column has been added")
}
if (validate.time) {
return(data |> guess_time_only_filter(validate = TRUE))
}
if (lapply(data, haven::is.labelled) |> (\(x)do.call(c, x))() |> any()) {
message("Data seems to be imported with haven from a Stata (.dta) file and
will be treated as such.")
data.source <- "dta"
} else {
data.source <- ""
}
## data classes
### Only keeps the first class, as time fields (POSIXct/POSIXt) have two
### classes
if (data.source == "dta") {
data_classes <-
data |>
haven::as_factor() |>
time_only_correction(
sel.pos = time.var.sel.pos,
sel.neg = time.var.sel.neg
) |>
lapply(\(x)class(x)[1]) |>
(\(x)do.call(c, x))()
} else {
data_classes <-
data |>
time_only_correction(
sel.pos = time.var.sel.pos,
sel.neg = time.var.sel.neg
) |>
lapply(\(x)class(x)[1]) |>
(\(x)do.call(c, x))()
}
## ---------------------------------------
## Building the data dictionary
## ---------------------------------------
@ -240,12 +194,12 @@ ds2dd_detailed <- function(data,
## form.sep should be unique, but recurring patterns are handled (by only considering the first or last occurrence); form.prefix defines whether the form name is the prefix or suffix
## The other split part is used as field names
if (form.prefix){
dd$form_name <- clean_redcap_name(Reduce(c,lapply(parts,\(.x) .x[[1]])))
dd$field_name <- Reduce(c,lapply(parts,\(.x) paste(.x[seq_len(length(.x))[-1]],collapse=form.sep)))
if (form.prefix) {
dd$form_name <- clean_redcap_name(Reduce(c, lapply(parts, \(.x) .x[[1]])))
dd$field_name <- Reduce(c, lapply(parts, \(.x) paste(.x[seq_len(length(.x))[-1]], collapse = form.sep)))
} else {
dd$form_name <- clean_redcap_name(Reduce(c,lapply(parts,\(.x) .x[[length(.x)]])))
dd$field_name <- Reduce(c,lapply(parts,\(.x) paste(.x[seq_len(length(.x)-1)],collapse=form.sep)))
dd$form_name <- clean_redcap_name(Reduce(c, lapply(parts, \(.x) .x[[length(.x)]])))
dd$field_name <- Reduce(c, lapply(parts, \(.x) paste(.x[seq_len(length(.x) - 1)], collapse = form.sep)))
}
} else {
dd$form_name <- "data"
@ -269,17 +223,16 @@ ds2dd_detailed <- function(data,
## field_label
if (is.null(field.label)) {
if (data.source == "dta") {
dd$field_label <- data |>
lapply(function(x) {
if (haven::is.labelled(x)) {
attributes(x)[[field.label.attr]]
} else {
NA
}
}) |>
(\(x)do.call(c, x))()
}
dd$field_label <- data |>
lapply(function(x) {
if (haven::is.labelled(x)) {
att <- haven_all_levels(x)
names(att)
} else {
NA
}
}) |>
(\(x)do.call(c, x))()
dd <-
dd |> dplyr::mutate(field_label = dplyr::if_else(is.na(field_label),
@ -294,6 +247,8 @@ ds2dd_detailed <- function(data,
}
data_classes <- do.call(c, lapply(data, \(.x)class(.x)[1]))
## field_type
if (is.null(field.type)) {
@ -312,7 +267,6 @@ ds2dd_detailed <- function(data,
}
## validation
if (is.null(field.validation)) {
dd <-
dd |> dplyr::mutate(
@ -336,15 +290,13 @@ ds2dd_detailed <- function(data,
}
}
## choices
if (data.source == "dta") {
if (any(do.call(c, lapply(data, haven::is.labelled)))) {
factor_levels <- data |>
lapply(function(x) {
if (haven::is.labelled(x)) {
att <- attributes(x)$labels
att <- haven_all_levels(x)
paste(paste(att, names(att), sep = ", "), collapse = " | ")
} else {
NA
@ -383,16 +335,75 @@ ds2dd_detailed <- function(data,
list(
data = data |>
time_only_correction(
sel.pos = time.var.sel.pos,
sel.neg = time.var.sel.neg
) |>
hms2character() |>
stats::setNames(dd$field_name),
meta = dd
)
}
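For orientation, a minimal sketch of the returned structure, reusing the `iris` example from the roxygen block above; the result is the list of length 2 named in `@return`:

```
dd <- iris |> ds2dd_detailed(add.auto.id = TRUE)
# The list holds the renamed data and the cast data dictionary:
names(dd)
#> [1] "data" "meta"
```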
#' Finish incomplete haven attributes substituting missings with values
#'
#' @param data haven labelled variable
#'
#' @return named vector
#' @export
#'
#' @examples
#' ds <- structure(c(1, 2, 3, 2, 10, 9),
#' labels = c(Unknown = 9, Refused = 10),
#' class = "haven_labelled"
#' )
#' ds |> haven_all_levels()
haven_all_levels <- function(data) {
stopifnot(haven::is.labelled(data))
if (length(attributes(data)$labels) == length(unique(data))) {
out <- attributes(data)$labels
} else {
att <- attributes(data)$labels
out <- c(unique(data[!data %in% att]), att) |>
stats::setNames(c(unique(data[!data %in% att]), names(att)))
}
out
}
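Assuming base-R concatenation semantics, running the roxygen example above should give a named vector in which unlabelled values name themselves while labelled values keep their labels — a minimal sketch:

```
ds <- structure(c(1, 2, 3, 2, 10, 9),
  labels = c(Unknown = 9, Refused = 10),
  class = "haven_labelled"
)
# Values 1, 2 and 3 carry no labels and so name themselves;
# 9 and 10 keep their haven labels:
haven_all_levels(ds)
#>       1       2       3 Unknown Refused
#>       1       2       3       9      10
```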
#' Guess time variables based on naming pattern
#'
#' @description
#' This is for repairing data with time variables that have the dummy date "1970-01-01" appended.
#'
#'
#' @param data data.frame or tibble
#' @param validate.time Flag to validate guessed time columns
#' @param time.var.sel.pos Positive selection regex string passed to
#' `guess_time_only_filter()` as sel.pos.
#' @param time.var.sel.neg Negative selection regex string passed to
#' `guess_time_only_filter()` as sel.neg.
#'
#' @return data.frame or tibble
#' @export
#'
#' @examples
#' redcapcast_data |> guess_time_only(validate.time = TRUE)
guess_time_only <- function(data,
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]") {
if (validate.time) {
return(data |> guess_time_only_filter(validate = TRUE))
}
### Only keeps the first class, as time fields (POSIXct/POSIXt) have two
### classes
data |> time_only_correction(
sel.pos = time.var.sel.pos,
sel.neg = time.var.sel.neg
)
}
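A sketch of the repair this performs, assuming `time_only_correction()` converts matching columns to a time-of-day class (the column name `visit_time` is illustrative and matches the default positive regex):

```
# Hypothetical input: a time-only variable imported as a date-time
# anchored at the dummy date 1970-01-01.
df <- data.frame(
  visit_time = as.POSIXct(
    c("1970-01-01 08:30:00", "1970-01-01 14:00:00"),
    tz = "UTC"
  )
)
# guess_time_only() should strip the dummy date, keeping only time of day:
df |>
  guess_time_only() |>
  str()
```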
### Completion
#' Completion marking based on completed upload
#'
@ -413,3 +424,127 @@ mark_complete <- function(upload, ls) {
) |>
stats::setNames(c(names(data)[1], paste0(forms, "_complete")))
}
#' Helper to auto-parse un-formatted data with haven and readr
#'
#' @param data data.frame or tibble
#' @param guess_type logical to guess type with readr
#' @param col_types specify col_types using readr semantics; if supplied, this overrides guessing
#' @param locale option to specify locale. Defaults to readr::default_locale().
#' @param ignore.vars specify column names of columns to ignore when parsing
#' @param ... ignored
#'
#' @return data.frame or tibble
#' @export
#'
#' @examples
#' mtcars |>
#' parse_data() |>
#' str()
parse_data <- function(data,
guess_type = TRUE,
col_types = NULL,
locale = readr::default_locale(),
ignore.vars = "cpr",
...) {
if (any(ignore.vars %in% names(data))) {
ignored <- data[ignore.vars]
} else {
ignored <- NULL
}
## Parses haven data by applying labels as factors if any are present
if (do.call(c, lapply(data, (\(x)inherits(x, "haven_labelled")))) |> any()) {
data <- data |>
haven::as_factor()
}
## Applying readr cols
if (is.null(col_types) && guess_type) {
if (do.call(c, lapply(data, is.character)) |> any()) {
data <- data |> readr::type_convert(
locale = locale,
col_types = readr::cols(.default = readr::col_guess())
)
}
} else {
data <- data |> readr::type_convert(
locale = locale,
col_types = readr::cols(col_types)
)
}
if (!is.null(ignored)) {
data[ignore.vars] <- ignored
}
data
}
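A minimal illustration of the `ignore.vars` behaviour with made-up data (`cpr` is the function's default ignore target):

```
df <- data.frame(
  cpr = c("0101011234", "0202022345"), # leading zeros must survive
  age = c("42", "57")
)
# Character columns are re-typed through readr guessing, while columns
# named in ignore.vars keep their original class:
df |>
  parse_data() |>
  str()
#> 'data.frame': 2 obs. of 2 variables:
#>  $ cpr: chr  "0101011234" "0202022345"
#>  $ age: num  42 57
```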
#' Convert vector to factor based on threshold of number of unique levels
#'
#' @description
#' This is a wrapper of forcats::as_factor, which sorts numeric vectors before
#' factoring, but levels character vectors in order of appearance.
#'
#'
#' @param data vector or data.frame column
#' @param unique.n threshold to convert class to factor
#'
#' @return vector
#' @export
#' @importFrom forcats as_factor
#'
#' @examples
#' sample(seq_len(4), 20, TRUE) |>
#' var2fct(6) |>
#' summary()
#' sample(letters, 20) |>
#' var2fct(6) |>
#' summary()
#' sample(letters[1:4], 20, TRUE) |> var2fct(6)
var2fct <- function(data, unique.n) {
if (length(unique(data)) <= unique.n) {
forcats::as_factor(data)
} else {
data
}
}
#' Applying var2fct across data set
#'
#' @description
#' Individual thresholds for character and numeric columns
#'
#' @param data dataset. data.frame or tibble
#' @param numeric.threshold threshold for var2fct for numeric columns. Default
#' is 6.
#' @param character.threshold threshold for var2fct for character columns.
#' Default is 6.
#'
#' @return data.frame or tibble
#' @export
#'
#' @examples
#' mtcars |> str()
#' mtcars |>
#' numchar2fct(numeric.threshold = 6) |>
#' str()
numchar2fct <- function(data, numeric.threshold = 6, character.threshold = 6) {
data |>
dplyr::mutate(
dplyr::across(
dplyr::where(is.numeric),
\(.x){
var2fct(data = .x, unique.n = numeric.threshold)
}
),
dplyr::across(
dplyr::where(is.character),
\(.x){
var2fct(data = .x, unique.n = character.threshold)
}
)
)
}


@ -14,6 +14,7 @@
#' @param record.id record id variable name. Default is 'record_id'.
#'
#' @return exports zip-file
#' @export
#'
#' @examples
#' #iris |>


@ -17,6 +17,9 @@
#' \item{age_integer}{Age integer, numeric}
#' \item{sex}{Legal sex, character}
#' \item{cohabitation}{Cohabitation status, character}
#' \item{con_calc}{con_calc}
#' \item{con_mrs}{con_mrs}
#' \item{consensus_complete}{consensus_complete}
#' \item{hypertension}{Hypertension, character}
#' \item{diabetes}{diabetes, character}
#' \item{region}{region, character}


@ -22,7 +22,7 @@ shiny_cast <- function(...) {
#' Helper to import files correctly
#' DEPRECATED Helper to import files correctly
#'
#' @param filenames file names
#'
@ -31,11 +31,13 @@ shiny_cast <- function(...) {
#'
#' @examples
#' file_extension(list.files(here::here(""))[[2]])[[1]]
#' file_extension(c("file.cd..ks","file"))
#' file_extension(c("file.cd..ks", "file"))
file_extension <- function(filenames) {
sub(pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "",
filenames,
perl = TRUE)
sub(
pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "",
filenames,
perl = TRUE
)
}
#' Flexible file import based on extension
@ -49,7 +51,7 @@ file_extension <- function(filenames) {
#' @examples
#' read_input("https://raw.githubusercontent.com/agdamsbo/cognitive.index.lookup/main/data/sample.csv")
read_input <- function(file, consider.na = c("NA", '""', "")) {
ext <- file_extension(file)
ext <- tools::file_ext(file)
tryCatch(
{
@ -74,4 +76,3 @@ read_input <- function(file, consider.na = c("NA", '""', "")) {
df
}


@ -1,70 +1,53 @@
<!-- badges: start -->
[![GitHub R package version](https://img.shields.io/github/r-package/v/agdamsbo/REDCapCAST)](https://github.com/agdamsbo/REDCapCAST)
[![CRAN/METACRAN](https://img.shields.io/cran/v/REDCapCAST)](https://CRAN.R-project.org/package=REDCapCAST)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8013984.svg)](https://doi.org/10.5281/zenodo.8013984)
[![R-hub](https://github.com/agdamsbo/REDCapCAST/actions/workflows/rhub.yaml/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/rhub.yaml)
[![R-CMD-check](https://github.com/agdamsbo/REDCapCAST/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/R-CMD-check.yaml)
[![Page deployed](https://github.com/agdamsbo/REDCapCAST/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/pages/pages-build-deployment)
[![Codecov test coverage](https://codecov.io/gh/agdamsbo/REDCapCAST/branch/master/graph/badge.svg)](https://app.codecov.io/gh/agdamsbo/REDCapCAST?branch=master)
[![CRAN downloads](https://cranlogs.r-pkg.org/badges/grand-total/REDCapCAST)](https://cran.r-project.org/package=REDCapCAST)
[![Lifecycle:
experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html)
[![GitHub R package version](https://img.shields.io/github/r-package/v/agdamsbo/REDCapCAST)](https://github.com/agdamsbo/REDCapCAST) [![CRAN/METACRAN](https://img.shields.io/cran/v/REDCapCAST)](https://CRAN.R-project.org/package=REDCapCAST) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8013984.svg)](https://doi.org/10.5281/zenodo.8013984) [![R-hub](https://github.com/agdamsbo/REDCapCAST/actions/workflows/rhub.yaml/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/rhub.yaml) [![R-CMD-check](https://github.com/agdamsbo/REDCapCAST/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/R-CMD-check.yaml) [![Page deployed](https://github.com/agdamsbo/REDCapCAST/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/agdamsbo/REDCapCAST/actions/workflows/pages/pages-build-deployment) [![Codecov test coverage](https://codecov.io/gh/agdamsbo/REDCapCAST/branch/master/graph/badge.svg)](https://app.codecov.io/gh/agdamsbo/REDCapCAST?branch=master) [![CRAN downloads](https://cranlogs.r-pkg.org/badges/grand-total/REDCapCAST)](https://cran.r-project.org/package=REDCapCAST) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html)
<!-- badges: end -->
# REDCapCAST package <img src="man/figures/logo.png" align="right" />
# REDCapCAST package <img src="man/figures/logo.png" align="right"/>
REDCap database casting and handling of castellated data when using repeated instruments and longitudinal projects.
Casting metadata for REDCap database creation and handling of castellated data using repeated instruments and longitudinal projects in REDCap.
This package is a fork of [pegeler/REDCapRITS](https://github.com/pegeler/REDCapRITS). The `REDCapRITS` represents great and extensive work to handle castellated REDCap data in different programming languages. This fork is purely minded on R usage and includes a few implementations of the main `REDCap_split` function. The `REDCapRITS` as well as `REDCapCAST` would not be possible without the outstanding work in [`REDCapR`](https://ouhscbbmc.github.io/REDCapR/).
This is implemented with
## What problem does `REDCapCAST` solve?
- An app-interface for easy database creation [accessible here](https://agdamsbo.shinyapps.io/redcapcast/) or available to run locally with `shiny_cast()` allowing you to easily create a REDCap database based on an existing spreadsheet.
I started working on this project because the castellated longitudinal data sets were a little challenging. Later, I came to learn of the [`redcapAPI`](https://github.com/vubiostat/redcapAPI) package, which would also cover this functionality. I find the `redcapAPI` package quite advanced and a little difficult to work with. This has led to the continued work on this package as an easy-to-use approach to data migration, database creation and data handling. This package is very much to be seen as an attempt at an R-to-REDCap-to-R foundry, handling both the transition from dataset/variable list to database and the other way, from REDCap database to a tidy dataset. The goal was also to allow a "minimal data" approach, filtering records, instruments and variables in the export to only download the data needed. I think this approach is desirable for handling sensitive, clinical data. Please refer to [REDCap-Tools](https://redcap-tools.github.io/) for other great tools for working with REDCap in R.
- Export data from REDCap in different formats, handling castellated data and by default only exporting requested data; this is mainly through `read_redcap_tables()`.
REDCapCAST was initially built on, and still includes code from, [pegeler/REDCapRITS](https://github.com/pegeler/REDCapRITS), and relies on functions from the [`REDCapR`](https://ouhscbbmc.github.io/REDCapR/) project.
## History
This package was originally forked from [pegeler/REDCapRITS](https://github.com/pegeler/REDCapRITS). The `REDCapRITS` represents great and extensive work to handle castellated REDCap data in different programming languages. REDCapCAST has evolved into much more than just handling castellated data and so has been detached from the original project, while still relying on the main `REDCap_split` function. All access to the REDCap database is built on the outstanding work in [`REDCapR`](https://ouhscbbmc.github.io/REDCapR/).
This package really started out of frustration during my PhD in health science, hearing colleagues complain about the "castellated" data formatting of REDCap exports in longitudinal projects while being used to wide data. This led to some bad decisions in building databases that avoided repeated instruments. This package solves these challenges, but solutions are also implemented elsewhere, such as the [redcapAPI](https://github.com/vubiostat/redcapAPI) or [REDCapTidieR](https://github.com/CHOP-CGTInformatics/REDCapTidieR) packages, which are bigger projects.
To help new PhD students and other researchers, I have also worked on creating a few helper/wrapper functions to ease data access. Documentation is on its way.
For any more advanced uses, consider using the [`redcapAPI`](https://github.com/vubiostat/redcapAPI) or [`REDCapR`](https://ouhscbbmc.github.io/REDCapR/) packages.
What is unique in this package is the work towards making it a lot easier to move data from different sources to REDCap databases: casting REDCap metadata based on a spreadsheet. This is all wrapped in `shiny_cast()` and [hosted for all to use here](https://agdamsbo.shinyapps.io/redcapcast/).
## Main functionality
Here is a short description of the main functions; a minimal workflow sketch follows the list:
* `REDCap_split()`: Works largely as the original `REDCapRITS::REDCap_split()`. It takes a REDCap dataset and metadata (data dictionary) and splits the data set into a list of data frames of instruments.
* `read_redcap_tables()`: wraps the use of [`REDCapR::redcap_read()`](https://github.com/OuhscBbmc/REDCapR) with `REDCap_split()` to ease the export of REDCap data. Default output is a list of data frames with one data frame for each REDCap instrument.
* `redcap_wider()`: joins and pivots a list of data frames with repeated instruments to a wide format utilizing the [`tidyr::pivot_wider()`](https://tidyr.tidyverse.org/reference/pivot_wider.html) from the [tidyverse](https://www.tidyverse.org/).
* `easy_redcap()`: combines secure API key storage using the `keyring` package with focused data retrieval and optional widening. This is the recommended approach for easy data access and analysis.
* `ds2dd_detailed()`: Converts a data set to a data dictionary for upload to a new REDCap database. Variables (fields) and instruments in a REDCap database are defined by this data dictionary.
* `doc2dd()`: Converts a document table to a data dictionary. This allows specifying an instrument or a whole data dictionary in a text document, which for most is easier to work with and easily modifiable. Very much like an easy version of working directly in the data dictionary file itself.
* `shiny_cast()`: [Shiny](https://shiny.posit.co/) application to ease the process of converting a spreadsheet/data set to a REDCap database. The app runs locally and data is transferred securely. You can just create and upload the data dictionary, but you can also transfer the given data in the same process. The app is [hosted on shinyapps.io](https://agdamsbo.shinyapps.io/redcapcast/).
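A minimal sketch of a typical casting workflow, reusing the examples from the package documentation (`iris` is just a stand-in dataset; project-specific API arguments are omitted):

```
library(REDCapCAST)

# Cast a data dictionary from an existing dataset; the result is a list
# holding the renamed data and the matching REDCap metadata.
dd <- iris |>
  ds2dd_detailed(add.auto.id = TRUE)
str(dd$meta)

# Or do the same interactively through the app interface:
shiny_cast()
```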
## Future
The plan with this package is to bundle it with a handbook on working with REDCap from R. This work is in progress but limited by the time available. Please feel free to contact me or create an issue with ideas for future additions.
## Installation
## Installation and use
The package is available on CRAN. Install the latest version:
```
install.packages("REDCapCAST")
```
Install the latest version directly from GitHub:
```
require("remotes")
remotes::install_github("agdamsbo/REDCapCAST")
```
Launch the REDCapCAST app interface directly on your own machine:
```
REDCapCAST::shiny_cast()
```

Binary file not shown.

Binary file not shown.


@ -1,4 +1,3 @@
library(REDCapCAST)
library(bslib)
library(shiny)
library(openxlsx2)
@ -7,9 +6,14 @@ library(readODS)
library(readr)
library(dplyr)
library(here)
library(devtools)
if (!requireNamespace("REDCapCAST")) {
devtools::install_github("agdamsbo/REDCapCAST", quiet = TRUE, upgrade = "never")
}
library(REDCapCAST)
server <- function(input, output, session) {
v <- shiny::reactiveValues(
file = NULL
)
@ -17,7 +21,8 @@ server <- function(input, output, session) {
dat <- shiny::reactive({
shiny::req(input$ds)
read_input(input$ds$datapath)
read_input(input$ds$datapath) |>
parse_data()
})
# getData <- reactive({
@ -27,6 +32,11 @@ server <- function(input, output, session) {
# return(!is.null(getData()))
# })
dd <- shiny::reactive({
shiny::req(input$ds)
v$file <- "loaded"
ds2dd_detailed(data = dat())
})
output$uploaded <- shiny::reactive({
if (is.null(v$file)) {
@ -38,12 +48,6 @@ server <- function(input, output, session) {
shiny::outputOptions(output, "uploaded", suspendWhenHidden = FALSE)
dd <- shiny::reactive({
shiny::req(input$ds)
v$file <- "loaded"
ds2dd_detailed(data = dat())
})
output$data.tbl <- gt::render_gt(
dd() |>
purrr::pluck("data") |>
@ -63,7 +67,7 @@ server <- function(input, output, session) {
output$downloadData <- shiny::downloadHandler(
filename = "data_ready.csv",
content = function(file) {
write.csv(purrr::pluck(dd(), "data"), file, row.names = FALSE,na = "")
write.csv(purrr::pluck(dd(), "data"), file, row.names = FALSE, na = "")
}
)
@ -71,13 +75,13 @@ server <- function(input, output, session) {
output$downloadMeta <- shiny::downloadHandler(
filename = "datadictionary_ready.csv",
content = function(file) {
write.csv(purrr::pluck(dd(), "meta"), file, row.names = FALSE,na = "")
write.csv(purrr::pluck(dd(), "meta"), file, row.names = FALSE, na = "")
}
)
# Downloadable .zip of instrument ----
output$downloadInstrument <- shiny::downloadHandler(
filename = paste0("REDCapCAST_instrument",Sys.Date(),".zip"),
filename = paste0("REDCapCAST_instrument", Sys.Date(), ".zip"),
content = function(file) {
export_redcap_instrument(purrr::pluck(dd(), "meta"), file)
}
@ -87,12 +91,15 @@ server <- function(input, output, session) {
output_staging$meta <- output_staging$data <- NA
shiny::observeEvent(input$upload.meta,{ upload_meta() })
shiny::observeEvent(input$upload.meta, {
upload_meta()
})
shiny::observeEvent(input$upload.data,{ upload_data() })
upload_meta <- function(){
shiny::observeEvent(input$upload.data, {
upload_data()
})
upload_meta <- function() {
shiny::req(input$uri)
shiny::req(input$api)
@ -101,11 +108,10 @@ server <- function(input, output, session) {
ds = purrr::pluck(dd(), "meta"),
redcap_uri = input$uri,
token = input$api
)|> purrr::pluck("success")
) |> purrr::pluck("success")
}
upload_data <- function(){
upload_data <- function() {
shiny::req(input$uri)
shiny::req(input$api)
@ -121,4 +127,8 @@ server <- function(input, output, session) {
output$upload.data.print <- renderText(output_staging$data)
# session$onSessionEnded(function() {
# # cat("Session Ended\n")
# unlink("www",recursive = TRUE)
# })
}


@ -19,20 +19,27 @@ ui <-
".ods"
)
),
shiny::helpText("Have a look at the preview panels to show download options."),
# shiny::actionButton(
# inputId = "load_data",
# label = "Load data",
# icon = shiny::icon("circle-down")
# ),
shiny::helpText("Have a look at the preview panels to validate the data dictionary and imported data."),
# For some odd reason this only unfolds when the preview panel is shown.
# This was solved by adding an arbitrary button to load data, which was abandoned again.
shiny::conditionalPanel(
condition = "output.uploaded=='yes'",
shiny::helpText("Below you can download the dataset formatted for upload and the
corresponding data dictionary for a new data base, if you want to upload manually."),
# condition = "input.load_data",
# shiny::helpText("Below you can download the dataset formatted for upload and the
# corresponding data dictionary for a new data base, if you want to upload manually."),
# Button
shiny::downloadButton("downloadData", "Download renamed data"),
shiny::downloadButton(outputId = "downloadData", label = "Download renamed data"),
# Button
shiny::downloadButton("downloadMeta", "Download data dictionary"),
shiny::downloadButton(outputId = "downloadMeta", label = "Download data dictionary"),
# Button
shiny::downloadButton("downloadInstrument", "Download as instrument"),
shiny::downloadButton(outputId = "downloadInstrument", label = "Download as instrument"),
# Horizontal line ----
shiny::tags$hr(),
@ -108,3 +115,4 @@ ui <-
)
)
)


@ -4,7 +4,7 @@
\name{REDCapCAST-package}
\alias{REDCapCAST}
\alias{REDCapCAST-package}
\title{REDCapCAST: REDCap Castellated Data Handling And Metadata Casting}
\title{REDCapCAST: REDCap Castellated Data Handling and Metadata Casting}
\description{
\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}


@ -15,10 +15,7 @@ ds2dd_detailed(
field.label = NULL,
field.label.attr = "label",
field.validation = NULL,
metadata = names(REDCapCAST::redcapcast_meta),
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]"
metadata = names(REDCapCAST::redcapcast_meta)
)
}
\arguments{
@ -58,14 +55,6 @@ file with `haven::read_dta()`).}
\item{metadata}{redcap metadata headings. Default is
REDCapCAST:::metadata_names.}
\item{validate.time}{Flag to validate guessed time columns}
\item{time.var.sel.pos}{Positive selection regex string passed to
`guess_time_only_filter()` as sel.pos.}
\item{time.var.sel.neg}{Negative selection regex string passed to
`guess_time_only_filter()` as sel.neg.}
}
\value{
list of length 2
@ -84,7 +73,6 @@ Ensure, that the data set is formatted with as much information as possible.
}
\examples{
data <- REDCapCAST::redcapcast_data
data |> ds2dd_detailed(validate.time = TRUE)
data |> ds2dd_detailed()
iris |> ds2dd_detailed(add.auto.id = TRUE)
iris |>


@ -2,7 +2,7 @@
% Please edit documentation in R/shiny_cast.R
\name{file_extension}
\alias{file_extension}
\title{Helper to import files correctly}
\title{DEPRECATED Helper to import files correctly}
\usage{
file_extension(filenames)
}
@ -13,9 +13,9 @@ file_extension(filenames)
character vector
}
\description{
Helper to import files correctly
DEPRECATED Helper to import files correctly
}
\examples{
file_extension(list.files(here::here(""))[[2]])[[1]]
file_extension(c("file.cd..ks","file"))
file_extension(c("file.cd..ks", "file"))
}

33
man/guess_time_only.Rd Normal file

@ -0,0 +1,33 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{guess_time_only}
\alias{guess_time_only}
\title{Guess time variables based on naming pattern}
\usage{
guess_time_only(
data,
validate.time = FALSE,
time.var.sel.pos = "[Tt]i[d(me)]",
time.var.sel.neg = "[Dd]at[eo]"
)
}
\arguments{
\item{data}{data.frame or tibble}
\item{validate.time}{Flag to validate guessed time columns}
\item{time.var.sel.pos}{Positive selection regex string passed to
`guess_time_only_filter()` as sel.pos.}
\item{time.var.sel.neg}{Negative selection regex string passed to
`guess_time_only_filter()` as sel.neg.}
}
\value{
data.frame or tibble
}
\description{
This is for repairing data with time variables that have the dummy date "1970-01-01" appended.
}
\examples{
redcapcast_data |> guess_time_only(validate.time = TRUE)
}

24
man/haven_all_levels.Rd Normal file

@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{haven_all_levels}
\alias{haven_all_levels}
\title{Finish incomplete haven attributes substituting missings with values}
\usage{
haven_all_levels(data)
}
\arguments{
\item{data}{haven labelled variable}
}
\value{
named vector
}
\description{
Finish incomplete haven attributes substituting missings with values
}
\examples{
ds <- structure(c(1, 2, 3, 2, 10, 9),
labels = c(Unknown = 9, Refused = 10),
class = "haven_labelled"
)
ds |> haven_all_levels()
}

29
man/numchar2fct.Rd Normal file

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{numchar2fct}
\alias{numchar2fct}
\title{Applying var2fct across data set}
\usage{
numchar2fct(data, numeric.threshold = 6, character.threshold = 6)
}
\arguments{
\item{data}{dataset. data.frame or tibble}
\item{numeric.threshold}{threshold for var2fct for numeric columns. Default
is 6.}
\item{character.threshold}{threshold for var2fct for character columns.
Default is 6.}
}
\value{
data.frame or tibble
}
\description{
Individual thresholds for character and numeric columns
}
\examples{
mtcars |> str()
mtcars |>
numchar2fct(numeric.threshold = 6) |>
str()
}

39
man/parse_data.Rd Normal file

@ -0,0 +1,39 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{parse_data}
\alias{parse_data}
\title{Helper to auto-parse un-formatted data with haven and readr}
\usage{
parse_data(
data,
guess_type = TRUE,
col_types = NULL,
locale = readr::default_locale(),
ignore.vars = "cpr",
...
)
}
\arguments{
\item{data}{data.frame or tibble}
\item{guess_type}{logical to guess type with readr}
\item{col_types}{specify col_types using readr semantics; if supplied, this overrides guessing}
\item{locale}{option to specify locale. Defaults to readr::default_locale().}
\item{ignore.vars}{specify column names of columns to ignore when parsing}
\item{...}{ignored}
}
\value{
data.frame or tibble
}
\description{
Helper to auto-parse un-formatted data with haven and readr
}
\examples{
mtcars |>
parse_data() |>
str()
}


@ -19,6 +19,9 @@ A data frame with 22 variables:
\item{age_integer}{Age integer, numeric}
\item{sex}{Legal sex, character}
\item{cohabitation}{Cohabitation status, character}
\item{con_calc}{con_calc}
\item{con_mrs}{con_mrs}
\item{consensus_complete}{consensus_complete}
\item{hypertension}{Hypertension, character}
\item{diabetes}{diabetes, character}
\item{region}{region, character}

29
man/var2fct.Rd Normal file

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds2dd_detailed.R
\name{var2fct}
\alias{var2fct}
\title{Convert vector to factor based on threshold of number of unique levels}
\usage{
var2fct(data, unique.n)
}
\arguments{
\item{data}{vector or data.frame column}
\item{unique.n}{threshold to convert class to factor}
}
\value{
vector
}
\description{
This is a wrapper of forcats::as_factor, which sorts numeric vectors before
factoring, but levels character vectors in order of appearance.
}
\examples{
sample(seq_len(4), 20, TRUE) |>
var2fct(6) |>
summary()
sample(letters, 20) |>
var2fct(6) |>
summary()
sample(letters[1:4], 20, TRUE) |> var2fct(6)
}


@ -45,7 +45,7 @@ The more advanced `ds2dd_detailed()` is a natural development. It will try to ap
The dataset should be correctly formatted for the data dictionary to preserve as much information as possible.
```{r eval=TRUE}
```{r eval=FALSE}
d2 <- REDCapCAST::redcapcast_data |>
dplyr::mutate(record_id = seq_len(dplyr::n()),
region=factor(region)) |>


@ -1,8 +1,8 @@
---
title: "Introduction"
title: "REDCapCAST"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Introduction}
%\VignetteIndexEntry{REDCapCAST}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
@ -31,16 +31,6 @@ redcapcast_data |> gt::gt()
```{r}
redcapcast_meta |> gt::gt()
```
```{r}
list <-
REDCap_split(
records = redcapcast_data,
metadata = redcapcast_meta,
forms = "repeating"
) |>
sanitize_split()
str(list)
```
```{r}
list <-


@ -14,13 +14,79 @@ knitr::opts_chunk$set(
)
```
To make the easiest possible transition from spreadsheet/dataset to REDCap, I have created a small Shiny app, which adds a graphical interface to the casting of a data dictionary and data upload. Install the package and run the app as follows:
To make the easiest possible transition from spreadsheet/dataset to REDCap, I have created a small app, which adds a graphical interface to the casting of a data dictionary and data upload. Install the package and launch the app as follows:
```{r eval=FALSE}
require(REDCapCAST)
shiny_cast()
REDCapCAST::shiny_cast()
```
The app will launch in a new window and the interface should be fairly self-explanatory.
The app only provides the most basic functionality, but might be extended in the future.
The app primarily wraps one function: `ds2dd_detailed()`.
```{r}
library(REDCapCAST)
ds <- REDCap_split(
records = redcapcast_data,
metadata = redcapcast_meta,
forms = "all"
) |>
sanitize_split() |>
redcap_wider()
str(ds)
```
```{r}
ds |>
ds2dd_detailed() |>
purrr::pluck("data") |>
str()
```
```{r}
ds |>
ds2dd_detailed() |>
purrr::pluck("meta") |>
head(10)
```
Different data formats are accepted, all of which mostly implement the `readr::col_guess()` functionality to parse column classes.
To ensure uniformity in data import, this parsing has been implemented on its own for use with `ds2dd_detailed()` or any other data set for that matter:
```{r}
ds_parsed <- redcapcast_data |>
dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |>
parse_data()
str(ds_parsed)
```
It will ignore specified columns, which is neat for numeric-looking strings like a cpr number with a leading 0:
```{r}
redcapcast_data |>
dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |>
parse_data(ignore.vars = c("record_id", "cpr")) |>
str()
```
Column classes can also be passed to `parse_data()` through the `col_types` argument.
Making a few crude assumptions for factorising data, `numchar2fct()` factorises numerical and character vectors based on a set threshold for unique values:
```{r}
mtcars |> str()
mtcars |>
numchar2fct(numeric.threshold = 6) |>
str()
```
```{r}
ds_parsed |>
numchar2fct(numeric.threshold = 2) |>
str()
```