Skip to contents

To make the easiest possible transition from spreadsheet/dataset to REDCap, I have created a small app, which adds a graphical interface to the casting of a data dictionary and data upload. Install the package and launch the app as follows:

REDCapCAST::shiny_cast()

The app primarily wraps one function: ds2dd_detailed().

library(REDCapCAST)
ds <- REDCap_split(
    records = redcapcast_data,
    metadata = redcapcast_meta,
    forms = "all"
  ) |> 
  sanitize_split() |>
  redcap_wider() 
#> Joining with `by = join_by(record_id)`
#> Joining with `by = join_by(record_id)`
#> Joining with `by = join_by(record_id)`
str(ds)
#> 'data.frame':    6 obs. of  52 variables:
#>  $ record_id                   : num  1 2 3 4 5 6
#>  $ cpr                         : chr  "1203401OB4" "0102342303" "2301569823" "0204051342" ...
#>  $ inclusion                   : Date, format: "2023-03-13" "2023-03-01" ...
#>  $ inclusion_time              : 'hms' num  12:38:49 10:38:57 12:01:07 20:39:19 ...
#>   ..- attr(*, "units")= chr "secs"
#>  $ dob                         : Date, format: "1940-03-12" "1934-02-01" ...
#>  $ age                         : num  83 89.1 66.1 117.9 126.2 ...
#>  $ age_integer                 : num  83 89 66 117 126 91
#>  $ sex                         : chr  "female" "male" "male" "female" ...
#>  $ cohabitation                : chr  "Yes" "Yes" "No" NA ...
#>  $ hypertension                : chr  "No" "No" "Yes" NA ...
#>  $ diabetes                    : chr  "Yes" "No" "Yes" NA ...
#>  $ region                      : chr  "East" "South" "North" NA ...
#>  $ baseline_data_start_complete: chr  "Incomplete" "Incomplete" "Incomplete" "Incomplete" ...
#>  $ mrs_assessed_inclusion      : chr  "Yes" "Yes" NA NA ...
#>  $ mrs_assessed_follow1        : chr  NA "Yes" "Yes" NA ...
#>  $ mrs_assessed_follow2        : chr  NA NA "Yes" NA ...
#>  $ mrs_date_inclusion          : Date, format: "2023-03-13" "2023-03-07" ...
#>  $ mrs_date_follow1            : Date, format: NA "2023-03-09" ...
#>  $ mrs_date_follow2            : Date, format: NA NA ...
#>  $ mrs_score_inclusion         : num  1 1 NA NA NA NA
#>  $ mrs_score_follow1           : num  NA 3 2 NA NA NA
#>  $ mrs_score_follow2           : num  NA NA 1 NA NA NA
#>  $ mrs_complete_inclusion      : chr  "Incomplete" "Incomplete" NA NA ...
#>  $ mrs_complete_follow1        : chr  NA "Incomplete" "Incomplete" NA ...
#>  $ mrs_complete_follow2        : chr  NA NA "Incomplete" NA ...
#>  $ con_mrs                     : logi  NA NA NA NA NA NA
#>  $ con_calc                    : logi  NA NA NA NA NA NA
#>  $ consensus_complete          : chr  NA NA NA NA ...
#>  $ event_datetime_1_follow1    : POSIXct, format: NA "2024-01-18 12:49:42" ...
#>  $ event_datetime_1_follow2    : POSIXct, format: NA NA ...
#>  $ event_age_1_follow1         : num  NA NA NA 96 127 NA
#>  $ event_age_1_follow2         : num  NA NA NA 118 NA NA
#>  $ event_type_1_follow1        : chr  NA "TIA" "AIS" "TIA" ...
#>  $ event_type_1_follow2        : chr  NA NA "ICH" "AIS" ...
#>  $ new_event_complete_1_follow1: chr  NA "Incomplete" "Incomplete" "Complete" ...
#>  $ new_event_complete_1_follow2: chr  NA NA "Incomplete" "Complete" ...
#>  $ event_datetime_2_follow1    : POSIXct, format: NA NA ...
#>  $ event_datetime_2_follow2    : POSIXct, format: NA NA ...
#>  $ event_datetime_3_follow1    : POSIXct, format: NA NA ...
#>  $ event_datetime_3_follow2    : POSIXct, format: NA NA ...
#>  $ event_age_2_follow1         : num  NA NA NA 105 127 NA
#>  $ event_age_2_follow2         : num  NA NA NA 118 NA NA
#>  $ event_age_3_follow1         : num  NA NA NA NA NA NA
#>  $ event_age_3_follow2         : num  NA NA NA 118 NA NA
#>  $ event_type_2_follow1        : chr  NA NA "ICH" "TIA" ...
#>  $ event_type_2_follow2        : chr  NA NA "TIA" "ICH" ...
#>  $ event_type_3_follow1        : chr  NA NA NA NA ...
#>  $ event_type_3_follow2        : chr  NA NA "AIS" "Unknown" ...
#>  $ new_event_complete_2_follow1: chr  NA NA "Incomplete" "Complete" ...
#>  $ new_event_complete_2_follow2: chr  NA NA "Incomplete" "Incomplete" ...
#>  $ new_event_complete_3_follow1: chr  NA NA NA NA ...
#>  $ new_event_complete_3_follow2: chr  NA NA "Incomplete" "Complete" ...
ds|> 
  ds2dd_detailed()|>  
  purrr::pluck("data") |> 
  str()
#> tibble [6 × 52] (S3: tbl_df/tbl/data.frame)
#>  $ record_id                   : num [1:6] 1 2 3 4 5 6
#>  $ cpr                         : chr [1:6] "1203401OB4" "0102342303" "2301569823" "0204051342" ...
#>  $ inclusion                   : Date[1:6], format: "2023-03-13" "2023-03-01" ...
#>  $ inclusion_time              : chr [1:6] "12:38:49" "10:38:57" "12:01:07" "20:39:19" ...
#>  $ dob                         : Date[1:6], format: "1940-03-12" "1934-02-01" ...
#>  $ age                         : num [1:6] 83 89.1 66.1 117.9 126.2 ...
#>  $ age_integer                 : num [1:6] 83 89 66 117 126 91
#>  $ sex                         : chr [1:6] "female" "male" "male" "female" ...
#>  $ cohabitation                : chr [1:6] "Yes" "Yes" "No" NA ...
#>  $ hypertension                : chr [1:6] "No" "No" "Yes" NA ...
#>  $ diabetes                    : chr [1:6] "Yes" "No" "Yes" NA ...
#>  $ region                      : chr [1:6] "East" "South" "North" NA ...
#>  $ baseline_data_start_complete: chr [1:6] "Incomplete" "Incomplete" "Incomplete" "Incomplete" ...
#>  $ mrs_assessed_inclusion      : chr [1:6] "Yes" "Yes" NA NA ...
#>  $ mrs_assessed_follow1        : chr [1:6] NA "Yes" "Yes" NA ...
#>  $ mrs_assessed_follow2        : chr [1:6] NA NA "Yes" NA ...
#>  $ mrs_date_inclusion          : Date[1:6], format: "2023-03-13" "2023-03-07" ...
#>  $ mrs_date_follow1            : Date[1:6], format: NA "2023-03-09" ...
#>  $ mrs_date_follow2            : Date[1:6], format: NA NA ...
#>  $ mrs_score_inclusion         : num [1:6] 1 1 NA NA NA NA
#>  $ mrs_score_follow1           : num [1:6] NA 3 2 NA NA NA
#>  $ mrs_score_follow2           : num [1:6] NA NA 1 NA NA NA
#>  $ mrs_complete_inclusion      : chr [1:6] "Incomplete" "Incomplete" NA NA ...
#>  $ mrs_complete_follow1        : chr [1:6] NA "Incomplete" "Incomplete" NA ...
#>  $ mrs_complete_follow2        : chr [1:6] NA NA "Incomplete" NA ...
#>  $ con_mrs                     : Factor w/ 2 levels "FALSE","TRUE": NA NA NA NA NA NA
#>  $ con_calc                    : Factor w/ 2 levels "FALSE","TRUE": NA NA NA NA NA NA
#>  $ consensus_complete          : chr [1:6] NA NA NA NA ...
#>  $ event_datetime_1_follow1    : POSIXct[1:6], format: NA "2024-01-18 12:49:42" ...
#>  $ event_datetime_1_follow2    : POSIXct[1:6], format: NA NA ...
#>  $ event_age_1_follow1         : num [1:6] NA NA NA 96 127 NA
#>  $ event_age_1_follow2         : num [1:6] NA NA NA 118 NA NA
#>  $ event_type_1_follow1        : chr [1:6] NA "TIA" "AIS" "TIA" ...
#>  $ event_type_1_follow2        : chr [1:6] NA NA "ICH" "AIS" ...
#>  $ new_event_complete_1_follow1: chr [1:6] NA "Incomplete" "Incomplete" "Complete" ...
#>  $ new_event_complete_1_follow2: chr [1:6] NA NA "Incomplete" "Complete" ...
#>  $ event_datetime_2_follow1    : POSIXct[1:6], format: NA NA ...
#>  $ event_datetime_2_follow2    : POSIXct[1:6], format: NA NA ...
#>  $ event_datetime_3_follow1    : POSIXct[1:6], format: NA NA ...
#>  $ event_datetime_3_follow2    : POSIXct[1:6], format: NA NA ...
#>  $ event_age_2_follow1         : num [1:6] NA NA NA 105 127 NA
#>  $ event_age_2_follow2         : num [1:6] NA NA NA 118 NA NA
#>  $ event_age_3_follow1         : num [1:6] NA NA NA NA NA NA
#>  $ event_age_3_follow2         : num [1:6] NA NA NA 118 NA NA
#>  $ event_type_2_follow1        : chr [1:6] NA NA "ICH" "TIA" ...
#>  $ event_type_2_follow2        : chr [1:6] NA NA "TIA" "ICH" ...
#>  $ event_type_3_follow1        : chr [1:6] NA NA NA NA ...
#>  $ event_type_3_follow2        : chr [1:6] NA NA "AIS" "Unknown" ...
#>  $ new_event_complete_2_follow1: chr [1:6] NA NA "Incomplete" "Complete" ...
#>  $ new_event_complete_2_follow2: chr [1:6] NA NA "Incomplete" "Incomplete" ...
#>  $ new_event_complete_3_follow1: chr [1:6] NA NA NA NA ...
#>  $ new_event_complete_3_follow2: chr [1:6] NA NA "Incomplete" "Complete" ...
ds|> 
  ds2dd_detailed()|>  
  purrr::pluck("meta") |> 
  head(10)
#> # A tibble: 10 × 18
#>    field_name     form_name section_header field_type field_label   
#>    <chr>          <chr>     <lgl>          <chr>      <chr>         
#>  1 record_id      data      NA             text       record_id     
#>  2 cpr            data      NA             text       cpr           
#>  3 inclusion      data      NA             text       inclusion     
#>  4 inclusion_time data      NA             text       inclusion_time
#>  5 dob            data      NA             text       dob           
#>  6 age            data      NA             text       age           
#>  7 age_integer    data      NA             text       age_integer   
#>  8 sex            data      NA             text       sex           
#>  9 cohabitation   data      NA             text       cohabitation  
#> 10 hypertension   data      NA             text       hypertension  
#> # ℹ 13 more variables: select_choices_or_calculations <chr>, field_note <lgl>,
#> #   text_validation_type_or_show_slider_number <chr>,
#> #   text_validation_min <lgl>, text_validation_max <lgl>, identifier <lgl>,
#> #   branching_logic <lgl>, required_field <lgl>, custom_alignment <lgl>,
#> #   question_number <lgl>, matrix_group_name <lgl>, matrix_ranking <lgl>,
#> #   field_annotation <lgl>

Different data formats are accepted, all of which mostly rely on the readr::col_guess() functionality to parse column classes.

To ensure uniformity in data import, this parsing has been implemented on its own, for use with ds2dd_detailed() or any other data set for that matter:

ds_parsed <- redcapcast_data |> 
  dplyr::mutate(dplyr::across(dplyr::everything(),as.character)) |> 
  parse_data()
str(ds_parsed)
#> tibble [25 × 27] (S3: tbl_df/tbl/data.frame)
#>  $ record_id                   : num [1:25] 1 2 2 2 3 3 3 3 3 3 ...
#>  $ redcap_event_name           : chr [1:25] "inclusion" "inclusion" "follow1" "follow1" ...
#>  $ redcap_repeat_instrument    : chr [1:25] NA NA NA "New Event (?)" ...
#>  $ redcap_repeat_instance      : num [1:25] NA NA NA 1 NA NA NA 1 2 1 ...
#>  $ cpr                         : chr [1:25] "1203401OB4" "0102342303" NA NA ...
#>  $ inclusion                   : Date[1:25], format: "2023-03-13" "2023-03-01" ...
#>  $ inclusion_time              : 'hms' num [1:25] 12:38:49 10:38:57 NA NA ...
#>   ..- attr(*, "units")= chr "secs"
#>  $ dob                         : Date[1:25], format: "1940-03-12" "1934-02-01" ...
#>  $ age                         : num [1:25] 83 89.1 NA NA 66.1 ...
#>  $ age_integer                 : num [1:25] 83 89 NA NA 66 NA NA NA NA NA ...
#>  $ sex                         : chr [1:25] "female" "male" NA NA ...
#>  $ cohabitation                : chr [1:25] "Yes" "Yes" NA NA ...
#>  $ hypertension                : chr [1:25] "No" "No" NA NA ...
#>  $ diabetes                    : chr [1:25] "Yes" "No" NA NA ...
#>  $ region                      : chr [1:25] "East" "South" NA NA ...
#>  $ baseline_data_start_complete: chr [1:25] "Incomplete" "Incomplete" NA NA ...
#>  $ mrs_assessed                : chr [1:25] "Yes" "Yes" "Yes" NA ...
#>  $ mrs_date                    : Date[1:25], format: "2023-03-13" "2023-03-07" ...
#>  $ mrs_score                   : num [1:25] 1 1 3 NA NA 2 1 NA NA NA ...
#>  $ mrs_complete                : chr [1:25] "Incomplete" "Incomplete" "Incomplete" NA ...
#>  $ con_mrs                     : logi [1:25] NA NA NA NA NA NA ...
#>  $ con_calc                    : logi [1:25] NA NA NA NA NA NA ...
#>  $ consensus_complete          : chr [1:25] NA NA "Incomplete" NA ...
#>  $ event_datetime              : POSIXct[1:25], format: NA NA ...
#>  $ event_age                   : num [1:25] NA NA NA NA NA NA NA NA NA NA ...
#>  $ event_type                  : chr [1:25] NA NA NA "TIA" ...
#>  $ new_event_complete          : chr [1:25] NA NA NA "Incomplete" ...
#>  - attr(*, "problems")=<externalptr>

It will ignore specified columns, which is neat for numeric-looking strings such as cpr, where a leading 0 must be preserved:

redcapcast_data |> 
  dplyr::mutate(dplyr::across(dplyr::everything(),as.character)) |> 
  parse_data(ignore.vars = c("record_id","cpr")) |> 
  str()
#> tibble [25 × 27] (S3: tbl_df/tbl/data.frame)
#>  $ record_id                   : chr [1:25] "1" "2" "2" "2" ...
#>  $ redcap_event_name           : chr [1:25] "inclusion" "inclusion" "follow1" "follow1" ...
#>  $ redcap_repeat_instrument    : chr [1:25] NA NA NA "New Event (?)" ...
#>  $ redcap_repeat_instance      : num [1:25] NA NA NA 1 NA NA NA 1 2 1 ...
#>  $ cpr                         : chr [1:25] "1203401OB4" "0102342303" NA NA ...
#>  $ inclusion                   : Date[1:25], format: "2023-03-13" "2023-03-01" ...
#>  $ inclusion_time              : 'hms' num [1:25] 12:38:49 10:38:57 NA NA ...
#>   ..- attr(*, "units")= chr "secs"
#>  $ dob                         : Date[1:25], format: "1940-03-12" "1934-02-01" ...
#>  $ age                         : num [1:25] 83 89.1 NA NA 66.1 ...
#>  $ age_integer                 : num [1:25] 83 89 NA NA 66 NA NA NA NA NA ...
#>  $ sex                         : chr [1:25] "female" "male" NA NA ...
#>  $ cohabitation                : chr [1:25] "Yes" "Yes" NA NA ...
#>  $ hypertension                : chr [1:25] "No" "No" NA NA ...
#>  $ diabetes                    : chr [1:25] "Yes" "No" NA NA ...
#>  $ region                      : chr [1:25] "East" "South" NA NA ...
#>  $ baseline_data_start_complete: chr [1:25] "Incomplete" "Incomplete" NA NA ...
#>  $ mrs_assessed                : chr [1:25] "Yes" "Yes" "Yes" NA ...
#>  $ mrs_date                    : Date[1:25], format: "2023-03-13" "2023-03-07" ...
#>  $ mrs_score                   : num [1:25] 1 1 3 NA NA 2 1 NA NA NA ...
#>  $ mrs_complete                : chr [1:25] "Incomplete" "Incomplete" "Incomplete" NA ...
#>  $ con_mrs                     : logi [1:25] NA NA NA NA NA NA ...
#>  $ con_calc                    : logi [1:25] NA NA NA NA NA NA ...
#>  $ consensus_complete          : chr [1:25] NA NA "Incomplete" NA ...
#>  $ event_datetime              : POSIXct[1:25], format: NA NA ...
#>  $ event_age                   : num [1:25] NA NA NA NA NA NA NA NA NA NA ...
#>  $ event_type                  : chr [1:25] NA NA NA "TIA" ...
#>  $ new_event_complete          : chr [1:25] NA NA NA "Incomplete" ...
#>  - attr(*, "problems")=<externalptr>

Column classes can be passed to parse_data().

Making a few crude assumptions for factorising data, numchar2fct() factorises numerical and character vectors based on a set threshold for unique values:

mtcars |> str()
#> 'data.frame':    32 obs. of  11 variables:
#>  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
#>  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
#>  $ disp: num  160 160 108 258 360 ...
#>  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
#>  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
#>  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
#>  $ qsec: num  16.5 17 18.6 19.4 17 ...
#>  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
#>  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
#>  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
#>  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
mtcars |>
  numchar2fct(numeric.threshold = 6) |>
  str()
#> 'data.frame':    32 obs. of  11 variables:
#>  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
#>  $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
#>  $ disp: num  160 160 108 258 360 ...
#>  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
#>  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
#>  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
#>  $ qsec: num  16.5 17 18.6 19.4 17 ...
#>  $ vs  : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 1 2 2 2 ...
#>  $ am  : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 1 1 1 1 ...
#>  $ gear: Factor w/ 3 levels "3","4","5": 2 2 2 1 1 1 1 2 2 2 ...
#>  $ carb: Factor w/ 6 levels "1","2","3","4",..: 4 4 1 1 2 1 4 2 2 4 ...
ds_parsed|>
  numchar2fct(numeric.threshold = 2) |>
  str()
#> Warning: There were 13 warnings in `dplyr::mutate()`.
#> The first warning was:
#>  In argument: `dplyr::across(...)`.
#> Caused by warning in `.roman2numeric()`:
#> ! invalid roman numerals: inclusion inclusion follow1 follow1 inclusion follow1 follow2 follow1 follow1 follow2 follow2 follow2 inclusion follow1 follow2 follow1 follow1 follow2 follow2 follow2 inclusion follow1 follow1 follow1 inclusion
#>  Run `dplyr::last_dplyr_warnings()` to see the 12 remaining warnings.
#> tibble [25 × 27] (S3: tbl_df/tbl/data.frame)
#>  $ record_id                   : num [1:25] 1 2 2 2 3 3 3 3 3 3 ...
#>  $ redcap_event_name           : Factor w/ 3 levels "inclusion","follow1",..: 1 1 2 2 1 2 3 2 2 3 ...
#>  $ redcap_repeat_instrument    : Factor w/ 1 level "New Event (?)": NA NA NA 1 NA NA NA 1 1 1 ...
#>  $ redcap_repeat_instance      : num [1:25] NA NA NA 1 NA NA NA 1 2 1 ...
#>  $ cpr                         : chr [1:25] "1203401OB4" "0102342303" NA NA ...
#>  $ inclusion                   : Date[1:25], format: "2023-03-13" "2023-03-01" ...
#>  $ inclusion_time              : 'hms' num [1:25] 12:38:49 10:38:57 NA NA ...
#>   ..- attr(*, "units")= chr "secs"
#>  $ dob                         : Date[1:25], format: "1940-03-12" "1934-02-01" ...
#>  $ age                         : num [1:25] 83 89.1 NA NA 66.1 ...
#>  $ age_integer                 : num [1:25] 83 89 NA NA 66 NA NA NA NA NA ...
#>  $ sex                         : Factor w/ 2 levels "female","male": 1 2 NA NA 2 NA NA NA NA NA ...
#>  $ cohabitation                : Factor w/ 2 levels "Yes","No": 1 1 NA NA 2 NA NA NA NA NA ...
#>  $ hypertension                : Factor w/ 2 levels "No","Yes": 1 1 NA NA 2 NA NA NA NA NA ...
#>  $ diabetes                    : Factor w/ 2 levels "Yes","No": 1 2 NA NA 1 NA NA NA NA NA ...
#>  $ region                      : Factor w/ 3 levels "East","South",..: 1 2 NA NA 3 NA NA NA NA NA ...
#>  $ baseline_data_start_complete: Factor w/ 2 levels "Incomplete","Complete": 1 1 NA NA 1 NA NA NA NA NA ...
#>  $ mrs_assessed                : Factor w/ 1 level "Yes": 1 1 1 NA NA 1 1 NA NA NA ...
#>  $ mrs_date                    : Date[1:25], format: "2023-03-13" "2023-03-07" ...
#>  $ mrs_score                   : num [1:25] 1 1 3 NA NA 2 1 NA NA NA ...
#>  $ mrs_complete                : Factor w/ 1 level "Incomplete": 1 1 1 NA 1 1 1 NA NA NA ...
#>  $ con_mrs                     : logi [1:25] NA NA NA NA NA NA ...
#>  $ con_calc                    : logi [1:25] NA NA NA NA NA NA ...
#>  $ consensus_complete          : Factor w/ 1 level "Incomplete": NA NA 1 NA NA 1 1 NA NA NA ...
#>  $ event_datetime              : POSIXct[1:25], format: NA NA ...
#>  $ event_age                   : num [1:25] NA NA NA NA NA NA NA NA NA NA ...
#>  $ event_type                  : Factor w/ 4 levels "TIA","AIS","ICH",..: NA NA NA 1 NA NA NA 2 3 3 ...
#>  $ new_event_complete          : Factor w/ 2 levels "Incomplete","Complete": NA NA NA 1 NA NA NA 1 1 1 ...
#>  - attr(*, "problems")=<externalptr>