Skip to contents

Extract data from Stata file for data dictionary

Usage

ds2dd_detailed(
  data,
  add.auto.id = FALSE,
  date.format = "dmy",
  form.name = NULL,
  field.type = NULL,
  field.label = NULL,
  field.label.attr = "label",
  field.validation = NULL,
  metadata = metadata_names,
  validate.time = FALSE,
  time.var.sel.pos = "[Tt]i[d(me)]",
  time.var.sel.neg = "[Dd]at[eo]"
)

Arguments

data

data frame

add.auto.id

flag to add id column

date.format

date format, character string. One of ymd/dmy/mdy. Default is dmy.

form.name

manually specify form name(s). Vector of length 1 or ncol(data). Default is NULL and "data" is used.

field.type

manually specify field type(s). Vector of length 1 or ncol(data). Default is NULL and "text" is used for everything but factors, which will get "radio".

field.label

manually specify field label(s). Vector of length 1 or ncol(data). Default is NULL and colnames(data) is used or attribute `field.label.attr` for haven_labelled data set (imported .dta file with `haven::read_dta()`).

field.label.attr

attribute name for named labels for haven_labelled data set (imported .dta file with `haven::read_dta()`). Default is "label".

field.validation

manually specify field validation(s). Vector of length 1 or ncol(data). Default is NULL and `levels()` are used for factors or attribute `factor.labels.attr` for haven_labelled data set (imported .dta file with `haven::read_dta()`).

metadata

REDCap metadata headings. Default is REDCapCAST:::metadata_names.

validate.time

Flag to validate guessed time columns

time.var.sel.pos

Positive selection regex string passed to `guess_time_only_filter()` as sel.pos.

time.var.sel.neg

Negative selection regex string passed to `guess_time_only_filter()` as sel.neg.

Value

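list of length 2 (`data` and `meta`). When `validate.time = TRUE`, a list of the guessed time columns is returned instead (see Examples).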

Details

This function is a natural development of the ds2dd() function. It assumes that the first column is the ID column; no checks are performed. Always inspect the data dictionary before upload.

Ensure that the data set is formatted with as much information as possible.

`field.type` can be supplied manually to override the default field types (see Arguments).

Examples

data <- redcapcast_data
data |> ds2dd_detailed(validate.time = TRUE)
#> $is.POSIX
#> # A tibble: 19 × 2
#>    inclusion_time event_datetime     
#>    <time>         <dttm>             
#>  1 12:38:49       NA                 
#>  2 10:38:57       NA                 
#>  3       NA       NA                 
#>  4       NA       2024-01-18 12:49:42
#>  5 12:01:07       NA                 
#>  6       NA       NA                 
#>  7       NA       NA                 
#>  8       NA       2024-01-18 12:49:58
#>  9       NA       2024-01-18 12:50:01
#> 10       NA       2024-01-18 12:50:05
#> 11       NA       2024-01-18 12:50:07
#> 12       NA       2024-01-18 12:50:09
#> 13 20:39:19       NA                 
#> 14       NA       NA                 
#> 15       NA       2024-01-18 12:50:19
#> 16       NA       2024-01-18 12:50:22
#> 17       NA       2024-01-18 12:50:24
#> 18 08:50:31       NA                 
#> 19 08:49:28       NA                 
#> 
#> $is.datetime
#> # A tibble: 19 × 1
#>    event_datetime     
#>    <dttm>             
#>  1 NA                 
#>  2 NA                 
#>  3 NA                 
#>  4 2024-01-18 12:49:42
#>  5 NA                 
#>  6 NA                 
#>  7 NA                 
#>  8 2024-01-18 12:49:58
#>  9 2024-01-18 12:50:01
#> 10 2024-01-18 12:50:05
#> 11 2024-01-18 12:50:07
#> 12 2024-01-18 12:50:09
#> 13 NA                 
#> 14 NA                 
#> 15 2024-01-18 12:50:19
#> 16 2024-01-18 12:50:22
#> 17 2024-01-18 12:50:24
#> 18 NA                 
#> 19 NA                 
#> 
#> $is.time_only
#> # A tibble: 19 × 1
#>    inclusion_time
#>    <time>        
#>  1 12:38:49      
#>  2 10:38:57      
#>  3       NA      
#>  4       NA      
#>  5 12:01:07      
#>  6       NA      
#>  7       NA      
#>  8       NA      
#>  9       NA      
#> 10       NA      
#> 11       NA      
#> 12       NA      
#> 13 20:39:19      
#> 14       NA      
#> 15       NA      
#> 16       NA      
#> 17       NA      
#> 18 08:50:31      
#> 19 08:49:28      
#> 
data |> ds2dd_detailed()
#> $data
#> # A tibble: 19 × 24
#>    record_id redcap_event_name redcap_repeat_instrument redcap_repeat_instance
#>        <dbl> <chr>             <chr>                                     <dbl>
#>  1         1 inclusion         NA                                           NA
#>  2         2 inclusion         NA                                           NA
#>  3         2 follow1           NA                                           NA
#>  4         2 follow1           New Event (?)                                 1
#>  5         3 inclusion         NA                                           NA
#>  6         3 follow1           NA                                           NA
#>  7         3 follow2           NA                                           NA
#>  8         3 follow1           New Event (?)                                 1
#>  9         3 follow1           New Event (?)                                 2
#> 10         3 follow2           New Event (?)                                 1
#> 11         3 follow2           New Event (?)                                 2
#> 12         3 follow2           New Event (?)                                 3
#> 13         4 inclusion         NA                                           NA
#> 14         4 follow2           NA                                           NA
#> 15         4 follow2           New Event (?)                                 1
#> 16         4 follow2           New Event (?)                                 2
#> 17         4 follow2           New Event (?)                                 3
#> 18         5 inclusion         NA                                           NA
#> 19         6 inclusion         NA                                           NA
#> # ℹ 20 more variables: cpr <chr>, inclusion <date>, inclusion_time <chr>,
#> #   dob <date>, age <dbl>, age_integer <dbl>, sex <chr>, cohabitation <chr>,
#> #   hypertension <chr>, diabetes <chr>, region <chr>,
#> #   baseline_data_start_complete <chr>, mrs_assessed <chr>, mrs_date <date>,
#> #   mrs_score <dbl>, mrs_complete <chr>, event_datetime <dttm>,
#> #   event_age <lgl>, event_type <chr>, new_event_complete <chr>
#> 
#> $meta
#> # A tibble: 24 × 18
#>    field_name               form_name section_header field_type field_label     
#>    <chr>                    <chr>     <lgl>          <chr>      <chr>           
#>  1 record_id                data      NA             text       record_id       
#>  2 redcap_event_name        data      NA             text       redcap_event_na…
#>  3 redcap_repeat_instrument data      NA             text       redcap_repeat_i…
#>  4 redcap_repeat_instance   data      NA             text       redcap_repeat_i…
#>  5 cpr                      data      NA             text       cpr             
#>  6 inclusion                data      NA             text       inclusion       
#>  7 inclusion_time           data      NA             text       inclusion_time  
#>  8 dob                      data      NA             text       dob             
#>  9 age                      data      NA             text       age             
#> 10 age_integer              data      NA             text       age_integer     
#> # ℹ 14 more rows
#> # ℹ 13 more variables: select_choices_or_calculations <lgl>, field_note <lgl>,
#> #   text_validation_type_or_show_slider_number <chr>,
#> #   text_validation_min <lgl>, text_validation_max <lgl>, identifier <lgl>,
#> #   branching_logic <lgl>, required_field <lgl>, custom_alignment <lgl>,
#> #   question_number <lgl>, matrix_group_name <lgl>, matrix_ranking <lgl>,
#> #   field_annotation <lgl>
#> 
iris |> ds2dd_detailed(add.auto.id = TRUE)
#> A default id column has been added
#> $data
#> # A tibble: 150 × 6
#>    default_trial_id sepal.length sepal.width petal.length petal.width species
#>               <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#>  1                1          5.1         3.5          1.4         0.2 setosa 
#>  2                2          4.9         3            1.4         0.2 setosa 
#>  3                3          4.7         3.2          1.3         0.2 setosa 
#>  4                4          4.6         3.1          1.5         0.2 setosa 
#>  5                5          5           3.6          1.4         0.2 setosa 
#>  6                6          5.4         3.9          1.7         0.4 setosa 
#>  7                7          4.6         3.4          1.4         0.3 setosa 
#>  8                8          5           3.4          1.5         0.2 setosa 
#>  9                9          4.4         2.9          1.4         0.2 setosa 
#> 10               10          4.9         3.1          1.5         0.1 setosa 
#> # ℹ 140 more rows
#> 
#> $meta
#> # A tibble: 6 × 18
#>   field_name       form_name section_header field_type field_label     
#>   <chr>            <chr>     <lgl>          <chr>      <chr>           
#> 1 default_trial_id data      NA             text       default_trial_id
#> 2 sepal.length     data      NA             text       Sepal.Length    
#> 3 sepal.width      data      NA             text       Sepal.Width     
#> 4 petal.length     data      NA             text       Petal.Length    
#> 5 petal.width      data      NA             text       Petal.Width     
#> 6 species          data      NA             radio      Species         
#> # ℹ 13 more variables: select_choices_or_calculations <chr>, field_note <lgl>,
#> #   text_validation_type_or_show_slider_number <chr>,
#> #   text_validation_min <lgl>, text_validation_max <lgl>, identifier <lgl>,
#> #   branching_logic <lgl>, required_field <lgl>, custom_alignment <lgl>,
#> #   question_number <lgl>, matrix_group_name <lgl>, matrix_ranking <lgl>,
#> #   field_annotation <lgl>
#> 
mtcars |> ds2dd_detailed(add.auto.id = TRUE)
#> A default id column has been added
#> $data
#> # A tibble: 32 × 12
#>    default_trial_id   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear
#>               <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1                1  21       6  160    110  3.9   2.62  16.5     0     1     4
#>  2                2  21       6  160    110  3.9   2.88  17.0     0     1     4
#>  3                3  22.8     4  108     93  3.85  2.32  18.6     1     1     4
#>  4                4  21.4     6  258    110  3.08  3.22  19.4     1     0     3
#>  5                5  18.7     8  360    175  3.15  3.44  17.0     0     0     3
#>  6                6  18.1     6  225    105  2.76  3.46  20.2     1     0     3
#>  7                7  14.3     8  360    245  3.21  3.57  15.8     0     0     3
#>  8                8  24.4     4  147.    62  3.69  3.19  20       1     0     4
#>  9                9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4
#> 10               10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4
#> # ℹ 22 more rows
#> # ℹ 1 more variable: carb <dbl>
#> 
#> $meta
#> # A tibble: 12 × 18
#>    field_name       form_name section_header field_type field_label     
#>    <chr>            <chr>     <lgl>          <chr>      <chr>           
#>  1 default_trial_id data      NA             text       default_trial_id
#>  2 mpg              data      NA             text       mpg             
#>  3 cyl              data      NA             text       cyl             
#>  4 disp             data      NA             text       disp            
#>  5 hp               data      NA             text       hp              
#>  6 drat             data      NA             text       drat            
#>  7 wt               data      NA             text       wt              
#>  8 qsec             data      NA             text       qsec            
#>  9 vs               data      NA             text       vs              
#> 10 am               data      NA             text       am              
#> 11 gear             data      NA             text       gear            
#> 12 carb             data      NA             text       carb            
#> # ℹ 13 more variables: select_choices_or_calculations <lgl>, field_note <lgl>,
#> #   text_validation_type_or_show_slider_number <chr>,
#> #   text_validation_min <lgl>, text_validation_max <lgl>, identifier <lgl>,
#> #   branching_logic <lgl>, required_field <lgl>, custom_alignment <lgl>,
#> #   question_number <lgl>, matrix_group_name <lgl>, matrix_ranking <lgl>,
#> #   field_annotation <lgl>
#>