mirror of
https://github.com/agdamsbo/REDCapCAST.git
synced 2024-11-22 13:30:23 +01:00
Initial Commit
This commit is contained in:
commit
504a414ce6
53
README.md
Normal file
53
README.md
Normal file
@ -0,0 +1,53 @@
|
||||
REDCap Repeating Instrument Table Splitter
|
||||
===========================================
|
||||
|
||||
Paul W. Egeler, M.S., GStat
|
||||
Spectrum Health Office of Research Administration
|
||||
13 July 2017
|
||||
|
||||
## Description
|
||||
|
||||
So the new buzz in the REDCap world seems to be Repeating Instruments
|
||||
and Events. Certainly there is potential for a lot of utility in this
|
||||
feature and I was excited to try it out. I know I will be using this
|
||||
feature a lot in the future.
|
||||
|
||||
Unfortunately, I was not very happy with the way the data was exported
|
||||
either via CSV or API call. When you conceptualize the data model for
|
||||
a Repeating Instrument, you probably think of a multi-table model. You
|
||||
might expect that the non-repeating instruments may constitute one table
|
||||
that would be related to Repeating Instruments tables via a one-to-many
|
||||
relationship. In reality, the data is exported as one table with all
|
||||
possible fields; this has the effect of nesting the output table in a
|
||||
way that is not useful in most analysis software. Therefore, I have made
|
||||
a solution to handle the problem in both SAS and R.
|
||||
|
||||
## Instructions
|
||||
|
||||
### SAS
|
||||
|
||||
1. Run the macro definition in the source editor or using `%include`.
|
||||
2. Run the SAS code provided by REDCap to import the data BUT COMMENT
|
||||
OUT THIS LINE:
|
||||
`format redcap_repeat_instrument redcap_repeat_instrument_.;`
|
||||
3. Open the data dictionary in MS Excel. We will need to do some
|
||||
pre-processing to the data dictionary file before reading it in because
|
||||
some of the user entry points (such as **Field Label**) allow for newline
|
||||
characters, which can break our data ingestion. MS Excel will read in
|
||||
the newline characters correctly.
|
||||
- Copy the first four columns and paste into a new sheet.
|
||||
- Save the new sheet as a .csv file.
|
||||
- Close the file.
|
||||
4. Call the macro, adjusting parameters as needed.
|
||||
|
||||
### R
|
||||
|
||||
The function definition file contains an example to assist you.
|
||||
|
||||
1. Run the function definition in the source editor or using `source()`.
|
||||
2. Download the record dataset and metadata and import them. This can
|
||||
be accomplished either by traditional methods or using the API. The
|
||||
`read.csv()` function should be able to handle newline characters within
|
||||
records, so no pre-processing of metadata csv is needed.
|
||||
3. Call the function, pointing it to your record dataset and metadata
|
||||
`data.frame`s.
|
87
REDCap_split.r
Normal file
87
REDCap_split.r
Normal file
@ -0,0 +1,87 @@
|
||||
# v0.0.0
|
||||
#' Split REDCap repeating instruments table into multiple tables
#'
#' This will take a raw data frame from REDCap and split it into a base table
#' and give individual tables for each repeating instrument. Metadata
#' is used to determine which fields should be included in each resultant table.
#'
#' Rows whose \code{redcap_repeat_instrument} is empty (\code{""} or
#' \code{NA}) make up the base table; every other distinct value yields one
#' table named after that instrument. Checkbox fields, which REDCap exports
#' as one column per choice named \code{<field>___<code>}, are matched to
#' their metadata entry by that prefix. Metadata fields that have no
#' corresponding column in \code{records} are skipped.
#'
#' @param records data.frame containing the records. When it has no
#'   \code{redcap_repeat_instrument} column a warning is raised and the
#'   records are returned unchanged inside a list.
#' @param metadata data.frame containing the metadata. The first column is
#'   read as the field name and the second as the form name. The field type
#'   is read from the column named \code{field_type} when present, otherwise
#'   from the fourth column.
#' @author Paul W. Egeler, M.S., GStat
#' @examples
#' \dontrun{
#' library(jsonlite)
#' library(RCurl)
#'
#' # Get the metadata
#' result.meta <- postForm(
#'   uri = api_url,
#'   token = api_token,
#'   content = 'metadata',
#'   format = 'json'
#' )
#'
#' # Get the records
#' result.record <- postForm(
#'   uri = api_url,
#'   token = api_token,
#'   content = 'record',
#'   format = 'json',
#'   type = 'flat',
#'   rawOrLabel = 'raw',
#'   rawOrLabelHeaders = 'raw',
#'   exportCheckboxLabel = 'false',
#'   exportSurveyFields = 'false',
#'   exportDataAccessGroups = 'false',
#'   returnFormat = 'json'
#' )
#'
#' records <- fromJSON(result.record)
#' metadata <- fromJSON(result.meta)
#'
#' REDCap_split(records, metadata)
#' }
#' @return a list of data.frames. The base table is the element whose name
#'   is the empty string; the remaining elements are named after their
#'   repeating instrument.
#' @export
REDCap_split <- function(records, metadata) {

  # Check to see if there were any repeating instruments
  if (!("redcap_repeat_instrument" %in% names(records))) {
    warning("There are no repeating instruments.\n")
    return(list(records))
  }

  # Locate the field type: by name (API export) or by position (a data
  # dictionary CSV, whose headers are not the API names).
  if ("field_type" %in% names(metadata)) {
    field_type <- metadata[["field_type"]]
  } else if (ncol(metadata) >= 4L) {
    field_type <- metadata[[4L]]
  } else {
    stop(
      "`metadata` must have a `field_type` column or at least four columns.",
      call. = FALSE
    )
  }

  # Clean the metadata: descriptive fields carry no data. `%in%` never
  # yields NA, so a missing field type cannot produce an all-NA row.
  keep <- !(as.character(field_type) %in% "descriptive")
  field_names <- as.character(metadata[[1L]])[keep]
  form_names <- as.character(metadata[[2L]])[keep]

  # Normalise the instrument indicator so that NA (blank cells read as
  # missing) is treated like "" instead of being dropped by split().
  instrument <- as.character(records[["redcap_repeat_instrument"]])
  instrument[is.na(instrument)] <- ""

  # Identify the subtables in the data
  subtables <- unique(instrument[instrument != ""])

  # Translate metadata field names into the columns actually present in
  # `records`, keeping metadata order. An exact match wins; otherwise the
  # checkbox columns `<field>___<code>` are used.
  record_cols <- names(records)
  export_cols <- function(fields) {
    matched <- lapply(fields, function(field) {
      if (field %in% record_cols) {
        field
      } else {
        record_cols[startsWith(record_cols, paste0(field, "___"))]
      }
    })
    as.character(unlist(matched, use.names = FALSE))
  }

  # NOTE(review): the first three columns are taken as the linking columns,
  # which presumes the record id, repeat instrument and repeat instance
  # come first in the export -- confirm for exports with extra leading
  # columns.
  key_cols <- names(records)[1:3]

  # Split the table based on instrument
  out <- split(records, instrument)

  # Delete the variables that are not relevant. Elements are addressed by
  # position because the base table's name is "" and cannot be used with
  # `[[`.
  for (k in seq_along(out)) {
    form <- names(out)[k]

    if (form == "") {
      cols <- export_cols(field_names[!(form_names %in% subtables)])
    } else {
      cols <- c(key_cols, export_cols(field_names[form_names %in% form]))
    }

    out[[k]] <- out[[k]][unique(cols)]
  }

  out
}
|
131
REDCap_split.sas
Normal file
131
REDCap_split.sas
Normal file
@ -0,0 +1,131 @@
|
||||
/********************************************************************************
|
||||
*
|
||||
* FILE: REDCap_split.sas
|
||||
*
|
||||
* VERSION: 0.0.0
|
||||
*
|
||||
* PURPOSE: Take a REDCap dataset with multiple events and make into several
|
||||
* tables with primary and foreign keys
|
||||
*
|
||||
* AUTHOR: Paul W. Egeler, M.S., GStat
|
||||
*
|
||||
* DATE: 22JUN2017
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* INSTRUCTIONS:
|
||||
*
|
||||
* 1. Run the SAS code provided by REDCap to import the data
|
||||
* BUT COMMENT OUT THIS LINE:
|
||||
*
|
||||
* format redcap_repeat_instrument redcap_repeat_instrument_.;
|
||||
*
|
||||
* 2. Open the data dictionary in MS Excel.
|
||||
*
|
||||
* 2a. Copy the first four columns and paste into a new sheet.
|
||||
*
|
||||
* 2b. Save the new sheet as a .csv file.
|
||||
*
|
||||
* 2c. Close the file.
|
||||
*
|
||||
* 3. Change the data dictionary name and file path in the macro call.
|
||||
*
|
||||
* 4. Run the macro definition and the macro call
|
||||
*
|
||||
*
|
||||
********************************************************************************/
|
||||
|
||||
/*
 * REDCAP_SPLIT: split a REDCap export into a base table plus one table per
 * repeating instrument.
 *
 * Parameters:
 *   DATA_DICTIONARY  File path of the four-column data dictionary CSV
 *                    (variable name, form name, section header, field type).
 *   DATA_SET         Name of the SAS dataset holding the REDCap export.
 *   KEY              Variable kept in every instrument table to link it
 *                    back to the base table.
 *
 * Output datasets: &DATA_SET._BASE and one &DATA_SET._<instrument> per
 * distinct non-blank value of REDCAP_REPEAT_INSTRUMENT. A work dataset named
 * DATA_DICTIONARY is also created. When the export contains no repeat
 * instruments only a note is written to the log.
 */
%MACRO REDCAP_SPLIT(
	DATA_DICTIONARY /* The file path for the data dictionary */,
	DATA_SET = REDCAP/* The name of the SAS dataset created by REDCap */,
	KEY = RECORD_ID /* Variable that links base table with other tables */
);

	/* Keep every working macro variable local so the macro cannot clobber
	   same-named variables in the calling scope, and so the %PUT statements
	   below resolve (to blank) even when the query returns no rows. */
	%LOCAL I INSTRUMENT_I INSTRUMENTS INSTRUMENT_LIST N_SUBTABLES VARS_BASE;

	/* Collect the distinct repeat instruments, both as a blank-separated list
	   of names and as a comma-separated list of quoted names for IN (). */
	PROC SQL NOPRINT;

		SELECT DISTINCT
			REDCAP_REPEAT_INSTRUMENT,
			"'"!!trim(REDCAP_REPEAT_INSTRUMENT)!!"'" AS INSTRUMENT_QUOTED
		INTO
			:INSTRUMENTS SEPARATED BY ' ',
			:INSTRUMENT_LIST SEPARATED BY ','
		FROM &DATA_SET AS A
		WHERE REDCAP_REPEAT_INSTRUMENT GT '';

		/* SQLOBS holds the number of rows the SELECT above returned */
		%LET N_SUBTABLES = &SQLOBS;

		%PUT INSTRUMENTS: %LEFT(&INSTRUMENTS);
		%PUT INSTRUMENT LIST: %LEFT(&INSTRUMENT_LIST);
		%PUT N SUBTABLES: %LEFT(&N_SUBTABLES);

	QUIT;


	%IF &N_SUBTABLES GT 0 %THEN %DO;

		/* Read the four-column data dictionary, skipping the header row and
		   dropping descriptive fields. */
		DATA DATA_DICTIONARY;

			LENGTH VAR_NAME $ 255 FORM_NAME $ 255 SECTION_HEADER $ 255 FIELD_TYPE $ 255;
			INFILE "&DATA_DICTIONARY" FIRSTOBS = 2 DSD DLM = ",";

			INPUT VAR_NAME $ FORM_NAME $ SECTION_HEADER $ FIELD_TYPE $;

			IF FIELD_TYPE EQ "descriptive" THEN DELETE;

		RUN;

		PROC SQL NOPRINT;

			/* Variables of every form that is not a repeat instrument */
			SELECT VAR_NAME
			INTO :VARS_BASE SEPARATED BY ' '
			FROM DATA_DICTIONARY AS A
			WHERE FORM_NAME NOT IN (&INSTRUMENT_LIST);

			%put Base vars: &VARS_BASE;

			/* One variable list per repeat instrument, stored in a macro
			   variable named VARS_<instrument> */
			%DO I = 1 %TO &N_SUBTABLES;

				%LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( ));

				/* Declare first so the reference below resolves to blank when
				   the dictionary has no rows for this instrument */
				%LOCAL VARS_&INSTRUMENT_I.;

				SELECT VAR_NAME
				INTO :VARS_&INSTRUMENT_I. SEPARATED BY ' '
				FROM DATA_DICTIONARY AS A
				WHERE FORM_NAME EQ "&INSTRUMENT_I.";

				%put &INSTRUMENT_I. vars: &&VARS_&INSTRUMENT_I;

			%END;


		QUIT;

		/* Base table: rows that do not belong to a repeat instrument */
		DATA &DATA_SET._BASE (KEEP = &VARS_BASE);
			SET &DATA_SET;

			IF MISSING(REDCAP_REPEAT_INSTRUMENT);
		RUN;

		/* One table per repeat instrument, keyed by &KEY and the instance */
		%DO I = 1 %TO &N_SUBTABLES;

			%LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( ));

			DATA &DATA_SET._&INSTRUMENT_I. (KEEP = &KEY redcap_repeat_instance &&VARS_&INSTRUMENT_I);
				SET &DATA_SET;

				IF REDCAP_REPEAT_INSTRUMENT EQ "&INSTRUMENT_I.";

			RUN;

		%END;

	%END;

	%ELSE %DO;

		%PUT THERE WERE NO REPEAT INSTRUMENTS IN THE DATASET %LEFT(&DATA_SET);
		%PUT NO ACTION WAS TAKEN;

	%END;


%MEND REDCAP_SPLIT;
|
Loading…
Reference in New Issue
Block a user