commit 504a414ce66a32a91b45e2f7b47aa2abc4be8852 Author: Egeler, Paul W Date: Fri Jan 19 11:50:08 2018 -0500 Initial Commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..a2657ef --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +REDCap Repeating Instrument Table Splitter +=========================================== + +Paul W. Egeler, M.S., GStat +Spectrum Health Office of Research Administration +13 July 2017 + +## Description + +So the new buzz in the REDCap world seems to be Repeating Instruments +and Events. Certainly there is potential for a lot of utility in this +feature and I was excited to try it out. I know I will be using this +feature a lot in the future. + +Unfortunately, I was not very happy with the way the data was exported +either via CSV or API call. When you conceptualize the data model for +a Repeating Instrument, you probably think of a multi-table model. You +might expect that the non-repeating instruments may constitute one table +that would be related to Repeating Instruments tables via a one-to-many +relationship. In reality, the data is outputted as one table with all +possible fields; this has the effect of nesting the output table in a +way that is not useful in most analysis software. Therefore, I have made +a solution to handle the problem in both SAS and R. + +## Instructions + +### SAS + +1. Run the macro definition in the source editor or using `%include`. +2. Run the SAS code provided by REDCap to import the data BUT COMMENT +THIS LINE: + `format redcap_repeat_instrument redcap_repeat_instrument_.;` +3. Open the data dictionary in MS Excel. We will need to do some +pre-processing to the data dictionary file before reading it in because +some of the user entry points (such as **Field Label**) allow for newline +characters, which can break our data ingestion. MS Excel will read in +the newline characters correctly. + - Copy the first four columns and paste into a new sheet. + - Save the new sheet as a .csv file. 
+ - Close the file. +4. Call the macro, adjusting parameters as needed. + +### R + +The function definition file contains an example to assist you. + +1. Run the function definition in the source editor or using `source()`. +2. Download the record dataset and metadata and import them. This can +be accomplished either by traditional methods or using the API. The +`read.csv()` function should be able to handle newline characters within +records, so no pre-processing of metadata csv is needed. +3. Call the function, pointing it to your record dataset and metadata +`data.frame`s. \ No newline at end of file
diff --git a/REDCap_split.r b/REDCap_split.r
new file mode 100644
index 0000000..a98fe21
--- /dev/null
+++ b/REDCap_split.r
@@ -0,0 +1,127 @@
+# v0.0.0
+#' Split REDCap repeating instruments table into multiple tables
+#'
+#' This will take a raw data frame from REDCap and split it into a base table
+#' and give individual tables for each repeating instrument. Metadata
+#' is used to determine which fields should be included in each resultant table.
+#'
+#' Field and form names are read from the first two metadata columns and the
+#' field type from \code{field_type} or, failing that, the fourth column, so a
+#' data dictionary imported from CSV works as well as one retrieved through the
+#' API. Checkbox fields, exported as \code{field___code}, are matched by prefix.
+#'
+#' @param records data.frame containing the records. It must hold a
+#'   \code{redcap_repeat_instrument} column for any splitting to happen.
+#' @param metadata data.frame containing the metadata (data dictionary); only
+#'   its first four columns are used.
+#' @author Paul W. Egeler, M.S., GStat
+#' @examples
+#' \dontrun{
+#' library(jsonlite)
+#' library(RCurl)
+#'
+#' # Get the metadata
+#' result.meta <- postForm(
+#'   uri = api_url,
+#'   token = api_token,
+#'   content = 'metadata',
+#'   format = 'json'
+#' )
+#'
+#' # Get the records
+#' result.record <- postForm(
+#'   uri = api_url,
+#'   token = api_token,
+#'   content = 'record',
+#'   format = 'json',
+#'   type = 'flat',
+#'   rawOrLabel = 'raw',
+#'   rawOrLabelHeaders = 'raw',
+#'   exportCheckboxLabel = 'false',
+#'   exportSurveyFields = 'false',
+#'   exportDataAccessGroups = 'false',
+#'   returnFormat = 'json'
+#' )
+#'
+#' records <- fromJSON(result.record)
+#' metadata <- fromJSON(result.meta)
+#'
+#' REDCap_split(records, metadata)
+#' }
+#' @return a list of data.frames. Non-repeating rows form the base table, stored
+#'   under the name \code{""}; each repeating instrument is stored under its own
+#'   name. If \code{records} has no \code{redcap_repeat_instrument} column, a
+#'   warning is raised and \code{list(records)} is returned.
+#' @export
+REDCap_split <- function(records, metadata) {
+
+  # Check to see if there were any repeating instruments
+  if (!("redcap_repeat_instrument" %in% names(records))) {
+    warning("There are no repeating instruments.\n")
+    return(list(records))
+  }
+
+  # Clean the metadata. CSV data dictionaries carry headers such as
+  # "Field.Type", so fall back to the column position when the API name
+  # "field_type" is absent. %in% keeps rows whose type is NA out of trouble.
+  type_col <- if ("field_type" %in% names(metadata)) "field_type" else 4L
+  metadata <- metadata[!(metadata[[type_col]] %in% "descriptive"), 1:4]
+
+  field_names <- as.character(metadata[[1]])
+  form_names <- as.character(metadata[[2]])
+
+  # Identify the subtables in the data. A column that is blank throughout may
+  # be read in as NA, so NA is treated like "" (a non-repeating row).
+  instrument <- as.character(records[["redcap_repeat_instrument"]])
+  instrument[is.na(instrument)] <- ""
+  subtables <- setdiff(unique(instrument), "")
+
+  # Key columns carried into every repeating-instrument table: the record id
+  # (first column) plus whichever REDCap bookkeeping columns are present.
+  # Looking them up by name keeps redcap_repeat_instance in the output when a
+  # longitudinal export inserts redcap_event_name as the second column.
+  key_cols <- intersect(
+    c(
+      names(records)[1],
+      "redcap_event_name",
+      "redcap_repeat_instrument",
+      "redcap_repeat_instance"
+    ),
+    names(records)
+  )
+
+  # Map metadata field names onto the columns actually present in records
+  columns_for <- function(fields) {
+    matched <- lapply(fields, function(field) {
+      if (field %in% names(records)) {
+        field
+      } else {
+        # Checkbox fields appear as field___1, field___2, ...
+        names(records)[startsWith(names(records), paste0(field, "___"))]
+      }
+    })
+    unlist(matched, use.names = FALSE)
+  }
+
+  # Split the table based on instrument
+  out <- split.data.frame(records, instrument)
+
+  # Delete the variables that are not relevant. Elements are addressed by
+  # position because the base table is named "" and cannot be reached by name.
+  for (idx in seq_along(out)) {
+
+    instrument_i <- names(out)[idx]
+
+    if (instrument_i == "") {
+      keep <- columns_for(field_names[!(form_names %in% subtables)])
+    } else {
+      keep <- c(key_cols, columns_for(field_names[form_names == instrument_i]))
+    }
+
+    out[[idx]] <- out[[idx]][unique(keep)]
+
+  }
+
+  out
+
+}
diff --git a/REDCap_split.sas b/REDCap_split.sas new file mode 100644 index 0000000..4331f69 --- /dev/null +++ b/REDCap_split.sas @@ -0,0 +1,131 @@ +/******************************************************************************** +* +* FILE: REDCap_split.sas +* +* VERSION: 0.0.0 +* +* 
PURPOSE: Take a REDCap dataset with multiple events and make into several
+* tables with primary and foreign keys
+*
+* AUTHOR: Paul W. Egeler, M.S., GStat
+*
+* DATE: 22JUN2017
+*
+*******************************************************************************
+*
+* INSTRUCTIONS:
+*
+* 1. Run the SAS code provided by REDCap to import the data
+* BUT COMMENT THIS LINE:
+*
+* format redcap_repeat_instrument redcap_repeat_instrument_.;
+*
+* 2. Open the data dictionary in MS Excel.
+*
+* 2a. Copy the first four columns and paste into a new sheet.
+*
+* 2b. Save the new sheet as a .csv file.
+*
+* 2c. Close the file.
+*
+* 3. Change the data dictionary name and file path in the macro call.
+*
+* 4. Run the macro definition and the macro call
+*
+*
+********************************************************************************/
+
+/* REDCAP_SPLIT: writes one DATA_SET_BASE table holding the rows with no      */
+/* repeating instrument, plus one DATA_SET_instrument table per distinct      */
+/* value of REDCAP_REPEAT_INSTRUMENT. The data dictionary CSV decides which   */
+/* variables each table keeps.                                                */
+%MACRO REDCAP_SPLIT(
+    DATA_DICTIONARY /* The file path for the data dictionary */,
+    DATA_SET = REDCAP/* The name of the SAS dataset created by REDCap */,
+    KEY = RECORD_ID /* Variable that links base table with other tables */
+);
+
+    /* Collect the distinct instrument names that compare greater than the   */
+    /* empty string. INSTRUMENTS receives them separated by blanks;          */
+    /* INSTRUMENT_LIST receives the same names wrapped in single quotes and  */
+    /* separated by commas, ready for the NOT IN clause further down.        */
+    PROC SQL NOPRINT;
+
+        SELECT DISTINCT
+            REDCAP_REPEAT_INSTRUMENT,
+            "'"!!trim(REDCAP_REPEAT_INSTRUMENT)!!"'" AS INSTRUMENT_QUOTED
+        INTO
+            :INSTRUMENTS SEPARATED BY ' ',
+            :INSTRUMENT_LIST SEPARATED BY ','
+        FROM &DATA_SET AS A
+        WHERE REDCAP_REPEAT_INSTRUMENT GT '';
+
+        /* SQLOBS is read right after the query so that it reflects the      */
+        /* number of instrument names selected above.                        */
+        %LET N_SUBTABLES = &SQLOBS;
+
+        /* Echo what was found to the log */
+        %PUT INSTRUMENTS: %LEFT(&INSTRUMENTS);
+        %PUT INSTRUMENT LIST: %LEFT(&INSTRUMENT_LIST);
+        %PUT N SUBTABLES: %LEFT(&N_SUBTABLES);
+
+    QUIT;
+
+
+    /* Only split when at least one repeating instrument was found */
+    %IF &N_SUBTABLES GT 0 %THEN %DO;
+
+        /* Read the first four comma-delimited columns of the data           */
+        /* dictionary file, starting at the second record (FIRSTOBS = 2),    */
+        /* and drop the fields whose type is "descriptive".                  */
+        DATA DATA_DICTIONARY;
+
+            LENGTH VAR_NAME $ 255 FORM_NAME $ 255 SECTION_HEADER $ 255 FIELD_TYPE $ 255;
+            INFILE "&DATA_DICTIONARY" FIRSTOBS = 2 DSD DLM = ",";
+
+            INPUT VAR_NAME $ FORM_NAME $ SECTION_HEADER $ FIELD_TYPE $;
+
+            IF FIELD_TYPE EQ "descriptive" THEN DELETE;
+
+        RUN;
+
+        PROC SQL NOPRINT;
+
+            /* Base table variables: every dictionary field whose form is    */
+            /* not one of the repeating instruments.                         */
+            SELECT VAR_NAME
+            INTO :VARS_BASE SEPARATED BY ' '
+            FROM DATA_DICTIONARY AS A
+            WHERE FORM_NAME NOT IN (&INSTRUMENT_LIST);
+
+            %put Base vars: &VARS_BASE;
+
+            /* One macro variable VARS_instrument per repeating instrument,  */
+            /* holding the blank-separated fields that belong to that form.  */
+            %DO I = 1 %TO &N_SUBTABLES;
+
+                /* Pick the I-th blank-delimited instrument name */
+                %LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( ));
+
+                SELECT VAR_NAME
+                INTO :VARS_&INSTRUMENT_I. SEPARATED BY ' '
+                FROM DATA_DICTIONARY AS A
+                WHERE FORM_NAME EQ "&INSTRUMENT_I.";
+
+                /* The doubled ampersand resolves to the value of the        */
+                /* VARS_instrument variable created just above.              */
+                %put &INSTRUMENT_I. vars: &&VARS_&INSTRUMENT_I;
+
+            %END;
+
+
+        QUIT;
+
+        /* Base table: rows with a missing REDCAP_REPEAT_INSTRUMENT, keeping */
+        /* only the base variables.                                          */
+        DATA &DATA_SET._BASE (KEEP = &VARS_BASE);
+            SET &DATA_SET;
+
+            IF MISSING(REDCAP_REPEAT_INSTRUMENT);
+        RUN;
+
+        /* One table per instrument: rows of that instrument, keeping the    */
+        /* KEY variable, redcap_repeat_instance and the instrument fields.   */
+        %DO I = 1 %TO &N_SUBTABLES;
+
+            %LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( ));
+
+            DATA &DATA_SET._&INSTRUMENT_I. (KEEP = &KEY redcap_repeat_instance &&VARS_&INSTRUMENT_I);
+                SET &DATA_SET;
+
+                IF REDCAP_REPEAT_INSTRUMENT EQ "&INSTRUMENT_I.";
+
+            RUN;
+
+        %END;
+
+    %END;
+
+    /* No repeating instruments found: report to the log and create nothing */
+    %ELSE %DO;
+
+        %PUT THERE WERE NO REPEAT INSTRUMENTS IN THE DATASET %LEFT(&DATA_SET);
+        %PUT NO ACTION WAS TAKEN;
+
+    %END;
+
+
+%MEND REDCAP_SPLIT;