From 3488ff0a7ac1cb960a0cbde4c04b663af5f4e47f Mon Sep 17 00:00:00 2001 From: "Egeler, Paul W" Date: Fri, 25 May 2018 12:02:42 -0400 Subject: [PATCH] Improving SAS code. --- README.md | 59 ++++++++++------ REDCap_split.sas | 171 ++++++++++++++++++++++++----------------------- 2 files changed, 126 insertions(+), 104 deletions(-) diff --git a/README.md b/README.md index 2c94b81..4a1056b 100644 --- a/README.md +++ b/README.md @@ -22,32 +22,49 @@ possible fields; this has the effect of nesting the output table in a way that is not useful in most analysis software. Therefore, I have made a solution to handle the problem in both SAS and R. +## Supported Platforms + +- R +- SAS + +### Coming Soon + +- Python +- VBA + ## Instructions - -### SAS - -1. Run the macro definition in the source editor or using `%include`. -2. Run the SAS code provided by REDCap to import the data BUT COMMENT -THIS LINE: - ```format redcap_repeat_instrument redcap_repeat_instrument_.;``` -3. Open the data dictionary in MS Excel. We will need to do some pre- -processing to the data dictionary file before reading it in because -some of the user entry points (such as **Field Label**) allows for newline -characters, which can break our data ingestion. MS Excel will read in -the newline characters correctly. - - Copy the first four columns and paste into a new sheet. - - Save the new sheet as a .csv file. - - Close the file. -4. Call the macro, adjusting parameters as needed. - ### R -The function definition file contains an example to assist you. +The function definition file contains [roxygen2](https://cran.r-project.org/package=roxygen2) comments to assist you. 1. Run the function definition in the source editor or using `source()`. -2. Download the record dataset and metadata and import them. This can +1. Download the record dataset and metadata. This can be accomplished either by traditional methods or using the API. 
The `read.csv()` function should be able to handle newline characters within records, so no pre-processing of metadata csv is needed. -3. Call the function, pointing it to your record dataset and metadata -`data.frame`s. \ No newline at end of file +1. Call the function, pointing it to your record dataset and metadata +`data.frame`s. + +### SAS + +1. Download the data, SAS code to load the data, and the data dictionary from REDCap. +1. Run the SAS code provided by REDCap to import the data BUT COMMENT +THIS LINE: + ```format redcap_repeat_instrument redcap_repeat_instrument_.;``` +1. Run the REDCapRITS macro definitions in the source editor or using `%include`. +1. Run the macro call `%REDCAP_READ_DATA_DICT()` to load the data dictionary into your SAS session, pointing to the file location of your REDCap data dictionary. +1. Run the macro call `%REDCAP_SPLIT()`. You will have an output dataset for +your main table as well as for each repeating instrument. + + +## Issues + +Suggestions and contributions are more than welcome! Please feel free to create an issue or pull request. + +## About REDCap + +This code was written for [REDCap electronic data capture tools](https://projectredcap.org/).^1^ Code for this project was tested on the REDCap instance hosted at Spectrum Health, Grand Rapids, MI. REDCap (Research Electronic Data Capture) is a secure, web-based application designed to support data capture for research studies, providing 1) an intuitive interface for validated data entry; 2) audit trails for tracking data manipulation and export procedures; 3) automated export procedures for seamless data downloads to common statistical packages; and 4) procedures for importing data from external sources. + +## References + +^1^Paul A. Harris, Robert Taylor, Robert Thielke, Jonathon Payne, Nathaniel Gonzalez, Jose G. 
Conde, Research electronic data capture (REDCap) – A metadata-driven methodology and workflow process for providing translational research informatics support, J Biomed Inform. 2009 Apr;42(2):377-81. diff --git a/REDCap_split.sas b/REDCap_split.sas index 4331f69..727e62d 100644 --- a/REDCap_split.sas +++ b/REDCap_split.sas @@ -20,112 +20,117 @@ * * format redcap_repeat_instrument redcap_repeat_instrument_.; * -* 2. Open the data dictionary in MS Excel. -* -* 2a. Copy the first four columns and paste into a new sheet. -* -* 2b. Save the new sheet as a .csv file. -* -* 2c. Close the file. -* -* 3. Change the data dictionary name and file path in the macro call. -* -* 4. Run the macro definition and the macro call +* 2. Download the data dictionary for your project. + +* 3. Run the macro definitions REDCAP_READ_DATA_DICT and REDCAP_SPLIT +* +* 4. Run the macro call for REDCAP_READ_DATA_DICT to load in the data dictionary. +* This is necessary to split the tables correctly. * +* 5. Run the macro call for REDCAP_SPLIT. You will have an output dataset for +* your main table as well as for each repeating instrument. 
* ********************************************************************************/ +%MACRO REDCAP_READ_DATA_DICT( + DATA_DICTIONARY /* The file path for the data dictionary */ +); + DATA REDCAP_DATA_DICTIONARY; + + LENGTH VAR_NAME $ 200 FORM_NAME $ 200 SECTION_HEADER $ 200 FIELD_TYPE $ 200 X1-X14 $ 2250; + INFILE "&DATA_DICTIONARY" FIRSTOBS = 2 DSD DLM = "," LRECL=32767; + + INPUT VAR_NAME $ FORM_NAME $ SECTION_HEADER $ FIELD_TYPE $ X1-X14 $; + + IF FIELD_TYPE EQ "descriptive" THEN DELETE; + + DROP SECTION_HEADER X1-X14; + + RUN; + +%MEND REDCAP_READ_DATA_DICT; + + %MACRO REDCAP_SPLIT( - DATA_DICTIONARY /* The file path for the data dictionary */, - DATA_SET = REDCAP/* The name of the SAS dataset created by REDCap */, - KEY = RECORD_ID /* Variable that links base table with other tables */ + DATA_DICTIONARY = REDCAP_DATA_DICTIONARY /* The name of the SAS dataset of the data dictionary */, + DATA_SET = REDCAP/* The name of the SAS dataset created by REDCap */, + KEY = RECORD_ID /* Variable that links base table with other tables */ ); - PROC SQL NOPRINT; + PROC SQL NOPRINT; - SELECT DISTINCT - REDCAP_REPEAT_INSTRUMENT, - "'"!!trim(REDCAP_REPEAT_INSTRUMENT)!!"'" AS INSTRUMENT_QUOTED - INTO - :INSTRUMENTS SEPARATED BY ' ', - :INSTRUMENT_LIST SEPARATED BY ',' - FROM &DATA_SET AS A - WHERE REDCAP_REPEAT_INSTRUMENT GT ''; - - %LET N_SUBTABLES = &SQLOBS; + SELECT DISTINCT + REDCAP_REPEAT_INSTRUMENT, + "'"!!trim(REDCAP_REPEAT_INSTRUMENT)!!"'" AS INSTRUMENT_QUOTED + INTO + :INSTRUMENTS SEPARATED BY ' ', + :INSTRUMENT_LIST SEPARATED BY ',' + FROM &DATA_SET AS A + WHERE REDCAP_REPEAT_INSTRUMENT GT ''; + + %LET N_SUBTABLES = &SQLOBS; - %PUT INSTRUMENTS: %LEFT(&INSTRUMENTS); - %PUT INSTRUMENT LIST: %LEFT(&INSTRUMENT_LIST); - %PUT N SUBTABLES: %LEFT(&N_SUBTABLES); + %PUT INSTRUMENTS: %LEFT(&INSTRUMENTS); + %PUT INSTRUMENT LIST: %LEFT(&INSTRUMENT_LIST); + %PUT N SUBTABLES: %LEFT(&N_SUBTABLES); - QUIT; - + QUIT; + - %IF &N_SUBTABLES GT 0 %THEN %DO; - - DATA DATA_DICTIONARY; - - 
LENGTH VAR_NAME $ 255 FORM_NAME $ 255 SECTION_HEADER $ 255 FIELD_TYPE $ 255; - INFILE "&DATA_DICTIONARY" FIRSTOBS = 2 DSD DLM = ","; - - INPUT VAR_NAME $ FORM_NAME $ SECTION_HEADER $ FIELD_TYPE $; + %IF &N_SUBTABLES GT 0 %THEN %DO; + + PROC SQL NOPRINT; - IF FIELD_TYPE EQ "descriptive" THEN DELETE; - - RUN; - - PROC SQL NOPRINT; + SELECT VAR_NAME + INTO :VARS_BASE SEPARATED BY ' ' + FROM &DATA_DICTIONARY. AS A + WHERE FORM_NAME NOT IN (&INSTRUMENT_LIST); - SELECT VAR_NAME - INTO :VARS_BASE SEPARATED BY ' ' - FROM DATA_DICTIONARY AS A - WHERE FORM_NAME NOT IN (&INSTRUMENT_LIST); + %put Base vars: &VARS_BASE; + + %DO I = 1 %TO &N_SUBTABLES; - %put Base vars: &VARS_BASE; - - %DO I = 1 %TO &N_SUBTABLES; + %LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( )); + + SELECT VAR_NAME + INTO :VARS_&INSTRUMENT_I. SEPARATED BY ' ' + FROM &DATA_DICTIONARY. AS A + WHERE FORM_NAME EQ "&INSTRUMENT_I."; - %LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( )); - - SELECT VAR_NAME - INTO :VARS_&INSTRUMENT_I. SEPARATED BY ' ' - FROM DATA_DICTIONARY AS A - WHERE FORM_NAME EQ "&INSTRUMENT_I."; - - %put &INSTRUMENT_I. vars: &&VARS_&INSTRUMENT_I; - - %END; + %put &INSTRUMENT_I. vars: &&VARS_&INSTRUMENT_I; + + %END; - QUIT; + QUIT; - DATA &DATA_SET._BASE (KEEP = &VARS_BASE); - SET &DATA_SET; - - IF MISSING(REDCAP_REPEAT_INSTRUMENT); - RUN; - - %DO I = 1 %TO &N_SUBTABLES; + DATA &DATA_SET._BASE (KEEP = &VARS_BASE); + SET &DATA_SET; + + IF MISSING(REDCAP_REPEAT_INSTRUMENT); + RUN; + + %DO I = 1 %TO &N_SUBTABLES; - %LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( )); - - DATA &DATA_SET._&INSTRUMENT_I. (KEEP = &KEY redcap_repeat_instance &&VARS_&INSTRUMENT_I); - SET &DATA_SET; - - IF REDCAP_REPEAT_INSTRUMENT EQ "&INSTRUMENT_I."; - - RUN; - - %END; + %LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( )); + + DATA &DATA_SET._&INSTRUMENT_I. 
(KEEP = &KEY redcap_repeat_instance &&VARS_&INSTRUMENT_I); + SET &DATA_SET; + + IF REDCAP_REPEAT_INSTRUMENT EQ "&INSTRUMENT_I."; + + RUN; + + %END; - %END; + %END; - %ELSE %DO; + %ELSE %DO; - %PUT THERE WERE NO REPEAT INSTRUMENTS IN THE DATASET %LEFT(&DATA_SET); - %PUT NO ACTION WAS TAKEN; + %PUT THERE WERE NO REPEAT INSTRUMENTS IN THE DATASET %LEFT(&DATA_SET); + %PUT NO ACTION WAS TAKEN; - %END; + %END; %MEND REDCAP_SPLIT;