Improving SAS code.

This commit is contained in:
Egeler, Paul W 2018-05-25 12:02:42 -04:00
parent cbc39e288e
commit 3488ff0a7a
2 changed files with 126 additions and 104 deletions

View File

@ -22,32 +22,49 @@ possible fields; this has the effect of nesting the output table in a
way that is not useful in most analysis software. Therefore, I have made
a solution to handle the problem in both SAS and R.
## Supported Platforms
- R
- SAS
### Coming Soon
- Python
- VBA
## Instructions
### SAS
1. Run the macro definition in the source editor or using `%include`.
2. Run the SAS code provided by REDCap to import the data BUT COMMENT
THIS LINE:
```format redcap_repeat_instrument redcap_repeat_instrument_.;```
3. Open the data dictionary in MS Excel. We will need to do some pre-
processing to the data dictionary file before reading it in because
some of the user entry points (such as **Field Label**) allow for newline
characters, which can break our data ingestion. MS Excel will read in
the newline characters correctly.
- Copy the first four columns and paste into a new sheet.
- Save the new sheet as a .csv file.
- Close the file.
4. Call the macro, adjusting parameters as needed.
### R
The function definition file contains an example to assist you.
The function definition file contains [roxygen2](https://cran.r-project.org/package=roxygen2) comments to assist you.
1. Run the function definition in the source editor or using `source()`.
2. Download the record dataset and metadata and import them. This can
1. Download the record dataset and metadata. This can
be accomplished either by traditional methods or using the API. The
`read.csv()` function should be able to handle newline characters within
records, so no pre-processing of metadata csv is needed.
3. Call the function, pointing it to your record dataset and metadata
`data.frame`s.
1. Call the function, pointing it to your record dataset and metadata
`data.frame`s.
### SAS
1. Download the data, SAS code to load the data, and the data dictionary from REDCap
1. Run the SAS code provided by REDCap to import the data BUT COMMENT
THIS LINE:
```format redcap_repeat_instrument redcap_repeat_instrument_.;```
1. Run the REDCapRITS macro definitions in the source editor or using `%include`.
1. Run the macro call `%REDCAP_READ_DATA_DICT()` to load the data dictionary into your SAS session, pointing to the file location of your REDCap data dictionary.
1. Run the macro call `%REDCAP_SPLIT()`. You will have an output dataset for
your main table as well as for each repeating instrument.
## Issues
Suggestions and contributions are more than welcome! Please feel free to create an issue or pull request.
## About REDCap
This code was written for [REDCap electronic data capture tools](https://projectredcap.org/).^1^ Code for this project was tested on the REDCap instance hosted at Spectrum Health, Grand Rapids, MI. REDCap (Research Electronic Data Capture) is a secure, web-based application designed to support data capture for research studies, providing 1) an intuitive interface for validated data entry; 2) audit trails for tracking data manipulation and export procedures; 3) automated export procedures for seamless data downloads to common statistical packages; and 4) procedures for importing data from external sources.
## References
^1^Paul A. Harris, Robert Taylor, Robert Thielke, Jonathon Payne, Nathaniel Gonzalez, Jose G. Conde, Research electronic data capture (REDCap) – A metadata-driven methodology and workflow process for providing translational research informatics support, J Biomed Inform. 2009 Apr;42(2):377-81.

View File

@ -20,112 +20,117 @@
*
* format redcap_repeat_instrument redcap_repeat_instrument_.;
*
* 2. Open the data dictionary in MS Excel.
*
* 2a. Copy the first four columns and paste into a new sheet.
*
* 2b. Save the new sheet as a .csv file.
*
* 2c. Close the file.
*
* 3. Change the data dictionary name and file path in the macro call.
*
* 4. Run the macro definition and the macro call
* 2. Download the data dictionary for your project.
* 3. Run the macro definitions REDCAP_READ_DATA_DICT and REDCAP_SPLIT
*
* 4. Run the macro call for REDCAP_READ_DATA_DICT to load in the data dictionary.
* This is necessary to split the tables correctly.
*
* 5. Run the macro call for REDCAP_SPLIT. You will have an output dataset for
* your main table as well as for each repeating instrument.
*
********************************************************************************/
/*
 * REDCAP_READ_DATA_DICT
 *
 * Reads the delimited data dictionary file at &DATA_DICTIONARY into the SAS
 * dataset REDCAP_DATA_DICTIONARY.
 *
 *   - The first line of the file is skipped (FIRSTOBS = 2).
 *   - Eighteen comma-delimited fields are read per observation: the four
 *     named columns plus fourteen placeholder columns (X1-X14) whose values
 *     are read but not kept.
 *   - Rows whose FIELD_TYPE is "descriptive" are excluded.
 *
 * Output columns: VAR_NAME, FORM_NAME, FIELD_TYPE.
 */
%MACRO REDCAP_READ_DATA_DICT(
    DATA_DICTIONARY /* The file path for the data dictionary */
);

    DATA REDCAP_DATA_DICTIONARY (KEEP = VAR_NAME FORM_NAME FIELD_TYPE);

        /* Declare widths up front so long dictionary entries are not truncated
           to the default character length. */
        LENGTH
            VAR_NAME       $ 200
            FORM_NAME      $ 200
            SECTION_HEADER $ 200
            FIELD_TYPE     $ 200
            X1-X14         $ 2250
        ;

        INFILE "&DATA_DICTIONARY" DLM = "," DSD FIRSTOBS = 2 LRECL = 32767;

        /* SECTION_HEADER and X1-X14 are read only to advance past those
           fields; they are not written to the output dataset. */
        INPUT VAR_NAME $ FORM_NAME $ SECTION_HEADER $ FIELD_TYPE $ X1-X14 $;

        /* Subsetting IF: only non-descriptive rows are output. */
        IF FIELD_TYPE NE "descriptive";

    RUN;

%MEND REDCAP_READ_DATA_DICT;
/*
 * REDCAP_SPLIT
 *
 * Splits &DATA_SET into a base table (&DATA_SET._BASE) and one table per
 * distinct non-blank REDCAP_REPEAT_INSTRUMENT value
 * (&DATA_SET._<instrument>). The data dictionary's VAR_NAME / FORM_NAME
 * columns decide which variables each output table keeps. When no repeat
 * instruments are found, only log messages are written.
 *
 * NOTE(review): this span appears to contain the pre-commit and post-commit
 * versions of most statements interleaved (duplicate parameter lists,
 * duplicate PROC SQL / SELECT / DATA steps, and a residual DATA step that
 * reads the dictionary from a file path). Confirm against the committed
 * source which lines are live before relying on this text as runnable code.
 */
%MACRO REDCAP_SPLIT(
DATA_DICTIONARY /* The file path for the data dictionary */,
DATA_SET = REDCAP/* The name of the SAS dataset created by REDCap */,
KEY = RECORD_ID /* Variable that links base table with other tables */
DATA_DICTIONARY = REDCAP_DATA_DICTIONARY /* The name of the SAS dataset of the data dictionary */,
DATA_SET = REDCAP/* The name of the SAS dataset created by REDCap */,
KEY = RECORD_ID /* Variable that links base table with other tables */
);
/* Step 1: collect the distinct repeat-instrument names found in &DATA_SET.
   INSTRUMENTS     = names separated by spaces (used with %SCAN below).
   INSTRUMENT_LIST = the same names single-quoted and comma-separated
                     (used inside the SQL IN (...) list below).
   N_SUBTABLES     = &SQLOBS captured right after the SELECT. */
PROC SQL NOPRINT;
PROC SQL NOPRINT;
SELECT DISTINCT
REDCAP_REPEAT_INSTRUMENT,
"'"!!trim(REDCAP_REPEAT_INSTRUMENT)!!"'" AS INSTRUMENT_QUOTED
INTO
:INSTRUMENTS SEPARATED BY ' ',
:INSTRUMENT_LIST SEPARATED BY ','
FROM &DATA_SET AS A
WHERE REDCAP_REPEAT_INSTRUMENT GT '';
%LET N_SUBTABLES = &SQLOBS;
SELECT DISTINCT
REDCAP_REPEAT_INSTRUMENT,
"'"!!trim(REDCAP_REPEAT_INSTRUMENT)!!"'" AS INSTRUMENT_QUOTED
INTO
:INSTRUMENTS SEPARATED BY ' ',
:INSTRUMENT_LIST SEPARATED BY ','
FROM &DATA_SET AS A
WHERE REDCAP_REPEAT_INSTRUMENT GT '';
%LET N_SUBTABLES = &SQLOBS;
/* Echo the collected values to the log. */
%PUT INSTRUMENTS: %LEFT(&INSTRUMENTS);
%PUT INSTRUMENT LIST: %LEFT(&INSTRUMENT_LIST);
%PUT N SUBTABLES: %LEFT(&N_SUBTABLES);
%PUT INSTRUMENTS: %LEFT(&INSTRUMENTS);
%PUT INSTRUMENT LIST: %LEFT(&INSTRUMENT_LIST);
%PUT N SUBTABLES: %LEFT(&N_SUBTABLES);
QUIT;
QUIT;
/* Step 2: only split when at least one repeat instrument was found. */
%IF &N_SUBTABLES GT 0 %THEN %DO;
/* NOTE(review): the DATA DATA_DICTIONARY step below reads the dictionary
   from a file path; the later SELECTs reference both DATA_DICTIONARY and
   &DATA_DICTIONARY. -- presumably one of these is the superseded version;
   verify. */
DATA DATA_DICTIONARY;
LENGTH VAR_NAME $ 255 FORM_NAME $ 255 SECTION_HEADER $ 255 FIELD_TYPE $ 255;
INFILE "&DATA_DICTIONARY" FIRSTOBS = 2 DSD DLM = ",";
INPUT VAR_NAME $ FORM_NAME $ SECTION_HEADER $ FIELD_TYPE $;
%IF &N_SUBTABLES GT 0 %THEN %DO;
PROC SQL NOPRINT;
IF FIELD_TYPE EQ "descriptive" THEN DELETE;
RUN;
PROC SQL NOPRINT;
/* VARS_BASE: variables whose form is NOT one of the repeat instruments. */
SELECT VAR_NAME
INTO :VARS_BASE SEPARATED BY ' '
FROM &DATA_DICTIONARY. AS A
WHERE FORM_NAME NOT IN (&INSTRUMENT_LIST);
SELECT VAR_NAME
INTO :VARS_BASE SEPARATED BY ' '
FROM DATA_DICTIONARY AS A
WHERE FORM_NAME NOT IN (&INSTRUMENT_LIST);
%put Base vars: &VARS_BASE;
/* VARS_<instrument>: one macro variable per repeat instrument holding the
   space-separated variable names whose FORM_NAME equals that instrument. */
%DO I = 1 %TO &N_SUBTABLES;
%put Base vars: &VARS_BASE;
%DO I = 1 %TO &N_SUBTABLES;
%LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( ));
SELECT VAR_NAME
INTO :VARS_&INSTRUMENT_I. SEPARATED BY ' '
FROM &DATA_DICTIONARY. AS A
WHERE FORM_NAME EQ "&INSTRUMENT_I.";
%LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( ));
SELECT VAR_NAME
INTO :VARS_&INSTRUMENT_I. SEPARATED BY ' '
FROM DATA_DICTIONARY AS A
WHERE FORM_NAME EQ "&INSTRUMENT_I.";
%put &INSTRUMENT_I. vars: &&VARS_&INSTRUMENT_I;
%END;
%put &INSTRUMENT_I. vars: &&VARS_&INSTRUMENT_I;
%END;
QUIT;
QUIT;
/* Step 3: base table = rows with no repeat instrument, restricted to the
   base variables. */
DATA &DATA_SET._BASE (KEEP = &VARS_BASE);
SET &DATA_SET;
IF MISSING(REDCAP_REPEAT_INSTRUMENT);
RUN;
%DO I = 1 %TO &N_SUBTABLES;
DATA &DATA_SET._BASE (KEEP = &VARS_BASE);
SET &DATA_SET;
IF MISSING(REDCAP_REPEAT_INSTRUMENT);
RUN;
/* Step 4: one table per repeat instrument, keeping the linking key,
   redcap_repeat_instance and that instrument's variables. */
%DO I = 1 %TO &N_SUBTABLES;
%LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( ));
DATA &DATA_SET._&INSTRUMENT_I. (KEEP = &KEY redcap_repeat_instance &&VARS_&INSTRUMENT_I);
SET &DATA_SET;
IF REDCAP_REPEAT_INSTRUMENT EQ "&INSTRUMENT_I.";
RUN;
%END;
%LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( ));
DATA &DATA_SET._&INSTRUMENT_I. (KEEP = &KEY redcap_repeat_instance &&VARS_&INSTRUMENT_I);
SET &DATA_SET;
IF REDCAP_REPEAT_INSTRUMENT EQ "&INSTRUMENT_I.";
RUN;
%END;
%END;
%END;
/* No repeat instruments: report to the log and create no output tables. */
%ELSE %DO;
%ELSE %DO;
%PUT THERE WERE NO REPEAT INSTRUMENTS IN THE DATASET %LEFT(&DATA_SET);
%PUT NO ACTION WAS TAKEN;
%PUT THERE WERE NO REPEAT INSTRUMENTS IN THE DATASET %LEFT(&DATA_SET);
%PUT NO ACTION WAS TAKEN;
%END;
%END;
%MEND REDCAP_SPLIT;