diff --git a/SAS/REDCap_split.sas b/SAS/REDCap_split.sas index 90e7de3..2e90073 100644 --- a/SAS/REDCap_split.sas +++ b/SAS/REDCap_split.sas @@ -2,7 +2,7 @@ * * FILE: REDCap_split.sas * -* VERSION: 0.0.0 +* VERSION: 0.1.0 * * PURPOSE: Take a REDCap dataset with multiple events and make into several * tables with primary and foreign keys @@ -50,10 +50,27 @@ %MACRO REDCAP_SPLIT( DATA_SET = REDCAP /* The name of the SAS dataset created by REDCap */, - DATA_DICTIONARY = REDCAP_DATA_DICTIONARY /* The name of the SAS dataset of the data dictionary */ + DATA_DICTIONARY = REDCAP_DATA_DICTIONARY /* The name of the SAS dataset of the data dictionary */, + NUMERIC_SUBTABLES = N /* Y/N: Should the subtables be numbered (Y) or should the be based on the name of the repeating instrument (N)? */ ); + /* Find the key that links the base table to child tables */ + /* Also check that REDCAP_REPEAT_INSTRUMENT is present in the data */ + %LET DSID = %SYSFUNC(OPEN(&DATA_SET)); + %LET KEY = %SYSFUNC(VARNAME(&DSID, 1)); + %LET CHECK = %SYSFUNC(VARNUM(&DSID, REDCAP_REPEAT_INSTRUMENT)); + %LET RC = %SYSFUNC(CLOSE(&DSID)); + + %IF &CHECK EQ 0 %THEN %DO; + + %PUT ERROR: The dataset &DATA_SET does not contain repeating instruments.; + %PUT ERROR: Stopping MACRO.; + %GOTO FINISH; + + %END; + /* Remove formatting from repeat instrument field */ + %PUT NOTE: Removing formatting from REDCAP_REPEAT_INSTRUMENT in &DATA_SET..; DATA &DATA_SET.; SET &DATA_SET.; FORMAT REDCAP_REPEAT_INSTRUMENT; @@ -73,120 +90,140 @@ %LET N_SUBTABLES = &SQLOBS; - %PUT INSTRUMENTS: %LEFT(&INSTRUMENTS); - %PUT INSTRUMENT LIST: %LEFT(&INSTRUMENT_LIST); - %PUT N SUBTABLES: %LEFT(&N_SUBTABLES); - QUIT; - %IF &N_SUBTABLES GT 0 %THEN %DO; - - /* Get information on the variables in the dataset */ - PROC CONTENTS - DATA = &DATA_SET. - OUT = REDCAP_VARNAMES(KEEP=NAME VARNUM) - NOPRINT; - RUN; + %IF &N_SUBTABLES EQ 0 %THEN %DO; - /* Find the key that links the base table to child tables */ - DATA _NULL_; - SET REDCAP_VARNAMES; - IF VARNUM EQ 1 THEN DO; - CALL SYMPUT("KEY", NAME); - STOP; - END; - RUN; - - /* Make a list of fields and their associated forms based on data dictionary */ - DATA REDCAP_FIELDS(KEEP=VAR_NAME FORM_NAME); - SET &DATA_DICTIONARY.; - IF FIELD_TYPE EQ "checkbox" THEN DO; - BASENAME = VAR_NAME; - DO I = 1 TO N; - SET REDCAP_VARNAMES POINT=I NOBS=N; - IF PRXMATCH("/^"!!trim(BASENAME)!!"___.+$/", NAME) THEN DO; - VAR_NAME = NAME; - OUTPUT; - END; - END; - END; - ELSE OUTPUT; - RUN; + %PUT WARNING: There were no records containing repeating instruments in the dataset %LEFT(&DATA_SET).; + %PUT WARNING: No action was taken.; + %GOTO FINISH; - /* Add instrument status fields to list of fields */ - PROC SQL; + %END; + + %PUT N SUBTABLES: %LEFT(&N_SUBTABLES); + %PUT INSTRUMENTS: %LEFT(&INSTRUMENTS); + %PUT INSTRUMENT LIST: %LEFT(%BQUOTE(&INSTRUMENT_LIST)); + + /* Get information on the variables in the dataset */ + PROC CONTENTS + DATA = &DATA_SET. + OUT = REDCAP_VARNAMES(KEEP=NAME VARNUM) + NOPRINT; + RUN; + + /* Make a list of fields and their associated forms based on data dictionary */ + DATA REDCAP_FIELDS(KEEP=VAR_NAME FORM_NAME); + SET &DATA_DICTIONARY.; + IF FIELD_TYPE EQ "checkbox" THEN DO; + BASENAME = VAR_NAME; + DO I = 1 TO N; + SET REDCAP_VARNAMES POINT=I NOBS=N; + IF PRXMATCH("/^"!!trim(BASENAME)!!"___.+$/", NAME) THEN DO; + VAR_NAME = NAME; + OUTPUT; + END; + END; + END; + ELSE OUTPUT; + RUN; + + /* Add instrument status fields to list of fields */ + PROC SQL; CREATE TABLE REDCAP_INSTRUMENT_STATUS_FIELDS AS - SELECT DISTINCT TRIM(FORM_NAME)!!"_complete" AS VAR_NAME LENGTH=200, FORM_NAME - FROM REDCAP_DATA_DICTIONARY; + SELECT DISTINCT TRIM(FORM_NAME)!!"_complete" AS VAR_NAME LENGTH=200, FORM_NAME + FROM &DATA_DICTIONARY.; - QUIT; + QUIT; - PROC APPEND - BASE=REDCAP_FIELDS - DATA=REDCAP_INSTRUMENT_STATUS_FIELDS; - RUN; - - /* Sort out the field names */ - PROC SQL NOPRINT; + PROC APPEND + BASE=REDCAP_FIELDS + DATA=REDCAP_INSTRUMENT_STATUS_FIELDS; + RUN; - SELECT VAR_NAME - INTO :VARS_BASE SEPARATED BY ' ' - FROM REDCAP_FIELDS AS A - WHERE FORM_NAME NOT IN (&INSTRUMENT_LIST); + DATA REDCAP_FIELDS; + SET REDCAP_FIELDS; - %put Base vars: &VARS_BASE; - - %DO I = 1 %TO &N_SUBTABLES; + IF LENGTH(VAR_NAME) GT 32 THEN DO; + PUT "WARNING: The variable " VAR_NAME "is too long (MAX 32 CHARACTERS)."; + PUT "WARNING: " VAR_NAME "will not be included in the output dataset."; + DELETE; + END; - %LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( )); - - SELECT VAR_NAME - INTO :VARS_&INSTRUMENT_I. SEPARATED BY ' ' - FROM REDCAP_FIELDS AS A - WHERE FORM_NAME EQ "&INSTRUMENT_I."; + RUN; - %put &INSTRUMENT_I. vars: &&VARS_&INSTRUMENT_I; - - %END; + /* Sort out the field names */ + PROC SQL NOPRINT; + SELECT VAR_NAME + INTO :VARS_BASE SEPARATED BY ' ' + FROM REDCAP_FIELDS AS A + WHERE FORM_NAME NOT IN (&INSTRUMENT_LIST); - QUIT; + %put Base vars: &VARS_BASE; - /* Make new data sets based on field names above */ - DATA &DATA_SET._BASE (KEEP = &VARS_BASE); - SET &DATA_SET; - - IF MISSING(REDCAP_REPEAT_INSTRUMENT); - RUN; - %DO I = 1 %TO &N_SUBTABLES; %LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( )); - - DATA &DATA_SET._&INSTRUMENT_I. (KEEP = &KEY redcap_repeat_instance &&VARS_&INSTRUMENT_I); - SET &DATA_SET; - - IF REDCAP_REPEAT_INSTRUMENT EQ "&INSTRUMENT_I."; - - RUN; - + + SELECT VAR_NAME + INTO :VARS_&I. SEPARATED BY ' ' + FROM REDCAP_FIELDS AS A + WHERE FORM_NAME EQ "&INSTRUMENT_I."; + + %PUT &INSTRUMENT_I. vars: &&VARS_&I; + %END; + QUIT; + + /* Make new data sets based on field names above */ + DATA &DATA_SET._BASE (KEEP = &VARS_BASE); + SET &DATA_SET; + IF MISSING(REDCAP_REPEAT_INSTRUMENT); + RUN; + + %DO I = 1 %TO &N_SUBTABLES; + + %LET INSTRUMENT_I = %SCAN(&INSTRUMENTS,&I,%STR( )); + + /* Create either numeric table name or name based on repeating instrument name (see issue #12) */ + %IF &NUMERIC_SUBTABLES EQ N %THEN %DO; + + %LET SUBTABLE_NAME = &DATA_SET._&INSTRUMENT_I.; + + %IF %LENGTH(&SUBTABLE_NAME) GT 32 %THEN %DO; + + %PUT ERROR: The table name &SUBTABLE_NAME is %LENGTH(&SUBTABLE_NAME) characters long (MAX 32 CHARACTERS).; + %PUT ERROR: &SUBTABLE_NAME will be skipped. Consider setting NUMERIC_SUBTABLES = Y.; + %GOTO SKIP; + + %END; + + %END; + %ELSE %DO; + + %LET SUBTABLE_NAME = &DATA_SET._&I.; + + %END; + + DATA &SUBTABLE_NAME (KEEP = &KEY redcap_repeat_instance &&VARS_&I); + SET &DATA_SET; + IF REDCAP_REPEAT_INSTRUMENT EQ "&INSTRUMENT_I."; + RUN; + + %PUT NOTE: Records from instrument &INSTRUMENT_I have been placed in &SUBTABLE_NAME; + + %SKIP: + + %END; + /* Clean up temporary datasets */ PROC DATASETS MEMTYPE=DATA LIBRARY=WORK NOLIST; DELETE REDCAP_FIELDS REDCAP_VARNAMES REDCAP_INSTRUMENT_STATUS_FIELDS; RUN; - %END; - - %ELSE %DO; - - %PUT THERE WERE NO REPEAT INSTRUMENTS IN THE DATASET %LEFT(&DATA_SET); - %PUT NO ACTION WAS TAKEN; - - %END; - + %FINISH: %MEND REDCAP_SPLIT;