Complete workflow. Comments are lacking a bit
This commit is contained in:
parent
897d391add
commit
fd920d2027
@ -3,7 +3,8 @@
|
|||||||
## -----------------------------------------------------------------------------
|
## -----------------------------------------------------------------------------
|
||||||
##
|
##
|
||||||
## This script is to create a datadictionary for REDCap, to upload data.
|
## This script is to create a datadictionary for REDCap, to upload data.
|
||||||
##
|
## The data frame consits of severral excel sheets, nad the database already contains data
|
||||||
|
## Instead of a complete datadictionary, individual sheets will be converted to instrument files.
|
||||||
##
|
##
|
||||||
|
|
||||||
## -----------------------------------------------------------------------------
|
## -----------------------------------------------------------------------------
|
||||||
@ -20,22 +21,65 @@ library(dplyr)
|
|||||||
|
|
||||||
setwd("/Volumes/Data/toorisky/")
|
setwd("/Volumes/Data/toorisky/")
|
||||||
|
|
||||||
dta<-read_dta("Alle apo Aarhus 2018.dta") %>% # Defined dataset
|
dput(excel_sheets("2022-02-08_ddsc_dataexport.xlsx")) # Getting sheet names
|
||||||
filter(VaskDiag==1) %>%
|
|
||||||
mutate(treatment=factor(case_when(trombolyse!=2&trombektomi!=2 ~ 0,
|
|
||||||
trombolyse==2|trombektomi==2 ~ 1)),
|
|
||||||
sex.n=factor(ifelse(as.integer(substr(cpr, start = 10, stop = 10)) %%2 == 0,
|
|
||||||
"female", "male"))) %>%
|
|
||||||
left_join(.,read_excel("2022-02-08_ddsc_dataexport.xlsx", sheet = "Patienter"),
|
|
||||||
by=c("ForloebID"="forloebid"))%>%
|
|
||||||
left_join(.,read_excel("2022-02-08_ddsc_dataexport.xlsx", sheet = "3 mdr. opf."),
|
|
||||||
by=c("ForloebID"="ForloebID"))%>%
|
|
||||||
mutate(cpr=cpr.x,
|
|
||||||
ID=ID.x)%>%
|
|
||||||
dplyr::select(-starts_with("cpr."))%>%
|
|
||||||
dplyr::select(-starts_with("ID."))
|
|
||||||
|
|
||||||
colnames(dta)<-tolower(colnames(dta))
|
shts_sel<-c("Patienter", # Selecting relevant sheets
|
||||||
|
"Basis-skema",
|
||||||
|
"Trombolyse",
|
||||||
|
"Trombektomi",
|
||||||
|
"3 mdr. opf.")
|
||||||
|
|
||||||
|
inst_names<-c("Patienter", # Renaming to avoid trailing numbers and spaces, as per REDCap
|
||||||
|
"Basisskema",
|
||||||
|
"Trombolyse",
|
||||||
|
"Trombektomi",
|
||||||
|
"Followup")
|
||||||
|
|
||||||
|
ids<-read_dta("Alle apo Aarhus 2018.dta") |> # Defined dataset
|
||||||
|
filter(VaskDiag==1) |>
|
||||||
|
select("ForloebID") |>
|
||||||
|
data.frame()
|
||||||
|
|
||||||
|
colnames(ids)<-tolower(colnames(ids))
|
||||||
|
|
||||||
|
## Importing xls sheets and naming list from sheet names
|
||||||
|
r_lup<-list()
|
||||||
|
|
||||||
|
for (i in 1:length(shts_sel)){
|
||||||
|
xcl<-read_excel("2022-02-08_ddsc_dataexport.xlsx", sheet = shts_sel[i])|>
|
||||||
|
dplyr::select(-starts_with("cpr"))|>
|
||||||
|
dplyr::select(-starts_with("ID"))|>
|
||||||
|
data.frame()
|
||||||
|
|
||||||
|
colnames(xcl)<-tolower(colnames(xcl))
|
||||||
|
|
||||||
|
r_lup[[i]]<-xcl|>
|
||||||
|
filter(forloebid %in% ids$forloebid)
|
||||||
|
names(r_lup)[i]<-inst_names[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
cnames<-c()
|
||||||
|
for (i in 1:length(r_lup)){
|
||||||
|
cnames<-c(cnames,colnames(r_lup[[i]]))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
## Appending suffix for duplicated colnames from different sheets
|
||||||
|
dup_names<-unique(cnames[duplicated(cnames)])[-1] # omit the first, "forloebid"
|
||||||
|
|
||||||
|
for (i in 1:length(r_lup)){
|
||||||
|
# Getting sheetname bit
|
||||||
|
sht_name<-gsub('[ .]', '', names(r_lup)[i])|>
|
||||||
|
substr(1,6)|>
|
||||||
|
tolower()|>
|
||||||
|
(\(x) paste0("_",x))() # Lambda function in lack of better
|
||||||
|
|
||||||
|
# Pasting
|
||||||
|
sht_dup<-paste0(colnames(r_lup[[i]])[colnames(r_lup[[i]]) %in% dup_names],sht_name)
|
||||||
|
|
||||||
|
# Renaming
|
||||||
|
colnames(r_lup[[i]])[colnames(r_lup[[i]]) %in% dup_names] <- sht_dup
|
||||||
|
}
|
||||||
|
|
||||||
## -----------------------------------------------------------------------------
|
## -----------------------------------------------------------------------------
|
||||||
## Fix missing record_id's for upload
|
## Fix missing record_id's for upload
|
||||||
@ -53,14 +97,17 @@ redcap <- redcap_read_oneshot(
|
|||||||
fields = c("record_id","forloebid")
|
fields = c("record_id","forloebid")
|
||||||
)$data
|
)$data
|
||||||
|
|
||||||
|
## Adding and creating record_id's
|
||||||
|
ids_r<-full_join(ids,redcap)
|
||||||
|
|
||||||
## Joining and adding record_id's
|
n_na<-length(ids_r$record_id[is.na(ids_r$record_id)])
|
||||||
dta<-full_join(dta,redcap)
|
n_id<-max(ids_r$record_id,na.rm=T)
|
||||||
|
|
||||||
n_na<-length(dta$record_id[is.na(dta$record_id)])
|
|
||||||
n_id<-max(dta$record_id,na.rm=T)
|
|
||||||
# filter(!is.na(akut_ind))%>%
|
# filter(!is.na(akut_ind))%>%
|
||||||
dta$record_id[is.na(dta$record_id)]<-(n_id+1):(n_id+n_na) # Simple math
|
ids_r$record_id[is.na(ids_r$record_id)]<-(n_id+1):(n_id+n_na) # Simple math
|
||||||
|
|
||||||
|
for (i in 1:length(r_lup)){
|
||||||
|
r_lup[[i]]<-left_join(r_lup[[i]],ids_r)
|
||||||
|
}
|
||||||
|
|
||||||
## -----------------------------------------------------------------------------
|
## -----------------------------------------------------------------------------
|
||||||
## Data dictionary
|
## Data dictionary
|
||||||
@ -68,32 +115,105 @@ dta$record_id[is.na(dta$record_id)]<-(n_id+1):(n_id+n_na) # Simple math
|
|||||||
|
|
||||||
setwd("/Users/au301842/nottreated/")
|
setwd("/Users/au301842/nottreated/")
|
||||||
|
|
||||||
icname<-colnames(read.csv("examlpe instrument.csv"))
|
icname<-colnames(read.csv("example instrument.csv"))
|
||||||
dd<-data.frame(matrix(ncol = length(icname),
|
|
||||||
nrow=ncol(dta))) ## Data frame to collect all
|
dd<-data.frame(matrix(ncol = length(icname))) ## Data frame to collect all
|
||||||
colnames(dd)<-icname
|
colnames(dd)<-icname
|
||||||
|
|
||||||
|
## Instrument for DataDictionary
|
||||||
|
## Format dataset for REDCap upload
|
||||||
|
|
||||||
|
for (i in 1:length(r_lup)){
|
||||||
|
dd_i<-data.frame(matrix(ncol = length(icname),nrow = ncol("[["(r_lup,i)))) ## Data frame to collect all
|
||||||
|
|
||||||
|
colnames(dd_i)<-icname ## for easier reading
|
||||||
|
|
||||||
## Variable names
|
## Variable names
|
||||||
dd[1]<-colnames(dta)
|
dd_i[1]<-colnames("[["(r_lup,i))
|
||||||
|
|
||||||
## Form Name
|
## Form Name
|
||||||
dd[2]<-"ddsc"
|
dd_i[2]<-names(r_lup)[i]
|
||||||
|
|
||||||
## Field Type
|
## Field Type
|
||||||
# dd_i[4]<-ifelse(sapply(r_lup[[i]], class)=="factor","radio","text")
|
# dd_i[4]<-ifelse(sapply(r_lup[[i]], class)=="factor","radio","text")
|
||||||
dd[4]<-"text"
|
dd_i[4]<-"text"
|
||||||
|
|
||||||
dd[5]<-colnames(dta)
|
## Field Label
|
||||||
|
## Using original attributes as field labels
|
||||||
|
fl<-lapply(r_lup[[i]], attr, "label")
|
||||||
|
for (j in 1:length(fl)){
|
||||||
|
fl[[j]]<-ifelse(is.null(fl[[j]]),
|
||||||
|
names(fl)[[j]],
|
||||||
|
fl[[j]])
|
||||||
|
## If no attributes, variable name is used as "placeholder"
|
||||||
|
}
|
||||||
|
dd_i[5]<-unlist(fl)
|
||||||
|
|
||||||
|
## Merge all
|
||||||
|
dd<-rbind(dd,dd_i)
|
||||||
|
}
|
||||||
|
|
||||||
|
dd_exp<-dd %>%
|
||||||
|
filter(!Variable...Field.Name %in% c("forloebid","record_id")) # These fields already exists in the database, and would cause conflicts
|
||||||
|
|
||||||
## -----------------------------------------------------------------------------
|
## -----------------------------------------------------------------------------
|
||||||
## Instrument file
|
## Instrument files
|
||||||
## -----------------------------------------------------------------------------
|
## -----------------------------------------------------------------------------
|
||||||
|
|
||||||
write.csv(dd,"ddsc_instrument.csv",row.names = FALSE,na="")
|
# Loop for doing it all - nice!
|
||||||
|
# Splitting individual instruments from Data Dictionary file
|
||||||
|
for (i in 1:length(inst_names)){
|
||||||
|
|
||||||
|
if (!file.exists("instruments/")){ # Creating "instrument" subdirectory if it does not already exist
|
||||||
|
dir.create("instruments/", showWarnings = TRUE, recursive = FALSE, mode = "0777")}
|
||||||
|
|
||||||
|
new_path<-paste0("instruments/",inst_names[i]) # Path for instrument folder
|
||||||
|
dir.create(new_path, showWarnings = FALSE, recursive = FALSE, mode = "0777") # Creating the folder path
|
||||||
|
|
||||||
|
file.copy("OriginID.txt", # Copying origin file as per REDCap
|
||||||
|
new_path)
|
||||||
|
|
||||||
|
dd_exp|>
|
||||||
|
filter(Form.Name==inst_names[i])|> # Filtering instrument entries
|
||||||
|
write.csv(paste0(new_path,"/instrument.csv"),row.names = FALSE,na="") # Exporting to folder
|
||||||
|
|
||||||
|
setwd(paste0("/Users/au301842/nottreated/",new_path)) # Setting wd in subfolder for zipping
|
||||||
|
zip(zipfile = inst_names[i], # Naming the zipfile according to instrument
|
||||||
|
files = dir(full.names = TRUE))
|
||||||
|
setwd("/Users/au301842/nottreated/")
|
||||||
|
|
||||||
|
zip_file<-list.files(paste0("instruments/",inst_names[i]),
|
||||||
|
pattern = ".zip",
|
||||||
|
full.names = TRUE)
|
||||||
|
|
||||||
|
file.copy(zip_file, # Copy and remove zip-file for easy upload
|
||||||
|
"instruments/")
|
||||||
|
file.remove(zip_file)
|
||||||
|
}
|
||||||
|
|
||||||
## -----------------------------------------------------------------------------
|
## -----------------------------------------------------------------------------
|
||||||
## Dataset export
|
## Dataset export
|
||||||
## -----------------------------------------------------------------------------
|
## -----------------------------------------------------------------------------
|
||||||
|
|
||||||
write.csv(dta,"ddsc_dataset.csv",row.names = FALSE)
|
exp<-r_lup|>
|
||||||
|
purrr::reduce(full_join)|>
|
||||||
|
select(-matches("forloebid"))|>
|
||||||
|
select(record_id,everything()) # To arrange "record_id" as first column
|
||||||
|
|
||||||
|
write.csv(exp,"ddsc_dataset.csv",row.names = FALSE,na="")
|
||||||
|
|
||||||
|
|
||||||
|
## -----------------------------------------------------------------------------
|
||||||
|
## Dataset export of remaing
|
||||||
|
## -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
## REDCap faces troubles importing too large data sets. Apparently.
|
||||||
|
|
||||||
|
ups <- redcap_read_oneshot(
|
||||||
|
redcap_uri = uri,
|
||||||
|
token = token,
|
||||||
|
fields = "record_id"
|
||||||
|
)$data
|
||||||
|
|
||||||
|
write.csv(exp|>
|
||||||
|
filter(!record_id %in% ups$record_id),"ddsc_dataset_remain.csv",row.names = FALSE,na="")
|
||||||
|
Loading…
Reference in New Issue
Block a user