REDCapCAST/R/utils.r

164 lines
4.6 KiB
R
Raw Normal View History

2023-03-06 14:36:41 +01:00
focused_metadata <- function(metadata, vars_in_data) {
# metadata <- m$data
# vars_in_data <- names(d$data)
fields <-
metadata[!metadata$field_type %in% c("descriptive", "checkbox") &
metadata$field_name %in% vars_in_data,
"field_name"]
# Process checkbox fields
if (any(metadata$field_type == "checkbox")) {
# Getting base field names from checkbox fields
vars_check <- gsub(pattern = "___(\\d+)",replacement = "", vars_in_data)
# Processing
checkbox_basenames <-
metadata[metadata$field_type == "checkbox" &
metadata$field_name %in% vars_check,
"field_name"]
fields <- rbind(fields, checkbox_basenames)
}
# Process instrument status fields
form_names <- unique(metadata$form_name[metadata$field_name %in% fields$field_name])
form_complete_fields <- data.frame(
field_name = paste0(form_names, "_complete"),
stringsAsFactors = FALSE
)
fields <- rbind(fields, form_complete_fields)
# Process survey timestamps
timestamps <-
intersect(vars_in_data, paste0(form_names, "_timestamp"))
if (length(timestamps)) {
timestamp_fields <- data.frame(
field_name = timestamps,
stringsAsFactors = FALSE
)
fields <- rbind(fields, timestamp_fields)
}
# Process ".*\\.factor" fields supplied by REDCap's export data R script
if (any(grepl("\\.factor$", vars_in_data))) {
factor_fields <-
do.call("rbind",
apply(fields,
1,
function(x, y) {
field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y)
if (any(field_indices))
data.frame(
field_name = y[field_indices],
form_name = x[2],
stringsAsFactors = FALSE,
row.names = NULL
)
},
y = vars_in_data))
fields <- rbind(fields, factor_fields)
}
metadata[metadata$field_name %in% fields$field_name,]
}
match_fields_to_form <- function(metadata, vars_in_data) {
2023-01-16 09:49:17 +01:00
fields <- metadata[!metadata$field_type %in% c("descriptive", "checkbox"),
c("field_name", "form_name")]
# Process instrument status fields
form_names <- unique(metadata$form_name)
form_complete_fields <- data.frame(
field_name = paste0(form_names, "_complete"),
form_name = form_names,
stringsAsFactors = FALSE
)
fields <- rbind(fields, form_complete_fields)
# Process survey timestamps
2023-01-16 09:49:17 +01:00
timestamps <-
intersect(vars_in_data, paste0(form_names, "_timestamp"))
if (length(timestamps)) {
timestamp_fields <- data.frame(
field_name = timestamps,
form_name = sub("_timestamp$", "", timestamps),
stringsAsFactors = FALSE
)
fields <- rbind(fields, timestamp_fields)
}
# Process checkbox fields
if (any(metadata$field_type == "checkbox")) {
2023-01-16 09:49:17 +01:00
checkbox_basenames <- metadata[metadata$field_type == "checkbox",
c("field_name", "form_name")]
checkbox_fields <-
2023-01-16 09:49:17 +01:00
do.call("rbind",
apply(checkbox_basenames,
1,
function(x, y)
data.frame(
field_name = y[grepl(paste0("^", x[1], "___((?!\\.factor).)+$"), y, perl = TRUE)],
form_name = x[2],
stringsAsFactors = FALSE,
row.names = NULL
),
y = vars_in_data))
fields <- rbind(fields, checkbox_fields)
}
# Process ".*\\.factor" fields supplied by REDCap's export data R script
if (any(grepl("\\.factor$", vars_in_data))) {
factor_fields <-
2023-01-16 09:49:17 +01:00
do.call("rbind",
apply(fields,
1,
function(x, y) {
field_indices <- grepl(paste0("^", x[1], "\\.factor$"), y)
if (any(field_indices))
data.frame(
field_name = y[field_indices],
form_name = x[2],
stringsAsFactors = FALSE,
row.names = NULL
)
},
y = vars_in_data))
fields <- rbind(fields, factor_fields)
}
fields
2023-01-16 09:49:17 +01:00
}
2023-01-16 09:49:17 +01:00
split_non_repeating_forms <-
function(table, universal_fields, fields) {
forms <- unique(fields[[2]])
2023-01-16 09:49:17 +01:00
x <- lapply(forms,
function (x) {
table[names(table) %in% union(universal_fields, fields[fields[, 2] == x, 1])]
})
2023-01-16 09:49:17 +01:00
structure(x, names = forms)
2023-01-16 09:49:17 +01:00
}