A new helper function, clean_redcap_name(), has been introduced to help match form names when they contain special characters.

This commit is contained in:
AG Damsbo 2023-04-14 11:46:09 +02:00
parent 4847094f2c
commit 563c864091
3 changed files with 25 additions and 7 deletions

View File

@ -88,13 +88,17 @@ REDCap_split <- function(records,
# Process user input
records <- process_user_input(records)
metadata <-
as.data.frame(process_user_input(metadata)) # See issue #12
as.data.frame(process_user_input(metadata))
# Process repeat instrument names to match the redcap naming
records$redcap_repeat_instrument <- clean_redcap_name(records$redcap_repeat_instrument)
# Get the variable names in the dataset
vars_in_data <- names(records)
# Match arg for forms
forms <- match.arg(forms)
forms <- match.arg(forms, c("repeating", "all"))
# Check to see if there were any repeating instruments
if (forms == "repeating" &&

View File

@ -76,8 +76,7 @@ read_redcap_tables <- function(uri,
# Removes any extra characters other than a-z, 0-9 and "_", to mimic raw
# instrument names.
if ("redcap_repeat_instrument" %in% names(d)) {
d$redcap_repeat_instrument <-
gsub("[^a-z0-9_]", "", gsub(" ", "_", tolower(d$redcap_repeat_instrument)))
d$redcap_repeat_instrument <- clean_redcap_name(d$redcap_repeat_instrument)
}
# Getting metadata
@ -93,7 +92,7 @@ read_redcap_tables <- function(uri,
l <- REDCap_split(d,
m,
forms = split_forms,
primary_table_name = "nonrepeating")
primary_table_name = "")
# Sanitizing split list by removing completely empty rows apart from colnames
# in "generics"

View File

@ -83,9 +83,24 @@ focused_metadata <- function(metadata, vars_in_data) {
}
#' clean_redcap_name
#' @description
#' Normalises instrument (form) labels so they can be matched against
#' lower-case, underscore-separated instrument names. The steps, in order:
#' convert to lower case; drop every character that is not a letter, digit,
#' space or underscore; strip trailing spaces; replace the remaining spaces
#' with underscores.
#' Trying to make up for different naming conventions.
#'
#' @param x character vector of names to clean. Other input is coerced to
#' character by `tolower()` before cleaning.
#'
#' @return character vector of cleaned names, one element per element of the
#' coerced input.
#' @export
#'
clean_redcap_name <- function(x) {
  # Lower-case first so the whitelist below only needs to list a-z.
  lowered <- tolower(x)
  # Keep letters, digits, spaces and underscores only. Inside a bracket
  # expression quotes are literal characters, so the space is written bare;
  # quoting it would let apostrophes through the filter.
  kept <- gsub("[^a-z0-9 _]", "", lowered)
  # Remove the whole run of trailing spaces so none of them turns into a
  # dangling underscore in the final step.
  trimmed <- sub(" +$", "", kept)
  gsub(" ", "_", trimmed)
}
#' Sanitize list of data frames