---
title: "TALOS eudract AE reporting - example"
author: "AGDamsbo"
date: "Knitted: `r format(Sys.time(), '%d %B %Y')`"
# The table of contents is an option of each output format, so it is set
# under both formats instead of next to them.
output:
  pdf_document:
    toc: true
  html_document:
    toc: true
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
```

# Intro

This is the data management workflow for the reporting of AEs in the TALOS trial to the EudraCT database.

A dedicated package is used to format data after a longer process of editing data to conform. The source data is not perfectly formatted, and during the process, a few manual steps are necessary.

- Advice number one: Make sure to format data according to the desired format for reporting.

This is "page" 2 of 2. All cleaning is performed in "TALOS AE cleaning.Rmd"

```{r}
# Relative file paths in the following chunks are resolved against the data
# folder. knitr restores the working directory at the end of every chunk, so
# a setwd() call here would not carry over; the root.dir option does.
knitr::opts_knit$set(root.dir = "/Volumes/Data/TALOS/")
```

```{r}
library(haven)
library(dplyr)
library(readxl)
# https://www.rdocumentation.org/packages/eudract/versions/0.9.3
library(eudract)
```

# Data import

Data set

```{r}
# Read the cleaned Stata export and convert labelled columns to factors.
d <- as_factor(read_dta("/Volumes/Data/TALOS/talos_ae_clean.dta"))

# Only the first 100 rows are kept as a shareable sample; the rest of the
# document works on the sample as re-read from CSV.
write.csv(head(d, 100), "sample_ae.csv")
d <- read.csv("sample_ae.csv")
```

## Modified trial-specific adjudication list with added MedDRA codes from eudract

```{r}
adj_tbl <- read_xlsx("adjudication_table.xlsx")
# write.csv(soc_code,"soc_code.csv") # Export of soc_code for manual coding of the original Adjudication List
head(adj_tbl)
head(soc_code)
```

# Formatting to EUDRACT

```{r}
# Included data example in the eudract-package
head(safety)
```

## Adding columns according to safety-format

### Fatal outcome

The "d$status" contains final status of every event instance, with those marked Dødelig being used. The other option would be to code according to adj_code, but the "d$status" was GCP monitored and is used.
```{r}
# Earlier approach based on the adjudication codes, kept for reference:
# for (i in 1:nrow(d)){
#   d$fatal[i]<-ifelse(str_contains(d$description[i],adj_tbl$code[adj_tbl$soc_term=="Death"],logic = "or"),1,0)
# }

# 1 when the monitored final status is "Dødelig", otherwise 0.
# A missing status stays NA.
d$fatal <- ifelse(d$status == "Dødelig", 1, 0)
table(factor(d$fatal), factor(d$rtreat))
```

### Related

All events coded with any of the three categories are considered related in this binary form.

```{r}
# 1 for any of the three "related" categories, otherwise 0.
# A missing value in d$related stays NA.
d$related_bin <- ifelse(
  d$related == "Mulig" |
    d$related == "Sandsynlig" |
    d$related == "Afgjort relateret",
  1, 0
)
```

### Serious

Only SAEs are occurring, no SAR or SUSAR

```{r}
ser <- c("SAE", "SAR", "SUSAR")
d$serious <- ifelse(d$CLFint %in% ser, 1, 0)
```

### Randomisation

Group naming according to groups defined on the EudraCT page.

```{r}
# Everything that is not "Placebo" is reported as "Active".
d$group <- ifelse(d$rtreat == "Placebo", "Placebo", "Active")
```

### SOC code and term/subcat

```{r}
# Adjudication codes as text, without blanks. Matching is exact, so a blank
# row in the table or a code that is part of a longer token cannot produce
# a false hit.
adj_codes <- as.character(adj_tbl$code)
adj_codes <- adj_codes[!is.na(adj_codes)]

# Each description is split at ":", " ", "+" or "(" and every piece is cut to
# its first three characters. The cut is a safeguard against a missing " "
# (or similar) right after the adjudication code.
tokens <- strsplit(as.character(d$description), "[: +(]")

# One list element per row of d, holding the codes found in that description.
# The name `ls` is kept because the commented export code further down refers
# to it.
ls <- lapply(tokens, function(tok) {
  tok <- substr(tok, 1, 3)
  tok[tok %in% adj_codes]
})
names(ls) <- sprintf("index%d", seq_along(ls))
```

Splitting each list element into different columns, length(ls) equals nrow(d)

```{r}
# One column per code position: adj_code_1, adj_code_2, ...
# Events with fewer codes get NA in the remaining columns. Filling column by
# column also copes with events where no code was found, which the former
# 1:length(v) loop could not (it counted 1, 0 for an empty element).
n_codes <- max(0L, lengths(ls))
for (j in seq_len(n_codes)) {
  d[[paste0("adj_code_", j)]] <- unname(
    vapply(ls, function(codes) codes[j], character(1))
  )
}
```

#### Death only event subset and recoding - manual work

Originally a "continuation" variable was also included in the export for more information on the event, however, this variable has been excluded from the data set.
```{r}
## If only 1 code, it is in adj_code_1, test if this code is any categorised as "Death"
# subset_death<-d[d$adj_code_1 %in% adj_tbl$code[adj_tbl$soc_term=="Death"] &
#                   lengths(ls)==1, ## Redundant test, that only one code was used
#                 c("description","continuation","event_id")]
## Adds an extra column for adding alternative code manually
# subset_death$add_code<-c("")
# write.csv(subset_death,"subset_death.csv")
```

Hand coded data set imported again

```{r}
subset_death_coded <- read_xlsx("subset_death_coded.xlsx")
head(subset_death_coded)
## Eight (8) cases, 5 had a new code added
## event_id 335 was not recoded, as two events (also event_id 333) are already created for this same death...

# Lookup table with one row per event_id. Rows without an event_id are
# ignored, and if an event_id is listed more than once the last row is used.
death_recode <- subset_death_coded[!is.na(subset_death_coded$event_id), ]
death_recode <- death_recode[
  !duplicated(death_recode$event_id, fromLast = TRUE),
]

# The manual code goes into the second code column; create it when no event
# had two codes to begin with.
if (!"adj_code_2" %in% names(d)) {
  d$adj_code_2 <- rep(NA_character_, nrow(d))
}

# Only events present in the hand coded file are touched; all other rows keep
# their current adj_code_2.
recode_row <- match(d$event_id, death_recode$event_id)
is_recoded <- !is.na(recode_row)
d$adj_code_2[is_recoded] <-
  as.character(death_recode$add_code[recode_row[is_recoded]])
```

#### Subset events coded with "801"

```{r}
# subset_801<-d[grepl("801",d$description),c("description","continuation","event_id")]
# write.csv(subset_801,"subset_801.csv")
```

Every event has been coded with soc_code alternative to 801 or NONE if deemed irrelevant based on other codes at same event.

```{r}
# Rows with any missing value are dropped from the manual 801 coding.
alt_801 <- na.omit(read_excel("subset_801_alt.xlsx"))
head(alt_801)
```

### Converting to new, long data.frame

All events with bleeding (severity) or death are excluded. Death counts will be added later.

```{r}
library(tidyr)
# dput(names(d))

# Codes marked Death or Bleeding are additional codes that do not correspond
# to soc_code terms, so only the remaining codes are kept.
soc_mapped_codes <- adj_tbl$code[
  adj_tbl$soc_term != "Death" & adj_tbl$soc_term != "Bleeding"
]

dta <- d %>%
  select(!matches(c("description", "expected", "status", "CLFint"))) %>%
  # Pivoting to long format: one row per event and adjudication code
  pivot_longer(
    starts_with("adj_code_"),
    names_to = "adj_index",
    values_to = "adj_code"
  ) %>%
  # Omitting NAs, result is complete case only
  na.omit() %>%
  filter(adj_code %in% soc_mapped_codes)
head(dta)
```

### Matching adj_code to soc_code

Adding soc_codes and manually coded alternative categories for 801 codes.
```{r}
# MedDRA SOC code for every adjudication code, looked up in one step.
# If a code is listed more than once in adj_tbl, its first row is used.
dta$soc <- as.character(adj_tbl$meddra[match(dta$adj_code, adj_tbl$code)])

# Events coded "801" take the manually assigned alternative instead.
# If an event_id is listed more than once in alt_801, its last row is used.
alt_last <- alt_801[!duplicated(alt_801$event_id, fromLast = TRUE), ]
alt_row <- match(dta$event_id, alt_last$event_id)
use_alt <- !is.na(alt_row) & dta$adj_code %in% "801"
dta$soc[use_alt] <- as.character(alt_last$alt_801[alt_row[use_alt]])
```

Few were not coded, omitting NAs.

```{r}
# Only keeping correctly coded cases.
# NOTE(review): besides NAs, this also drops rows whose soc is not a MedDRA
# code found in soc_code (e.g. a literal "NONE" from the manual 801 coding),
# since no term can be looked up for them - confirm this is the intended
# handling.
dta <- dta %>%
  na.omit() %>%
  filter(soc %in% soc_code$meddra)
```

Adding term and subcat

```{r}
# Subcategory name is the combined soc_term and adj_subcat for additional details in the final report.
dta$term <- as.character(soc_code$soc_term)[match(dta$soc, soc_code$meddra)]
dta$subcat <- paste0(
  dta$term, ": ",
  adj_tbl$subcat[match(dta$adj_code, adj_tbl$code)]
)
```

# Configuring XML

## Creating specified data frame

```{r}
# Columns named as in the `safety` example data of the eudract package.
df <- data.frame(
  subjid = dta$rnumb,
  term = dta$subcat,
  soc = as.integer(dta$soc),
  serious = dta$serious,
  related = dta$related_bin,
  fatal = dta$fatal,
  group = dta$group
)
head(df)
```

## Handling Deaths

```{r}
# Deaths in named integer vector
# ae_deaths<-table(df$fatal,df$group)[2,] # No deaths included in the sample data set

# These are all the deaths observed within 6 months after randomisation, eg after intention-to-treat
all_deaths <- c("Active" = 16, "Placebo" = 12)

# With the sample data no AE deaths are subtracted, so all deaths are
# passed on as excess deaths.
excess_death <- all_deaths #-ae_deaths
```

## Creating safety summary

```{r}
safe_sum <- safety_summary(
  data = df,
  exposed = c("Active" = 319, "Placebo" = 323),
  excess_deaths = excess_death
)
```

```{r}
# Both XML files are written to temporary paths.
simple <- tempfile(fileext = ".xml")
eudract <- tempfile(fileext = ".xml")
simple_safety_xml(safe_sum, simple)
```

```{r}
eudract_convert(input = simple, output = eudract)
```