239 lines
7.2 KiB
Plaintext
239 lines
7.2 KiB
Plaintext
---
|
|
title: "TALOS eudract AE reporting - example"
|
|
author: "AGDamsbo"
|
|
date: "Knitted: `r format(Sys.time(), '%d %B %Y')`"
|
|
output:
|
|
pdf_document: default
|
|
html_document: default
|
|
toc: TRUE
|
|
---
|
|
|
|
```{r setup, include=FALSE}
|
|
# Global chunk defaults: echo the code, but keep messages and warnings out of
# the knitted document.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)
|
|
```
|
|
|
|
# Intro
|
|
|
|
This is the data management workflow for the reporting of AEs in the TALOS trial to the EudraCT database.
|
|
A dedicated package is used to format data after a longer process of editing data to conform.
|
|
The source data is not perfectly formatted, and during the process, a few manual steps are necessary.
|
|
- Advice number one: Make sure to format data according to the desired format for reporting.
|
|
|
|
This is "page" 2 of 2. All cleaning is performed in "TALOS AE cleaning.Rmd"
|
|
|
|
|
|
```{r}
|
|
# Set the working directory for all following chunks.
# A plain setwd() inside a chunk is undone by knitr as soon as that chunk has
# finished, so the relative file paths used further down would not resolve
# when the document is knitted. The root.dir option applies to later chunks.
knitr::opts_knit$set(root.dir = "/Volumes/Data/TALOS/")
|
|
```
|
|
|
|
```{r}
|
|
library(haven)
|
|
library(dplyr)
|
|
# https://www.rdocumentation.org/packages/eudract/versions/0.9.3
|
|
library(eudract)
|
|
```
|
|
|
|
# Data import
|
|
|
|
Data set
|
|
```{r}
|
|
# Import the cleaned AE data from the Stata file and pass it through
# haven's as_factor().
d <- as_factor(read_dta("/Volumes/Data/TALOS/talos_ae_clean.dta"))

# Store the first 100 rows as a sample file and continue with that sample.
# row.names = FALSE stops write.csv() from adding a row-number column, which
# read.csv() would otherwise read back as an extra variable named "X".
write.csv(head(d, 100), "sample_ae.csv", row.names = FALSE)
d <- read.csv("sample_ae.csv")
|
|
```
|
|
|
|
## Modified trial-specific adjudication list with added MedDRA codes from eudract
|
|
```{r}
|
|
library(readxl)

# Adjudication table, read from the Excel sheet in the working directory.
adj_tbl <- read_xlsx(path = "adjudication_table.xlsx")

# One-off export of soc_code for manual coding of the original adjudication list:
# write.csv(soc_code,"soc_code.csv")

# Preview both lookup tables.
head(x = adj_tbl)
head(x = soc_code)
|
|
```
|
|
|
|
# Formatting to EUDRACT
|
|
```{r}
|
|
# Example data set that ships with the eudract package
head(x = safety)
|
|
```
|
|
|
|
## Adding columns according to safety-format
|
|
|
|
### Fatal outcome
|
|
"d$status" contains the final status of every event instance; events marked "Dødelig" (fatal) are flagged as fatal.
|
|
The other option would be to code according to adj_code, but the "d$status" was GCP monitored and is used.
|
|
```{r}
|
|
# Earlier approach, kept for reference: flag deaths from the adjudication codes
# found in the description.
# for (i in 1:nrow(d)){
#   d$fatal[i]<-ifelse(str_contains(d$description[i],adj_tbl$code[adj_tbl$soc_term=="Death"],logic = "or"),1,0)
# }

# 1 when the recorded final status is "Dødelig", otherwise 0.
is_fatal <- d$status == "Dødelig"
d$fatal <- ifelse(is_fatal, 1, 0)

# Cross-tabulate the flag against treatment allocation.
table(factor(d$fatal), factor(d$rtreat))
|
|
```
|
|
|
|
### Related
|
|
All events coded with any of the three categories are considered related in this binary form.
|
|
```{r}
|
|
# Relatedness categories that count as "related" in the binary variable.
related_terms <- c("Mulig", "Sandsynlig", "Afgjort relateret")

# Element-wise OR of the three equality tests.
is_related <- Reduce(
  `|`,
  lapply(related_terms, function(term) d$related == term)
)
d$related_bin <- ifelse(is_related, 1, 0)
|
|
```
|
|
|
|
### Serious
|
|
Only SAEs are occurring, no SAR or SUSAR.
|
|
```{r}
|
|
# Classifications that are counted as serious.
ser <- c("SAE", "SAR", "SUSAR")

# 1 when the event classification is one of the above, otherwise 0.
d$serious <- ifelse(is.element(d$CLFint, ser), 1, 0)
|
|
```
|
|
|
|
### Randomisation
|
|
Group naming according to groups defined on the EudraCT page.
|
|
```{r}
|
|
# Two reporting groups: "Placebo" stays as is, everything else becomes "Active".
is_placebo <- d$rtreat == "Placebo"
d$group <- ifelse(is_placebo, "Placebo", "Active")
|
|
```
|
|
|
|
|
|
### SOC code and term/subcat
|
|
```{r}
|
|
# Extract the adjudication codes mentioned in each event description.
# Result: `ls`, a list with one element per row of `d`, named "index1",
# "index2", ..., each holding the codes found in that row's description.

# The alternation pattern is identical for every row, so it is built once.
# NOTE(review): the pattern is unanchored, so a token is kept as soon as it
# contains a code; this equals an exact match only if every code is three
# characters long - confirm against adj_tbl.
code_pattern <- paste(adj_tbl$code, collapse = "|")

# lapply() over the descriptions handles an empty data set (1:nrow(d) would
# iterate over c(1, 0)), and as.character() guards against a factor column,
# which strsplit() does not accept.
ls <- lapply(as.character(d$description), function(txt) {
  # Split at ":", " ", "+" or "(" and keep the first three characters of each
  # piece, as a safeguard against a missing " " after the adjudication code.
  tokens <- substr(unlist(strsplit(txt, "[: +(]")), 1, 3)
  # Keep the pieces that match a code from adj_tbl$code.
  grep(code_pattern, tokens, value = TRUE)
})
names(ls) <- paste0("index", seq_along(ls))
|
|
```
|
|
|
|
Splitting each list element into different columns, length(ls) equals nrow(d)
|
|
```{r}
|
|
# Spread the per-event code vectors in `ls` over the columns adj_code_1,
# adj_code_2, ... of `d`. Events with fewer codes than the widest event get
# NA in the remaining columns.
#
# Working column by column with seq_len() also covers events without any
# code: looping over 1:length(v) would visit c(1, 0) for an empty vector and
# stop on the zero-length assignment.
max_codes <- max(c(lengths(ls), 0L))

for (j in seq_len(max_codes)) {
  d[[paste0("adj_code_", j)]] <- vapply(
    seq_len(nrow(d)),
    # Elements are looked up by the "index<i>" names used when `ls` was
    # built; indexing past the end of a code vector yields NA.
    function(i) ls[[paste0("index", i)]][j],
    character(1)
  )
}
|
|
```
|
|
|
|
#### Death only event subset and recoding - manual work
|
|
Originally a "continuation" variable was also included in the export for more information on the event, however, this variable has been excluded from the data set.
|
|
```{r}
|
|
## If only 1 code, it is in adj_code_1, test if this code is any categorised as "Death"
|
|
# subset_death<-d[d$adj_code_1 %in% adj_tbl$code[adj_tbl$soc_term=="Death"] &
|
|
# lengths(ls)==1, ## Redundant test, that only one code was used
|
|
# c("description","continuation","event_id")]
|
|
## Adds an extra column for adding alternative code manually
|
|
# subset_death$add_code<-c("")
|
|
# write.csv(subset_death,"subset_death.csv")
|
|
```
|
|
|
|
Hand coded data set imported again
|
|
```{r}
|
|
# Import the hand-coded death subset: eight (8) cases, 5 had a new code added.
# event_id 335 was not recoded, as two events (also event_id 333) already
# exist for that same death.
subset_death_coded <- read_xlsx("subset_death_coded.xlsx")
head(subset_death_coded)

# Make sure the target column exists before writing single cells into it.
if (!"adj_code_2" %in% names(d)) {
  d$adj_code_2 <- rep(NA_character_, nrow(d))
}

# Write each manually added code into adj_code_2 of the matching event.
# Rows of the coded subset are applied in order, so a later row for the same
# event wins. which() ignores comparisons that evaluate to NA, so a missing
# event_id on either side no longer overwrites adj_code_2 with NA.
for (j in seq_len(nrow(subset_death_coded))) {
  hit <- which(d$event_id == subset_death_coded$event_id[j])
  d$adj_code_2[hit] <- subset_death_coded$add_code[j]
}
|
|
```
|
|
|
|
|
|
#### Subset events coded with "801"
|
|
```{r}
|
|
# subset_801<-d[grepl("801",d$description),c("description","continuation","event_id")]
|
|
# write.csv(subset_801,"subset_801.csv")
|
|
```
|
|
|
|
Every event has been coded with soc_code alternative to 801 or NONE if deemed irrelevant based on other codes at same event.
|
|
```{r}
|
|
# Manually recoded "801" events; rows with any missing value are dropped.
alt_801 <- na.omit(read_excel("subset_801_alt.xlsx"))
head(alt_801)
|
|
```
|
|
|
|
### Converting to new, long data.frame
|
|
All events with bleeding (severity) or death are excluded. Death counts will be added later.
|
|
```{r}
|
|
library(tidyr)
# dput(names(d))

# Adjudication codes to keep: everything whose SOC term is neither "Death"
# nor "Bleeding", as those are additional codes that do not correspond to
# soc_code terms.
keep_codes <- adj_tbl$code[
  adj_tbl$soc_term != "Death" & adj_tbl$soc_term != "Bleeding"
]

dta <- d %>%
  # Drop the columns that are not needed in the long data set
  select(!matches(c("description", "expected", "status", "CLFint"))) %>%
  # One row per event and adjudication code
  pivot_longer(
    cols = starts_with("adj_code_"),
    names_to = "adj_index",
    values_to = "adj_code"
  ) %>%
  # Complete cases only
  na.omit() %>%
  filter(adj_code %in% keep_codes)

head(dta)
|
|
```
|
|
|
|
### Matching adj_code to soc_code
|
|
Adding soc_codes and manually coded alternative categories for 801 codes.
|
|
```{r warning=FALSE}
|
|
# Look up the MedDRA SOC code for every adjudication code in one step.
# match() uses the first matching row of adj_tbl and gives NA when there is
# none. This also avoids writing element-wise into a tibble column that does
# not exist yet, and is safe for an empty `dta`.
dta$soc <- adj_tbl$meddra[match(dta$adj_code, adj_tbl$code)]

# Events coded "801" get the manually chosen alternative instead.
# Rows of alt_801 are applied in order, so a later row for the same event
# wins; which() ignores comparisons that evaluate to NA.
is_801 <- dta$adj_code == "801"
for (j in seq_len(nrow(alt_801))) {
  hit <- which(is_801 & dta$event_id == alt_801$event_id[j])
  dta$soc[hit] <- alt_801$alt_801[j]
}
|
|
```
|
|
|
|
A few events were not coded; these are dropped by omitting NAs.
|
|
```{r}
|
|
# Keep only the rows without any missing value, i.e. the correctly coded cases
dta <- na.omit(dta)
|
|
```
|
|
|
|
Adding term and subcat
|
|
```{r warning=FALSE}
|
|
# Add the SOC term for every MedDRA code with a single vectorised lookup.
term_row <- match(dta$soc, soc_code$meddra)

# A code without an entry in soc_code used to stop the loop with
# "replacement has length zero"; fail with a message that names the codes.
unknown <- is.na(term_row) & !is.na(dta$soc)
if (any(unknown)) {
  stop(
    "No soc_code entry for MedDRA code(s): ",
    paste(unique(dta$soc[unknown]), collapse = ", "),
    call. = FALSE
  )
}

# as.character() keeps the terms as text even if soc_term is stored as a factor.
dta$term <- as.character(soc_code$soc_term)[term_row]
|
|
```
|
|
|
|
# Configuring XML
|
|
|
|
## Creating specified data frame
|
|
```{r}
|
|
# Assemble the data frame that is passed on to safety_summary(), one column
# per variable; soc is converted to integer.
df <- data.frame(
  subjid  = dta[["rnumb"]],
  term    = dta[["term"]],
  soc     = as.integer(dta[["soc"]]),
  serious = dta[["serious"]],
  related = dta[["related_bin"]],
  fatal   = dta[["fatal"]],
  group   = dta[["group"]]
)

head(df)
|
|
```
|
|
|
|
## Handling Deaths
|
|
```{r}
|
|
# Deaths as a named vector with one count per group.
# Deriving the AE deaths from the data is switched off, because the sample
# data set contains no deaths:
# ae_deaths<-table(df$fatal,df$group)[2,]

# All deaths observed within 6 months after randomisation (intention-to-treat)
all_deaths <- c(Active = 16, Placebo = 12)

# With ae_deaths disabled nothing is subtracted, so every death counts as excess
excess_death <- all_deaths # - ae_deaths
|
|
```
|
|
|
|
## Creating safety summary
|
|
```{r}
|
|
# Number of exposed subjects in each group
exposed_n <- c(Active = 319, Placebo = 323)

# Summarise the events per group, passing the death counts set up earlier
safe_sum <- safety_summary(
  data = df,
  exposed = exposed_n,
  excess_deaths = excess_death
)
|
|
```
|
|
|
|
```{r}
|
|
# Temporary file paths for the two XML files: the simple format written here
# and the converted file produced in the next step.
simple <- tempfile(fileext = ".xml")
eudract <- tempfile(fileext = ".xml")

# Write the safety summary to the simple XML file
simple_safety_xml(safe_sum, simple)
|
|
```
|
|
|
|
|
|
```{r}
|
|
# Convert the simple XML file and write the result to the second temporary file
eudract_convert(input = simple, output = eudract)
|
|
```
|
|
|
|
|