first commit of old data
This commit is contained in:
parent
edb0afd869
commit
2d7f0fc186
BIN
MDI scores, PASE:rand.docx
Normal file
BIN
MDI scores, PASE:rand.docx
Normal file
Binary file not shown.
304
dep_data.Rmd
Normal file
304
dep_data.Rmd
Normal file
@ -0,0 +1,304 @@
|
||||
---
|
||||
title: "Data export and wrangling"
|
||||
author: "Andreas Gammelgaard Damsbo"
|
||||
date: "Knitted: `r format(Sys.time(), '%d %B, %Y')`"
|
||||
output:
|
||||
pdf_document: default
|
||||
html_document: default
|
||||
toc: TRUE
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE, message = FALSE)
|
||||
```
|
||||
|
||||
```{r message=FALSE}
|
||||
library(haven)
|
||||
library(plyr)
|
||||
library(dplyr)
|
||||
library(reshape2)
|
||||
```
|
||||
|
||||
```{r}
|
||||
dta<-read.csv("/Volumes/Data/exercise/source/background.csv",
|
||||
na.strings = c("NA",""),colClasses = "character")
|
||||
# dta_b<-dta
|
||||
```
|
||||
|
||||
# Variables
|
||||
List of variables included in dataset
|
||||
|
||||
```{r}
|
||||
dput(names(dta))
|
||||
```
|
||||
|
||||
## New additions and formatting
|
||||
|
||||
```{r}
|
||||
dta$mors_delay<-difftime(as.Date(dta$mors_d),as.Date(dta$rdate),units = "days")
|
||||
dta$mors_v1<-factor(ifelse(dta$mors_delay<=38&
|
||||
(dta$mors_delay-as.numeric(dta$inc_time))<=1,
|
||||
"yes","no"))
|
||||
# Tæller som død hvis død inden 38 dage og dødsdato og EOS ligger indenfor 1 døgn.
|
||||
dta$mors_v1[is.na(dta$mors_v1)]<-"no"
|
||||
```
|
||||
|
||||
```{r}
|
||||
dta$mors_v16<-factor(ifelse(dta$mors_v1=="no"&
|
||||
(dta$mors_delay-as.numeric(dta$inc_time))<=1,
|
||||
"yes","no"))
|
||||
# Tæller som død mellem 1 til 6 mdr, hvis ikke død inden 1 mdr,
|
||||
# og dødsdato og EOS ligger indenfor 1 døgn.
|
||||
dta$mors_v16[is.na(dta$mors_v16)]<-"no"
|
||||
```
|
||||
|
||||
PASE score dichotomisation at median score.
|
||||
```{r}
|
||||
dta$pase_0<-as.numeric(dta$pase_0)
|
||||
dta$pase_0_bin<-cut(dta$pase_0,
|
||||
c(min(dta$pase_0,na.rm = T),median(dta$pase_0,na.rm = T),
|
||||
max(dta$pase_0,na.rm = T)),include.lowest = T,
|
||||
labels = c("lower","higher"))
|
||||
quantile(dta$pase_0,na.rm = T)
|
||||
```
|
||||
|
||||
### Formatting
|
||||
|
||||
```{r}
|
||||
dta$inc_time<-as.numeric(dta$inc_time)
|
||||
```
|
||||
|
||||
# Cleaning MDI scores
|
||||
|
||||
The following contains a serious bit of data wrangling. Reasons are the occasional recording of visit 1 data at 6 months due to LOCF approach. Additionally some patients have data recorded at 6 months, but later end date has been defined as prior to the visit 6.
|
||||
Additionally the definition of when to define a MDI recording as 1 month or 6 months have added a bit of extra work..
|
||||
|
||||
This work should be applied for all endpoint data. If needed, a general script or function should be written.
|
||||
|
||||
Steps used for the correction:
|
||||
|
||||
1. If the inc_time is 38 days or less MDI 6 scores are moved to MDI 1 and visit 6 is defined as visit 1.
|
||||
2. If both visit 1 and 6 dates are NA, use enddate as visit 1 date. This is the case if patients were excluded early.
|
||||
3. If visit 6 is recorded later than enddate, use enddate instead. MDI 6 score is dropped.
|
||||
4. If visit delay is 7 days or less, and inclusion time is more than 38, MDI 1 is moved to MDI 6 and dropped. If MDI 1 and 6 are different both are kept. Enddate is moved to visit 6 date.
|
||||
5. Defining the visit 6 date as same as enddate if visit delay is <7.
|
||||
|
||||
|
||||
```{r}
|
||||
summary(inc196<-dta$inc_time>196)
|
||||
dt1<-dta[inc196,c("rnumb","rdate","visit_1","visit_6","enddate","inc_time","mdi_1","mdi_6","mors_delay")]
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Step 1
|
||||
```{r}
|
||||
summary(inc38<-dta$inc_time<=38)
|
||||
dt1<-dta[inc38,c("rnumb","rdate","visit_1","visit_6","inc_time","mdi_1","mdi_6")]
|
||||
dta$visit_1<-ifelse(inc38&!is.na(dta$visit_6),dta$visit_6,dta$visit_1)
|
||||
dta$mdi_1<-ifelse(inc38&is.na(dta$mdi_1),dta$mdi_6,dta$mdi_1)
|
||||
dta$mdi_6[inc38]<-NA
|
||||
dta$visit_6[inc38]<-NA
|
||||
# If the inc_time is 38 days or less MDI 6 scores are moved to MDI 1 and visit 6 is defined as visit 1.
|
||||
# LOCF correction.
|
||||
```
|
||||
|
||||
## Step 2
|
||||
```{r}
|
||||
summary(na16enddate<-is.na(dta$visit_1)&is.na(dta$visit_6))
|
||||
dt2<-dta[na16enddate,c("rnumb","rdate","visit_1","visit_6","inc_time","mdi_1","mdi_6")]
|
||||
dta$visit_1<-ifelse(na16enddate,dta$enddate,dta$visit_1)
|
||||
# If both visit 1 and 6 dates are NA, use enddate as visit 1 date. This is the case if patients were excluded early.
|
||||
```
|
||||
|
||||
## Step 3
|
||||
```{r}
|
||||
summary(late61<-as.Date(dta$visit_6)>as.Date(dta$enddate)&difftime(as.Date(dta$visit_6),as.Date(dta$enddate),units = "days")<=1)
|
||||
summary(late62<-as.Date(dta$visit_6)>as.Date(dta$enddate)&difftime(as.Date(dta$visit_6),as.Date(dta$enddate),units = "days")>1)
|
||||
|
||||
late61[is.na(late61)]<-FALSE
|
||||
late62[is.na(late62)]<-FALSE
|
||||
|
||||
# dt5<-dta[late61,c("rnumb","rdate","visit_1","visit_6","enddate","inc_time","mdi_1","mdi_6")]
|
||||
# dt6<-dta[late62,c("rnumb","rdate","visit_1","visit_6","enddate","inc_time","mdi_1","mdi_6")]
|
||||
|
||||
dta$visit_6<-ifelse(late61,dta$enddate,dta$visit_6)
|
||||
dta$visit_6<-ifelse(late62,dta$enddate,dta$visit_6)
|
||||
dta$mdi_6[late62]<-NA
|
||||
# If visit 6 is recorded later than enddate, use enddate instead
|
||||
# A group of patients have visit 6 and MDI 6 recorded, but enddate is before visit 6 data.
|
||||
# After manual lookups, this is likely due to some patients coming for visit 6, but the
|
||||
# interviewer later realizing, that the patients should have been excluded earlier on.
|
||||
# Due to this, patients with enddate more than 1 day (leaving room for simple recording errors) prior to visit 6 date, MDI 6 will be dropped.
|
||||
# Patients with 1 day difference the enddate is moved to visit 6 date.
|
||||
```
|
||||
|
||||
## Step 4
|
||||
|
||||
```{r}
|
||||
summary(locflate<-(difftime(as.Date(dta$visit_6),as.Date(dta$visit_1))<=7|is.na(dta$visit_1))&dta$inc_time>38)
|
||||
locflate[is.na(locflate)]<-FALSE
|
||||
|
||||
dt2<-dta[locflate,c("rnumb","rdate","visit_1","visit_6","inc_time","mdi_1","mdi_6")]
|
||||
|
||||
dta$mdi_6<-ifelse(locflate&is.na(dta$mdi_6),dta$mdi_1,dta$mdi_6)
|
||||
|
||||
dta$mdi_1[locflate&dta$mdi_1==dta$mdi_6]<-NA
|
||||
dta$visit_1[locflate&is.na(dta$mdi_1)]<-NA
|
||||
|
||||
dta$visit_6<-ifelse(locflate,dta$enddate,dta$visit_6)
|
||||
|
||||
# If visit delay is 7 days or less, and inclusion time is more than 38, MDI 1 is moved to MDI 6 and dropped. If MDI 1 and 6 are different both are kept. Enddate is moved to visit 6 date.
|
||||
```
|
||||
|
||||
## Step 5
|
||||
```{r}
|
||||
summary(same1n6date<-difftime(as.Date(dta$visit_6),as.Date(dta$visit_1),units = "days")<7)
|
||||
same1n6date[is.na(same1n6date)]<-FALSE
|
||||
# dt5<-dta[same1n6date,c("rnumb","rdate","visit_1","visit_6","enddate","inc_time","mdi_1","mdi_6",drops)]
|
||||
dta$visit_6<-ifelse(same1n6date,dta$enddate,dta$visit_6)
|
||||
# Defining the visit 6 date as same as enddate if visit delay is <7.
|
||||
```
|
||||
|
||||
|
||||
# Visit delay
|
||||
```{r}
|
||||
dta$visit_delay<-difftime(as.Date(dta$visit_6),as.Date(dta$visit_1),units = "days")
|
||||
# Final calculation of days between visits
|
||||
summary(as.numeric(dta$visit_delay))
|
||||
```
|
||||
|
||||
# newobs definition - DEPRECATED
|
||||
|
||||
The definition of a truly new observation is a recorded score at least 7 days after the first score. This was relevant prior to the work of redefining time points for scoring.
|
||||
```{r}
|
||||
dta$mdi_6_newobs<-dta$mdi_6
|
||||
# The newobs variable is later used, but is obsolete due to the previous change in definitions. The previous definition of newobs were a delay between visits of at least 7 days. No cases matched. Minimum is 13.
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
# dta$mdi_6_166<-ifelse(dta$inc_time>166,NA,dta$mdi_6)
|
||||
# dta$mdi_6_80<-ifelse(dta$inc_time>80,NA,dta$mdi_6)
|
||||
# dta$mdi_6_protocol<-ifelse(dta$protocol=="2",NA,ifelse(is.na(dta$mdi_6),dta$mdi_1,dta$mdi_6))
|
||||
# dta$mdi_6_locf<-ifelse(is.na(dta$mdi_6),dta$mdi_1,dta$mdi_6)
|
||||
```
|
||||
|
||||
|
||||
# Drops
|
||||
Streamlining drop out data to avoid NA's.
|
||||
```{r}
|
||||
drops<-c("side_effect2","side_effect","wants_out","open_treat")
|
||||
for (i in drops) {
|
||||
dta[i]<-ifelse(dta[i]=="1. Ja","yes","no")
|
||||
dta[i][is.na(dta[i])]<-"no"
|
||||
}
|
||||
```
|
||||
|
||||
Defining a common all cause drop variable
|
||||
```{r}
|
||||
dta$drop<-ifelse(dta$side_effect2=="yes"|dta$side_effect=="yes"|dta$wants_out=="yes"|dta$open_treat=="yes","yes","no")
|
||||
```
|
||||
|
||||
Defining drop before or at day 38 (Following protocol design) as drop before 1 month and drop after day 38 as drop between 1 and 6 months
|
||||
```{r}
|
||||
cut_line<-38
|
||||
dta$inc_time<-as.numeric(dta$inc_time)
|
||||
dta$drop1<-ifelse(dta$drop=="yes"&dta$inc_time<=cut_line,"yes","no")
|
||||
summary(factor(dta$drop1))
|
||||
|
||||
# dt3<-dta[,c("rnumb","rdate","visit_1","visit_6","inc_time","mdi_1","mdi_6","mdi_6_newobs","drop1","drop16",drops)]
|
||||
|
||||
dta$drop16<-ifelse(dta$drop=="yes"&dta$inc_time>cut_line,"yes","no")
|
||||
summary(factor(dta$drop16))
|
||||
summary(factor(dta$drop))
|
||||
# dtf<-dta[dta$drop1=="yes",c("mdi_6_newobs","inc_time")]
|
||||
# dtf<-dta[,c("mdi_1","mdi_6_newobs","inc_time","drop","drop1","drop16")]
|
||||
```
|
||||
|
||||
|
||||
# Enriching
|
||||
With patients excluded due to open treatment need and defining populations to include/exclude
|
||||
```{r}
|
||||
summary(sel_enr_1<-dta$open_treat=="yes"&is.na(dta$mdi_1)&dta$drop1=="yes")
|
||||
dta$mdi_1_enr<-ifelse(sel_enr_1,21,dta$mdi_1) # Per agreement, patients excluded due to open treatment need a given the fictive MDI score "21".
|
||||
```
|
||||
|
||||
Vectorising ex/inclusions at 1 month, to keep patients with data or with later data.
|
||||
```{r}
|
||||
summary(dta$excluded_1<-factor(case_when(dta$mors_v1=="yes"|
|
||||
is.na(dta$mdi_1_enr)&
|
||||
dta$drop1=="yes"~"ex_1", # Excluded
|
||||
is.na(dta$mdi_1_enr)&!is.na(dta$mdi_6_newobs)~"ca_1", # Missing, but carried to 6 months
|
||||
is.na(dta$mdi_1_enr)~"mi_1", # Missing,
|
||||
is.na(dta$mdi_1)&!is.na(dta$mdi_1_enr)~"en_1",
|
||||
TRUE ~ "dt_1"))) # Data available
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
summary(sel_enr_6<-dta$open_treat=="yes"&dta$drop16=="yes"&is.na(dta$mdi_6_newobs)&dta$excluded_1%in%c("ca_1","dt_1")&dta$mors_v16=="no")
|
||||
|
||||
# Entries to be enriched are entries with need for open treatment after 1 month, with missing mdi_6_newobs and with data at or "carried" from 1 month
|
||||
|
||||
dta$mdi_6_newobs_enr<-as.numeric(ifelse(sel_enr_6,21,dta$mdi_6_newobs)) # Per agreement, patients excluded due to open treatment need a given the fictive MDI score "21".
|
||||
```
|
||||
|
||||
```{r}
|
||||
summary(dta$excluded_6<-factor(case_when(is.na(dta$mdi_6_newobs_enr)&dta$excluded_1%in%c("ca_1","dt_1","en_1")~"ex_6", # Excluded due to death or other dropout
|
||||
is.na(dta$mdi_6_newobs_enr)~"mi_6", # Missing data due to exclusion at 1 month
|
||||
is.na(dta$mdi_6_newobs)&!is.na(dta$mdi_6_newobs_enr)~"en_6", # Enriched entries
|
||||
dta$excluded_1%in%c("ca_1","dt_1")~"dt_6" # Organic data available
|
||||
))) # Data available
|
||||
```
|
||||
|
||||
```{r}
|
||||
# dtf<-cbind(dta[,c("rnumb","mdi_1","mdi_6_newobs","inc_time","drop","drop1","drop16","mdi_1_enr","mdi_6_newobs_enr","excluded_1","excluded_6","mors_v16","mors_delay")],"excluded"=is.na(dta$mdi_6_newobs_enr)&dta$excluded_1%in%c("ca_1","dt_1","en_1"))
|
||||
#
|
||||
# summary(dtf %>% filter(excluded==TRUE))
|
||||
```
|
||||
|
||||
|
||||
# Main Dataset export
|
||||
```{r}
|
||||
variable_namebits<-c("rnumb","rtreat","age","sex",
|
||||
"bmi",
|
||||
"smoke_ever",
|
||||
"civil",
|
||||
"diabetes",
|
||||
"hypertension",
|
||||
"pad",
|
||||
"afli",
|
||||
"ami",
|
||||
"tci",
|
||||
"nihss_0",
|
||||
"thrombolysis",
|
||||
"thrombechtomy",
|
||||
"rep_any",
|
||||
"pase_0",
|
||||
"pase_6",
|
||||
"mrs_0","mrs_1","mrs_6",
|
||||
# "who5_score",
|
||||
"mdi",
|
||||
# "ham_score_1","ham_score_6",
|
||||
"mors",
|
||||
"drop",
|
||||
"wants_out",
|
||||
"side_effect",
|
||||
"open_treat",
|
||||
"side_effect2",
|
||||
"excluded",
|
||||
"protocol","eos_early","inc_time",
|
||||
"rdate","visit","enddate"
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
export<-dta %>% select(contains(variable_namebits))
|
||||
```
|
||||
|
||||
```{r}
|
||||
write.csv(export,"/Volumes/Data/depression/dep_dataset.csv",row.names = FALSE)
|
||||
```
|
||||
|
BIN
dep_data.pdf
Normal file
BIN
dep_data.pdf
Normal file
Binary file not shown.
164
dep_flow.Rmd
Normal file
164
dep_flow.Rmd
Normal file
@ -0,0 +1,164 @@
|
||||
---
|
||||
title: "Patient flowchart and chi^2 tests"
|
||||
author: "Andreas Gammelgaard Damsbo"
|
||||
date: "Knitted: `r format(Sys.time(), '%d %B, %Y')`"
|
||||
output: pdf_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE, message = FALSE)
|
||||
```
|
||||
|
||||
# Import
|
||||
```{r}
|
||||
dta_all<-read.csv("/Volumes/Data/depression/dep_dataset.csv")
|
||||
```
|
||||
|
||||
# Defining patients to include for analysis
|
||||
Only including cases with complete pase_0 and MDI at 1 & 6 months
|
||||
```{r}
|
||||
dta<-dta_all[!is.na(dta_all$pase_0),]
|
||||
# &!is.na(dta$mdi_1)&!is.na(dta$mdi_6)
|
||||
```
|
||||
|
||||
# Backup
|
||||
```{r}
|
||||
dta_b<-dta
|
||||
```
|
||||
|
||||
|
||||
# Sammentællinger
|
||||
```{r}
|
||||
summary(cbind(is.na(dta_all[,c("pase_0","mdi_1","mdi_1_enr","mdi_6_newobs","mdi_6_newobs_enr")]),
|
||||
both_missing=is.na(dta$mdi_1)&is.na(dta$mdi_6_newobs),
|
||||
either_missing=is.na(dta$mdi_1)|is.na(dta$mdi_6_newobs)))
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
suppressWarnings(summary(cbind(all_particip=dta_all$mors_180=="yes",
|
||||
all_pase0=dta$mors_180=="yes",
|
||||
all_mdi_1=!is.na(dta$mdi_1)&dta$mors_180=="yes"))) # Antal der dør
|
||||
|
||||
table(dta$pase_0_bin,factor(dta$mors_180)) # Antal der dør, stratificeret efter PASE gruppe
|
||||
```
|
||||
|
||||
```{r}
|
||||
# summary(factor(dta$mors_v1))
|
||||
# summary(factor(dta$mors_v16)) # OBS medregnet er 2 dødsfald, der ikke har MDI 1.
|
||||
```
|
||||
|
||||
# Flow
|
||||
|
||||
## 1 month
|
||||
|
||||
Shows counts of all patients withs missing MDI 1 scores.
|
||||
```{r message=FALSE}
|
||||
source("/Volumes/Data/depression/function_flow.R") # Home made flow function
|
||||
show(flow_prog(df=dta[dta$excluded_1%in%c("ex_1","mi_1","ca_1"),],
|
||||
sngl=c("mors_v1","drop1"),
|
||||
sngl_keep=c("no","yes"),
|
||||
mltp=c("open_treat","wants_out","side_effect","side_effect2")))
|
||||
# v1<-dta$rnumb[dta$excluded_1%in%c("ex_1","mi_1")]
|
||||
```
|
||||
|
||||
Same overview, but vectorised
|
||||
```{r}
|
||||
summary(factor(dta$excluded_1))
|
||||
|
||||
# dt_1 are organic data, en_1 are enriched, ex_1 are excluded, mi_1 were missing,
|
||||
# ca_1 were missing at 1 month, but held data at 6 months, and thus carried along.
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 6 months
|
||||
|
||||
Shows counts of all patients withs missing MDI 6 scores.
|
||||
```{r}
|
||||
show(flow_prog(df=dta[is.na(dta$mdi_6_newobs_enr)&dta$excluded_6%in%c("ex_6"),], #
|
||||
sngl=c("mors_v16","drop16"),
|
||||
sngl_keep=c("no","yes"),
|
||||
mltp=c("open_treat","wants_out","side_effect","side_effect2")))
|
||||
# v2<-dta$rnumb[is.na(dta$mdi_6_newobs_enr)&dta$excluded_1%in%c("ca_1","dt_1")]
|
||||
```
|
||||
|
||||
```{r}
|
||||
summary(factor(dta$excluded_6))
|
||||
|
||||
# dt_6 are organic data, en_6 are enriched, ex_6 are excluded, mi_6 were excluded at 1 month.
|
||||
# At 6 month 118 are excluded due to any cause
|
||||
# Due to later inclusion of ca_1 patients, the sum of patients excluded at 6 months is 71+62-16=117
|
||||
```
|
||||
|
||||
This flow counts all patients dying or dropping out early after 1 month. Some have a recorded MDI at dropout. This is just to give a perspective on data.
|
||||
```{r}
|
||||
show(flow_prog(df=dta, #
|
||||
sngl=c("mors_v16","drop16"),
|
||||
sngl_keep=c("no","yes"),
|
||||
mltp=c("open_treat","wants_out","side_effect","side_effect2")))
|
||||
# v2<-dta$rnumb[is.na(dta$mdi_6_newobs_enr)&dta$excluded_1%in%c("ca_1","dt_1")]
|
||||
```
|
||||
|
||||
```{r}
|
||||
summary(as.numeric(dta$inc_time[dta$drop16=="yes"]))
|
||||
```
|
||||
|
||||
|
||||
# Chi^2 tests
|
||||
|
||||
```{r}
|
||||
source("/Volumes/Data/depression/function_chi_test_sum.R")
|
||||
ex_lst<-list()
|
||||
ex_var<-c("mdi_1_enr","rtreat","pase_0_bin")
|
||||
for (i in 2:3){
|
||||
ex_lst <- append(ex_lst,chi_test_sum(a=is.na(dta[,ex_var[1]]),
|
||||
b=dta[,ex_var[i]],
|
||||
aname=ex_var[1],
|
||||
bname=ex_var[i]))
|
||||
}
|
||||
```
|
||||
|
||||
```{r}
|
||||
# ex_var<-c("open_treat","rtreat","pase_0_bin")
|
||||
# for (i in 2:3){
|
||||
# ex_lst <- append(ex_lst,
|
||||
# chi_test_sum(a=dta[,ex_var[1]],
|
||||
# b=dta[,ex_var[i]],
|
||||
# aname=ex_var[1],
|
||||
# bname=ex_var[i]))
|
||||
# }
|
||||
|
||||
## Taget ud grundet for lave tal
|
||||
```
|
||||
|
||||
```{r}
|
||||
ex_var<-c("mdi_1_enr","rtreat","pase_0_bin")
|
||||
for (i in 2:3){
|
||||
ex_lst <- append(ex_lst,
|
||||
chi_test_sum(a=is.na(dta$mdi_6_newobs_enr)&!is.na(dta$mdi_1_enr),
|
||||
b=dta[,ex_var[i]],
|
||||
aname="Excluded at 6 months",
|
||||
bname=ex_var[i]))
|
||||
}
|
||||
for (i in 2:3){
|
||||
ex_lst <- append(ex_lst,
|
||||
chi_test_sum(a=is.na(dta$mdi_6_newobs_enr),
|
||||
b=dta[,ex_var[i]],
|
||||
aname="Total unavailable at 6 months",
|
||||
bname=ex_var[i]))
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
# ex_var<-c("open_treat","rtreat","pase_0_bin")
|
||||
# for (i in 2:3){
|
||||
# ex_lst <- append(ex_lst,
|
||||
# chi_test_sum(a=dta[,ex_var[1]],
|
||||
# b=dta[,ex_var[i]],
|
||||
# aname=ex_var[1],
|
||||
# bname=ex_var[i]))
|
||||
# }
|
||||
show(ex_lst)
|
||||
```
|
BIN
dep_flow.pdf
Normal file
BIN
dep_flow.pdf
Normal file
Binary file not shown.
299
dep_imputation.Rmd
Normal file
299
dep_imputation.Rmd
Normal file
@ -0,0 +1,299 @@
|
||||
---
|
||||
title: "Sensitivity analysis on imputed dataset"
|
||||
author: "Andreas Gammelgaard Damsbo"
|
||||
date: "Knitted: `r format(Sys.time(), '%d %B, %Y')`"
|
||||
output: pdf_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE, message = FALSE)
|
||||
```
|
||||
|
||||
|
||||
# Import
|
||||
```{r}
|
||||
dta_all<-read.csv("/Volumes/Data/depression/dep_dataset.csv",na.strings = c("NA","unknown")) ## Extending definition of NA's for imputation
|
||||
```
|
||||
|
||||
# Defining patients to include for analysis
|
||||
Only including cases with complete pase_0 and MDI at 1 & 6 months
|
||||
```{r}
|
||||
dta<-dta_all[!is.na(dta_all$pase_0),]
|
||||
# &!is.na(dta$mdi_1)&!is.na(dta$mdi_6)
|
||||
```
|
||||
|
||||
## Formatting
|
||||
```{r echo=FALSE}
|
||||
dta$diabetes<-factor(dta$diabetes)
|
||||
dta$pad<-factor(dta$pad)
|
||||
|
||||
dta$civil<-factor(dta$civil)
|
||||
|
||||
dta$hypertension<-factor(dta$hypertension)
|
||||
dta$afli<-factor(dta$afli)
|
||||
dta$smoke_ever<-factor(dta$smoke_ever)
|
||||
dta$ami<-factor(dta$ami)
|
||||
dta$tci<-factor(dta$tci)
|
||||
dta$thrombolysis<-factor(dta$thrombolysis)
|
||||
dta$thrombechtomy<-factor(dta$thrombechtomy)
|
||||
dta$rep_any<-factor(dta$rep_any)
|
||||
dta$pad<-factor(dta$pad)
|
||||
dta$nihss_0<-as.numeric(dta$nihss_0)
|
||||
dta$age<-as.numeric(dta$age)
|
||||
dta$rtreat<-factor(dta$rtreat)
|
||||
dta$sex<-factor(dta$sex)
|
||||
dta$pase_0<-as.numeric(dta$pase_0)
|
||||
dta$pase_6<-as.numeric(dta$pase_6)
|
||||
dta$bmi<-as.numeric(dta$bmi)
|
||||
dta$mdi_6<-as.numeric(dta$mdi_6)
|
||||
dta$pase_0_bin<-factor(dta$pase_0_bin,levels=c("lower","higher"))
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
# Backup
|
||||
dta_b<-dta
|
||||
```
|
||||
|
||||
|
||||
# Libraries
|
||||
```{r}
|
||||
library(daDoctoR)
|
||||
library(mice)
|
||||
```
|
||||
|
||||
## Variables to include in imputation
|
||||
```{r}
|
||||
# Possible variables to include
|
||||
coval<-c("pase_0_bin","rtreat","age","sex","smoke_ever","civil","bmi","diabetes", "hypertension", "afli","pad","nihss_0","rep_any")
|
||||
```
|
||||
|
||||
|
||||
# Imputation
|
||||
```{r}
|
||||
# Output variables added to include in model. Excluded from predicting.
|
||||
outc<-c("mdi_1_enr","mdi_6_newobs_enr")
|
||||
# Adding all
|
||||
covar<-c(coval,outc)
|
||||
# Selecting dataset
|
||||
r<-dta[,c("rnumb",covar)]
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Iterations
|
||||
mxt=20
|
||||
# Imputations
|
||||
mis=5
|
||||
```
|
||||
|
||||
https://datascienceplus.com/imputing-missing-data-with-r-mice-package/
|
||||
```{r}
|
||||
md.pattern(r) # Missing pattern
|
||||
# library(VIM)
|
||||
# aggr_plot <- aggr(r, col=c('navyblue','red'), numbers=TRUE, sortVars=TRUE, labels=names(data), cex.axis=.7, gap=3, ylab=c("Histogram of missing data","Pattern"))
|
||||
```
|
||||
|
||||
```{r}
|
||||
init <- mice(r, maxit=0) # Creating initial imputation list to assess methods
|
||||
meth <- init$method
|
||||
meth
|
||||
predM <- init$predictorMatrix
|
||||
|
||||
predM[, c("rnumb")] <- 0 # Defining variables not to be used for predicting imputed values
|
||||
|
||||
# meth[outc]=""
|
||||
# Defining variables not to be imputed.
|
||||
# Commented out as all included variables will be imputed
|
||||
```
|
||||
|
||||
```{r echo=FALSE}
|
||||
imputed <- mice(r, method=meth, predictorMatrix=predM, m=mis, maxit = mxt,seed = 103, printFlag=FALSE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# summary(imputed)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(dplyr)
|
||||
export<-dta %>%
|
||||
select(-all_of(coval)) %>% # Leaving out imputed variables from original dataset
|
||||
left_join(mice::complete(imputed,1),.,by="rnumb") # Join with the first imputed dataset for a full dataset export
|
||||
|
||||
md.pattern(export[coval]) # Ensuring complete data
|
||||
|
||||
write.csv(export,"/Volumes/Data/depression/imputed.csv",row.names = FALSE) # Export
|
||||
```
|
||||
|
||||
|
||||
# Regression analyses
|
||||
```{r}
|
||||
print(adjs_10<-rep_lm(meas = "mdi_6",string=c("pase_0_bin","rtreat",coval),data=dta,cut.p = .1)[[2]])
|
||||
```
|
||||
|
||||
## Bivariabel
|
||||
|
||||
Function to format collected data from pool function
|
||||
```{r}
|
||||
pool_table<-function(clls){
|
||||
## Variables needed: estimate, p.value, term
|
||||
|
||||
coll$lo<-round(coll$estimate-coll$std.error*1.96,2)
|
||||
coll$hi<-round(coll$estimate+coll$std.error*1.96,2)
|
||||
|
||||
pa<-coll$p.value
|
||||
pa <- ifelse(pa < 0.001, "<0.001", round(pa, 3))
|
||||
pa <- ifelse(pa <= 0.05 | pa == "<0.001", paste0("*",pa), ifelse(pa > 0.05 & pa <= 0.1, paste0(".", pa),pa))
|
||||
|
||||
cl<-data.frame(id=coll$term,diff=paste0(round(coll$estimate,2)," (",coll$lo," to ",coll$hi,")"),p=pa,stringsAsFactors=FALSE)
|
||||
return(cl)
|
||||
}
|
||||
|
||||
keeps<-c("term","estimate","std.error","p.value")
|
||||
```
|
||||
|
||||
### Repeated bivariabel analysis
|
||||
Not necessary for this, but an interesting addition
|
||||
|
||||
```{r}
|
||||
coll<-c()
|
||||
for (i in c("rtreat",adjs_10)){
|
||||
## Bivariable linear regression analysis of all
|
||||
coeffs<-summary(pool(
|
||||
with(imputed,lm(as.formula(paste0("mdi_1_enr~",i))))
|
||||
))[-1,c("term","estimate","std.error","p.value")]
|
||||
|
||||
coll<-rbind(coll,coeffs)
|
||||
|
||||
## Inspiration: https://stackoverflow.com/questions/40132829/r-for-loop-in-a-formula
|
||||
## Also: https://gist.github.com/AaronGullickson/3ccb3fdd1778b32fc46df40d78faf5d3
|
||||
}
|
||||
|
||||
## Collecting
|
||||
|
||||
coll$lo<-round(coll$estimate-coll$std.error*1.96,2)
|
||||
coll$hi<-round(coll$estimate+coll$std.error*1.96,2)
|
||||
|
||||
pa<-coll$p.value
|
||||
pa <- ifelse(pa < 0.001, "<0.001", round(pa, 3))
|
||||
pa <- ifelse(pa <= 0.05 | pa == "<0.001", paste0("*",pa), ifelse(pa > 0.05 & pa <= 0.1, paste0(".", pa),pa))
|
||||
|
||||
coll_bi<-data.frame(diff=paste0(round(exp(coll$estimate),2)," (",coll$lo," to ",coll$hi,")"),p=pa,id=coll$term,stringsAsFactors=FALSE)
|
||||
|
||||
```
|
||||
|
||||
|
||||
## Unadjusted analyses
|
||||
|
||||
```{r}
|
||||
adjs_10m<-adjs_10[adjs_10!="pase_0_bin"]
|
||||
|
||||
adj_m<-c("rtreat","pase_0_bin")
|
||||
|
||||
coll<-c()
|
||||
nms<-c()
|
||||
for (l in outc){
|
||||
|
||||
for (i in adj_m){
|
||||
coeffs<-summary(pool(
|
||||
with(imputed,lm(as.formula(paste0(l,"~",i))))
|
||||
))[-1,keeps]
|
||||
coll<-rbind(coll,coeffs)
|
||||
|
||||
nms<-c(nms,paste(l,i,sep = "_"))
|
||||
|
||||
d.long <- mice::complete(imputed,"long",include = T)
|
||||
|
||||
# Inspiration: https://stackoverflow.com/questions/53014141/mice-splitting-imputed-data-for-further-analysis
|
||||
|
||||
for (j in levels(d.long[[i]])){
|
||||
k<-length(adj_m)-grep(i,adj_m)+1 ## This only works to select the "opposite" of i for length(adj_m)==2
|
||||
|
||||
s.imp<-mice::as.mids(d.long[which(d.long[[i]] == j),]) # Subsetting long and convert to "mids" format for pooling
|
||||
|
||||
coeffs<-summary(pool(
|
||||
with(s.imp,lm(as.formula(paste0(l,"~",adj_m[k]))))
|
||||
))[-1,keeps]
|
||||
|
||||
coll<-rbind(coll,coeffs)
|
||||
|
||||
nms<-c(nms,paste(l,j,sep = "_"))
|
||||
}
|
||||
|
||||
|
||||
## Inspiration: https://stackoverflow.com/questions/40132829/r-for-loop-in-a-formula
|
||||
## Also: https://gist.github.com/AaronGullickson/3ccb3fdd1778b32fc46df40d78faf5d3
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
coll$term<-nms
|
||||
|
||||
```
|
||||
|
||||
### Collecting
|
||||
```{r}
|
||||
biv_coll<-pool_table(coll)
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Adjusted analyses
|
||||
|
||||
```{r}
|
||||
adjs_10m<-adjs_10[adjs_10!="pase_0_bin"]
|
||||
|
||||
adj_m<-c("rtreat","pase_0_bin")
|
||||
|
||||
coll<-c()
|
||||
nms<-c()
|
||||
for (l in outc){
|
||||
# l="mdi_1_enr"
|
||||
for (i in adj_m){
|
||||
coeffs<-summary(pool(
|
||||
with(imputed,lm(as.formula(paste0(l,"~",paste(i,paste(adjs_10m,collapse="+"),sep="+")))))
|
||||
))[2,keeps]
|
||||
coll<-rbind(coll,coeffs)
|
||||
|
||||
nms<-c(nms,paste(l,i,sep = "_"))
|
||||
|
||||
d.long <- mice::complete(imputed,"long",include = T)
|
||||
|
||||
# Inspiration: https://stackoverflow.com/questions/53014141/mice-splitting-imputed-data-for-further-analysis
|
||||
|
||||
for (j in levels(d.long[[i]])){
|
||||
k<-length(adj_m)-grep(i,adj_m)+1 ## This only works to select the "opposite" of i for length(adj_m)==2
|
||||
|
||||
s.imp<-mice::as.mids(d.long[which(d.long[[i]] == j),]) # Subsetting long and convert to "mids" format for pooling
|
||||
|
||||
coeffs<-summary(pool(
|
||||
with(s.imp,lm(as.formula(paste0(l,"~",paste(adj_m[k],paste(adjs_10m,collapse="+"),sep="+")))))
|
||||
))[2,keeps]
|
||||
|
||||
coll<-rbind(coll,coeffs)
|
||||
|
||||
nms<-c(nms,paste(l,j,sep = "_"))
|
||||
}
|
||||
|
||||
|
||||
## Inspiration: https://stackoverflow.com/questions/40132829/r-for-loop-in-a-formula
|
||||
## Also: https://gist.github.com/AaronGullickson/3ccb3fdd1778b32fc46df40d78faf5d3
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
coll$term<-nms
|
||||
|
||||
```
|
||||
|
||||
### Collecting
|
||||
```{r}
|
||||
mul_coll<-pool_table(coll)
|
||||
colnames(mul_coll)[-1]<-paste0("adj_",colnames(mul_coll)[-1])
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(lubridate)
|
||||
write.csv(full_join(biv_coll,mul_coll),paste0("/Volumes/Data/depression/imp_regression_",today(),".csv"))
|
||||
```
|
||||
|
BIN
dep_imputation.pdf
Normal file
BIN
dep_imputation.pdf
Normal file
Binary file not shown.
97
dep_median.Rmd
Normal file
97
dep_median.Rmd
Normal file
@ -0,0 +1,97 @@
|
||||
---
|
||||
title: "Additional data analysis"
|
||||
author: "Andreas Gammelgaard Damsbo"
|
||||
date: "Knitted: `r format(Sys.time(), '%d %B, %Y')`"
|
||||
output: pdf_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
|
||||
```
|
||||
|
||||
# Import
|
||||
```{r}
|
||||
dta_all<-read.csv("/Volumes/Data/depression/dep_dataset.csv")
|
||||
```
|
||||
|
||||
# Defining patients to include for analysis
|
||||
Only including cases with complete pase_0 and MDI at 1 & 6 months
|
||||
```{r}
|
||||
dta<-dta_all[!is.na(dta_all$pase_0),]
|
||||
# &!is.na(dta$mdi_1)&!is.na(dta$mdi_6)
|
||||
quantile(dta$pase_0, probs = seq(0, 1, 0.25), names = TRUE)
|
||||
```
|
||||
|
||||
## Formatting
|
||||
```{r echo=FALSE}
|
||||
dta$rtreat<-factor(dta$rtreat)
|
||||
dta$pase_6<-as.numeric(dta$pase_6)
|
||||
```
|
||||
|
||||
# Summaries
|
||||
|
||||
Fraction with inc_time of at least 166 days
|
||||
```{r}
|
||||
dt<-as.numeric(dta[dta$excluded_6%in%c("dt_6","en_6"),c("inc_time")])>=166
|
||||
summary(dt)
|
||||
length(dt[dt==TRUE])/length(dt)*100 # Percent after 166 days
|
||||
```
|
||||
|
||||
5% percentiler
|
||||
```{r}
|
||||
quantile(dt, probs = seq(0, 1, 0.05), names = TRUE)
|
||||
```
|
||||
|
||||
|
||||
Base version
|
||||
```{r}
|
||||
aggregate(pase_6 ~ rtreat, data = dta, summary)
|
||||
```
|
||||
|
||||
Fancy version
|
||||
```{r}
|
||||
psych::describeBy(dta$pase_6, dta$rtreat,mat=T)
|
||||
```
|
||||
|
||||
# Mann-Whitney U test
|
||||
See: https://stat-methods.com/home/mann-whitney-u-r/
|
||||
|
||||
```{r}
|
||||
#Perform the Mann-Whitney U test
|
||||
m1<-wilcox.test(pase_6 ~ rtreat, data=dta, na.rm=TRUE,
|
||||
paired=FALSE, exact=FALSE, conf.int=TRUE)
|
||||
print(m1)
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Boxplot
|
||||
|
||||
## Base function - simple
|
||||
```{r}
|
||||
boxplot(dta$pase_6 ~ dta$rtreat)
|
||||
```
|
||||
|
||||
## ggplot2 - fancy version
|
||||
```{r}
|
||||
library(ggplot2)
|
||||
ggplot(dta, aes(x = rtreat, y = pase_6, fill = rtreat)) +
|
||||
stat_boxplot(geom ="errorbar", width = 0.5) +
|
||||
geom_boxplot(fill = "light blue") +
|
||||
stat_summary(fun.y=mean, geom="point", shape=10, size=3.5, color="black") +
|
||||
# Point symbol is mean value
|
||||
ggtitle("Boxplot of Treatments C and D") +
|
||||
theme_bw() + theme(legend.position="none")
|
||||
```
|
||||
|
||||
# Bonus: QQ plots
|
||||
```{r}
|
||||
library(qqplotr)
|
||||
ggplot(data = dta, mapping = aes(sample = pase_6, color = rtreat, fill = rtreat)) +
|
||||
stat_qq_band(alpha=0.5, conf=0.95, qtype=1, bandType = "boot") +
|
||||
stat_qq_line(identity=TRUE) +
|
||||
stat_qq_point(col="black") +
|
||||
facet_wrap(~ rtreat, scales = "free") +
|
||||
labs(x = "Theoretical Quantiles", y = "Sample Quantiles") + theme_bw()
|
||||
```
|
||||
|
BIN
dep_median.pdf
Normal file
BIN
dep_median.pdf
Normal file
Binary file not shown.
220
dep_regression.Rmd
Normal file
220
dep_regression.Rmd
Normal file
@ -0,0 +1,220 @@
|
||||
---
|
||||
title: "Regression analyses"
|
||||
author: "Andreas Gammelgaard Damsbo"
|
||||
date: "Knitted: `r format(Sys.time(), '%d %B, %Y')`"
|
||||
output: pdf_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE, message = FALSE)
|
||||
```
|
||||
|
||||
|
||||
# Import
|
||||
```{r}
|
||||
dta_all<-read.csv("/Volumes/Data/depression/dep_dataset.csv")
|
||||
```
|
||||
|
||||
# Defining patients to include for analysis
|
||||
Only including cases with complete pase_0 and MDI at 1 & 6 months
|
||||
```{r}
|
||||
dta<-dta_all[!is.na(dta_all$pase_0),]
|
||||
# &!is.na(dta$mdi_1)&!is.na(dta$mdi_6)
|
||||
```
|
||||
|
||||
## Formatting
|
||||
```{r echo=FALSE}
|
||||
dta$diabetes<-factor(dta$diabetes)
|
||||
dta$pad<-factor(dta$pad)
|
||||
dta$civil<-factor(dta$civil)
|
||||
dta$hypertension<-factor(dta$hypertension)
|
||||
dta$afli<-factor(dta$afli)
|
||||
dta$smoke_ever<-factor(dta$smoke_ever)
|
||||
dta$ami<-factor(dta$ami)
|
||||
dta$tci<-factor(dta$tci)
|
||||
dta$thrombolysis<-factor(dta$thrombolysis)
|
||||
dta$thrombechtomy<-factor(dta$thrombechtomy)
|
||||
dta$rep_any<-factor(dta$rep_any)
|
||||
dta$pad<-factor(dta$pad)
|
||||
dta$nihss_0<-as.numeric(dta$nihss_0)
|
||||
dta$age<-as.numeric(dta$age)
|
||||
dta$rtreat<-factor(dta$rtreat)
|
||||
dta$sex<-factor(dta$sex)
|
||||
dta$pase_0<-as.numeric(dta$pase_0)
|
||||
dta$pase_6<-as.numeric(dta$pase_6)
|
||||
dta$bmi<-as.numeric(dta$bmi)
|
||||
dta$mdi_6<-as.numeric(dta$mdi_6)
|
||||
dta$pase_0_bin<-factor(dta$pase_0_bin,levels=c("lower","higher"))
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
# Backup
|
||||
dta_b<-dta
|
||||
```
|
||||
|
||||
|
||||
# Linear regression analysis
|
||||
|
||||
```{r}
|
||||
library(broom)
|
||||
library(daDoctoR)
|
||||
library(lubridate)
|
||||
```
|
||||
|
||||
## Tests of variables to adjust for
|
||||
```{r}
|
||||
# Possible variables to include
|
||||
adjs<-c("age","sex","smoke_ever","civil","bmi","diabetes", "hypertension", "afli","pad","nihss_0","rep_any")
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Variables with p<10% i bivariable linear regression analysis
|
||||
print(adjs_10<-rep_lm(meas = "mdi_6",string=c("pase_0_bin","rtreat",adjs),data=dta,cut.p = .1)[[2]])
|
||||
```
|
||||
|
||||
## True mean estimations (adjusted)
|
||||
|
||||
```{r}
|
||||
strt<-append(print_pred_stratum(meas = "mdi_6",adj=unique(c("pase_0_bin",adjs_10)),strat="rtreat",data=dta,include.stratum = T),print_pred_stratum(meas = "mdi_6",adj=c(adjs_10[adjs_10!="pase_0_bin"],"rtreat"),strat="pase_0_bin",data=dta,include.stratum = T))
|
||||
|
||||
for (i in 1:length(strt)){
|
||||
write.csv(strt[[i]][[1]],paste0("tbl_md6_",substr(names(strt)[i],1,3),".csv"))
|
||||
}
|
||||
|
||||
c<-c()
|
||||
for (i in 1:length(strt)){
|
||||
c<-c(c,paste("Estimated true mean,",names(strt)[i]),strt[[i]][[5]])
|
||||
}
|
||||
mat_true<-matrix(c(c,c("Variables adjusted for:",paste(c("rtreat",adjs_10), collapse=', '))), ncol=2, byrow = T)
|
||||
```
|
||||
|
||||
# MDI outcome 2x2
|
||||
|
||||
```{r}
|
||||
sts<-c("pase_0_bin","rtreat")
|
||||
# sts<-c("rtreat","pase_0_bin")
|
||||
adjs_10m<-adjs_10[adjs_10!="pase_0_bin"]
|
||||
```
|
||||
|
||||
## One month
|
||||
|
||||
### Enriched
|
||||
|
||||
```{r echo=FALSE}
|
||||
outs<-c("mdi_1_enr")
|
||||
|
||||
source("/Volumes/Data/depression/script_regression_frame.R")
|
||||
|
||||
show(reg_frm)
|
||||
write.csv(reg_frm,paste0(outs,"_matrix_",today(),".csv"))
|
||||
```
|
||||
|
||||
|
||||
### Raw
|
||||
```{r echo=FALSE}
|
||||
outs<-c("mdi_1")
|
||||
|
||||
source("/Volumes/Data/depression/script_regression_frame.R")
|
||||
|
||||
show(reg_frm)
|
||||
write.csv(reg_frm,paste0(outs,"_matrix_",today(),".csv"))
|
||||
```
|
||||
|
||||
|
||||
## Six months
|
||||
|
||||
### New Observations - enriched
|
||||
```{r echo=FALSE}
|
||||
outs<-c("mdi_6_newobs_enr") # Outcome is based on new observations for 2nd (6 months) visit
|
||||
|
||||
# mean(dta$inc_time[!is.na(dta[,outs])])
|
||||
# quantile(dta$inc_time[!is.na(dta[,outs])])
|
||||
|
||||
source("/Volumes/Data/depression/script_regression_frame.R")
|
||||
|
||||
show(reg_frm)
|
||||
write.csv(reg_frm,paste0(outs,"_matrix_",today(),".csv"))
|
||||
```
|
||||
|
||||
### New Observations
|
||||
```{r echo=FALSE}
|
||||
outs<-c("mdi_6_newobs") # Outcome is based on new observations for 2nd (6 months) visit
|
||||
|
||||
# mean(dta$inc_time[!is.na(dta[,outs])])
|
||||
# quantile(dta$inc_time[!is.na(dta[,outs])])
|
||||
|
||||
source("/Volumes/Data/depression/script_regression_frame.R")
|
||||
|
||||
show(reg_frm)
|
||||
write.csv(reg_frm,paste0(outs,"_matrix_",today(),".csv"))
|
||||
```
|
||||
|
||||
### New observations - adjusted for 6 months PASE
|
||||
```{r echo=FALSE}
|
||||
outs<-c("mdi_6_newobs")
|
||||
adjs_10m<-c(adjs_10[adjs_10!="pase_0_bin"],"pase_6")
|
||||
|
||||
# mean(dta$inc_time[!is.na(dta[,outs])])
|
||||
# quantile(dta$inc_time[!is.na(dta[,outs])])
|
||||
|
||||
source("/Volumes/Data/depression/script_regression_frame.R")
|
||||
|
||||
show(reg_frm)
|
||||
write.csv(reg_frm,paste0(outs,"_matrix_",today(),".csv"))
|
||||
```
|
||||
|
||||
|
||||
## Dichotomized sensitivity analysis
|
||||
|
||||
```{r}
|
||||
dta$composite_out<-case_when(dta$open_treat=="yes"|(dta$mdi_6_newobs-dta$mdi_1)>5~"yes",
|
||||
is.na(dta$mdi_6_newobs)~"NA",
|
||||
is.na(dta$mdi_1)~"NA",
|
||||
TRUE~"no")
|
||||
dta$composite_out[dta$composite_out=="NA"]<-NA
|
||||
summary(dta$composite_out<-factor(dta$composite_out))
|
||||
```
|
||||
|
||||
## Enriching and cleaning variables
|
||||
|
||||
```{r}
|
||||
# Enriching
|
||||
dta$pad[is.na(dta$pad)]<-"no"
|
||||
dta$hypertension[is.na(dta$hypertension)]<-"no"
|
||||
|
||||
# Cleaning
|
||||
dta$civil<-factor(ifelse(dta$civil=="unknown",NA,dta$civil))
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
table(dta$rtreat,dta$pase_0_bin)
|
||||
|
||||
outs<-"composite_out"
|
||||
sts<-c("pase_0_bin","rtreat")
|
||||
# sts<-c("rtreat","pase_0_bin")
|
||||
adjs_10m<-adjs_10[adjs_10!="pase_0_bin"]
|
||||
dta_frm<-dta[!is.na(dta$composite_out),c(outs,sts,adjs_10m)]
|
||||
|
||||
summary(dta_frm)
|
||||
|
||||
# colnames(dta_frm)[1]<-"outs"
|
||||
|
||||
# print_log(meas="composite_out",var=sts[2],adj=c(sts[1],adjs_10m),data=dta_frm)
|
||||
|
||||
# print_pred(meas="composite_out",adj=c(sts[2],adjs_10m),data=dta_frm[dta_frm$pase_0_bin=="lower",],n.by.adj = T)
|
||||
|
||||
composite_out_lst<-list(print_pred_stratum(meas="composite_out",strat = sts[1],adj=c(sts[2],adjs_10m),
|
||||
data=dta_frm,n.by.adj = T),
|
||||
print_pred_stratum(meas="composite_out",strat = sts[2],adj=c(sts[1],adjs_10m),
|
||||
data=dta_frm,n.by.adj = T))
|
||||
|
||||
# show(composite_out_lst)
|
||||
capture.output(show(composite_out_lst),
|
||||
file = paste0("composite_out_lst",today(),".txt"))
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
BIN
dep_regression.pdf
Normal file
BIN
dep_regression.pdf
Normal file
Binary file not shown.
162
dep_regression_interaction.R
Normal file
162
dep_regression_interaction.R
Normal file
@ -0,0 +1,162 @@
|
||||
## TALOS Depression regression analyses
|
||||
## Created: 07.apr.2022
|
||||
## New regression analyses incl anova in new tables based on gtsummary
|
||||
|
||||
|
||||
## =============================================================================
|
||||
## Data import and formatting
|
||||
## =============================================================================
|
||||
|
||||
dta_all<-read.csv("/Volumes/Data/depression/dep_dataset.csv",na.strings = c("NA","unknown"))
|
||||
|
||||
dta<-dta_all[!is.na(dta_all$pase_0),]
|
||||
|
||||
dta$diabetes<-factor(dta$diabetes)
|
||||
dta$pad<-factor(dta$pad)
|
||||
dta$civil<-factor(dta$civil)
|
||||
dta$hypertension<-factor(dta$hypertension)
|
||||
dta$afli<-factor(dta$afli)
|
||||
dta$smoke_ever<-factor(dta$smoke_ever)
|
||||
dta$ami<-factor(dta$ami)
|
||||
dta$tci<-factor(dta$tci)
|
||||
dta$thrombolysis<-factor(dta$thrombolysis)
|
||||
dta$thrombechtomy<-factor(dta$thrombechtomy)
|
||||
dta$rep_any<-factor(dta$rep_any,labels=c("no","yes"))
|
||||
dta$pad<-factor(dta$pad)
|
||||
dta$nihss_0<-as.numeric(dta$nihss_0)
|
||||
dta$age<-as.numeric(dta$age)
|
||||
dta$rtreat<-factor(dta$rtreat, levels=c("Placebo","Active"))
|
||||
dta$sex<-factor(dta$sex)
|
||||
dta$pase_0<-as.numeric(dta$pase_0)
|
||||
dta$pase_6<-as.numeric(dta$pase_6)
|
||||
dta$bmi<-as.numeric(dta$bmi)
|
||||
dta$mdi_6<-as.numeric(dta$mdi_6)
|
||||
dta$pase_0_bin<-factor(dta$pase_0_bin,levels=c("lower","higher"))
|
||||
|
||||
dta_b<-dta
|
||||
|
||||
|
||||
library(broom)
|
||||
library(daDoctoR)
|
||||
library(lubridate)
|
||||
library(gtsummary)
|
||||
library(dplyr)
|
||||
library(gt)
|
||||
|
||||
## =============================================================================
|
||||
## Regression setup
|
||||
## =============================================================================
|
||||
|
||||
# Possible variables to adjust for
|
||||
adjs<-c("age","sex","smoke_ever","civil","diabetes", "hypertension", "afli","pad","nihss_0","rep_any")
|
||||
|
||||
preds<-c("pase_0_bin","rtreat")
|
||||
|
||||
# Selection of variables
|
||||
print(adjs_10<-rep_lm(meas = "mdi_6",string=c(preds,adjs),data=dta,cut.p = .1)[[2]])
|
||||
|
||||
## =============================================================================
|
||||
## One month regression
|
||||
## =============================================================================
|
||||
|
||||
out="mdi_1_enr"
|
||||
|
||||
ds<-dta%>%select(all_of(c(out,preds,adjs_10)))
|
||||
|
||||
# AOV
|
||||
# aov<-aov(as.formula(paste(out,".",sep = "~")),
|
||||
# data = ds) %>%
|
||||
# tidy() %>%
|
||||
# gt()
|
||||
|
||||
# Interaction
|
||||
int<-lm(as.formula(paste(out,paste0(paste(preds,collapse = "*"),"+."),sep = "~")),
|
||||
data = ds) %>%
|
||||
tbl_regression()%>%
|
||||
# add_n%>%
|
||||
bold_labels() %>%
|
||||
italicize_levels()
|
||||
|
||||
# Bivariate
|
||||
biv<-tbl_uvregression(data=ds,
|
||||
y=out,
|
||||
method=lm
|
||||
) %>%
|
||||
bold_labels() %>%
|
||||
add_n%>%
|
||||
italicize_levels()
|
||||
|
||||
# Multivariate
|
||||
mul<-lm(as.formula(paste(out,".",sep = "~")),
|
||||
data = ds) %>%
|
||||
tbl_regression()%>%
|
||||
bold_labels() %>%
|
||||
add_n()%>%
|
||||
italicize_levels()
|
||||
|
||||
# Table merge
|
||||
tbl_comb<-tbl_merge(
|
||||
tbls = list(biv, mul,int),
|
||||
tab_spanner = c("**Bivariate**",
|
||||
"**Multivariate**",
|
||||
"**Multivariate with interaction**")
|
||||
)
|
||||
|
||||
tbl_comb
|
||||
|
||||
tbl_reg_1_rtf <- file("tbl_reg_1.RTF", "w")
|
||||
writeLines(tbl_comb%>%as_gt()%>%as_rtf(), tbl_reg_1_rtf)
|
||||
close(tbl_reg_1_rtf)
|
||||
|
||||
## =============================================================================
|
||||
## Six month regression
|
||||
## =============================================================================
|
||||
|
||||
out="mdi_6_newobs_enr"
|
||||
|
||||
ds<-dta%>%select(all_of(c(out,preds,adjs_10)))
|
||||
|
||||
# AOV
|
||||
# aov<-aov(as.formula(paste(out,".",sep = "~")),
|
||||
# data = ds) %>%
|
||||
# tidy() %>%
|
||||
# gt()
|
||||
|
||||
# Interaction
|
||||
int<-lm(as.formula(paste(out,paste0(paste(preds,collapse = "*"),"+."),sep = "~")),
|
||||
data = ds) %>%
|
||||
tbl_regression()%>%
|
||||
# add_n%>%
|
||||
bold_labels() %>%
|
||||
italicize_levels()
|
||||
|
||||
# Bivariate
|
||||
biv<-tbl_uvregression(data=ds,
|
||||
y=out,
|
||||
method=lm
|
||||
) %>%
|
||||
bold_labels() %>%
|
||||
add_n%>%
|
||||
italicize_levels()
|
||||
|
||||
# Multivariate
|
||||
mul<-lm(as.formula(paste(out,".",sep = "~")),
|
||||
data = ds) %>%
|
||||
tbl_regression()%>%
|
||||
bold_labels() %>%
|
||||
add_n()%>%
|
||||
italicize_levels()
|
||||
|
||||
# Table merge
|
||||
tbl_comb<-tbl_merge(
|
||||
tbls = list(biv, mul,int),
|
||||
tab_spanner = c("**Bivariate**",
|
||||
"**Multivariate**",
|
||||
"**Multivariate with interaction**")
|
||||
)
|
||||
|
||||
tbl_comb
|
||||
|
||||
tbl_reg_6_rtf <- file("tbl_reg_6.RTF", "w")
|
||||
writeLines(tbl_comb%>%as_gt()%>%as_rtf(), tbl_reg_1_rtf)
|
||||
close(tbl_reg_6_rtf)
|
118
dep_tableone.Rmd
Normal file
118
dep_tableone.Rmd
Normal file
@ -0,0 +1,118 @@
|
||||
---
|
||||
title: "Table One"
|
||||
author: "Andreas Gammelgaard Damsbo"
|
||||
date: "Knitted: `r format(Sys.time(), '%d %B, %Y')`"
|
||||
output: pdf_document
|
||||
---
|
||||
# Import
|
||||
```{r}
|
||||
dta<-read.csv("/Volumes/Data/depression/dep_dataset.csv")
|
||||
```
|
||||
|
||||
## Formatting
|
||||
```{r}
|
||||
dta$diabetes<-factor(dta$diabetes)
|
||||
dta$pad<-factor(dta$pad)
|
||||
dta$civil<-factor(dta$civil)
|
||||
dta$hypertension<-factor(dta$hypertension)
|
||||
dta$afli<-factor(dta$afli)
|
||||
dta$smoke_ever<-factor(dta$smoke_ever)
|
||||
dta$ami<-factor(dta$ami)
|
||||
dta$tci<-factor(dta$tci)
|
||||
dta$thrombolysis<-factor(dta$thrombolysis)
|
||||
dta$thrombechtomy<-factor(dta$thrombechtomy)
|
||||
dta$rep_any<-factor(dta$rep_any)
|
||||
dta$pad<-factor(dta$pad)
|
||||
dta$nihss_0<-as.numeric(dta$nihss_0)
|
||||
dta$age<-as.numeric(dta$age)
|
||||
dta$rtreat<-factor(dta$rtreat)
|
||||
dta$sex<-factor(dta$sex)
|
||||
dta$pase_0<-as.numeric(dta$pase_0)
|
||||
dta$bmi<-as.numeric(dta$bmi)
|
||||
dta$mdi_6<-as.numeric(dta$mdi_6)
|
||||
dta$inc_time<-as.numeric(dta$inc_time)
|
||||
```
|
||||
|
||||
# Defining patients to include for analysis
|
||||
Only including cases with complete pase_0 and MDI at 1 & 6 months
|
||||
```{r}
|
||||
dta<-dta[!is.na(dta$pase_0),]
|
||||
# &!is.na(dta$mdi_1)&!is.na(dta$mdi_6)
|
||||
```
|
||||
|
||||
## Defining table one stratification
|
||||
```{r}
|
||||
dta$strat_table_one<-factor(case_when(is.na(dta$mdi_6_newobs)~"zExcluded",
|
||||
dta$pase_0_bin=="lower"~"xLower",
|
||||
dta$pase_0_bin=="higher"~"yHigher"))
|
||||
|
||||
# summary(dta$strat_table_one)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(plyr)
|
||||
dta$in_ex<-mapvalues(dta$strat_table_one, from=c("xLower", "yHigher"), to=c("xIncluded","xIncluded"))
|
||||
# summary(dta$in_ex)
|
||||
library(dplyr)
|
||||
```
|
||||
|
||||
|
||||
# Basic analyses
|
||||
```{r}
|
||||
show(mdn<-median(dta$pase_0))
|
||||
hist(dta$pase_0,100)
|
||||
hist(sqrt(dta$pase_0),100)
|
||||
```
|
||||
|
||||
# Table One
|
||||
```{r}
|
||||
library(tableone)
|
||||
```
|
||||
|
||||
```{r}
|
||||
tbl_norm<-c("rtreat","age","sex","bmi","smoke_ever","civil","diabetes", "hypertension", "afli", "ami", "tci","pad","nihss_0", "thrombolysis", "thrombechtomy","rep_any","inc_time")
|
||||
tbl_cat<-c("rtreat","sex","diabetes", "hypertension", "smoke_ever","civil", "ami", "tci", "thrombolysis", "thrombechtomy","rep_any")
|
||||
tbl_non<-c("age","nihss_0","inc_time")
|
||||
```
|
||||
|
||||
```{r}
|
||||
tab1 <- CreateTableOne(vars = tbl_norm, data = dta, factorVars = tbl_cat,includeNA = TRUE)
|
||||
tbl1_1<-print(tab1, contDigits = 1, missing=T,showAllLevels=T ,nonnormal = tbl_non, smd = FALSE, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
tab2 <- CreateTableOne(vars = tbl_norm, strata="pase_0_bin",data = dta, factorVars = tbl_cat,includeNA = T)
|
||||
tbl1_2<-print(tab2, contDigits = 1, missing=T,showAllLevels=T ,nonnormal = tbl_non, smd = F,test = T, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
tab3 <- CreateTableOne(vars = tbl_norm, strata="strat_table_one",data = dta, factorVars = tbl_cat,includeNA = T)
|
||||
tbl1_3<-print(tab3, contDigits = 1, missing=T,showAllLevels=T ,nonnormal = tbl_non, smd = F,test = T, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
tab4 <- CreateTableOne(vars = tbl_norm, strata="in_ex",data = dta, factorVars = tbl_cat,includeNA = T)
|
||||
tbl1_4<-print(tab4, contDigits = 1, missing=T,showAllLevels=T ,nonnormal = tbl_non, smd = F,test = T, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
table(is.na(dta$nihss_0),dta$strat_table_one)
|
||||
table(is.na(dta$bmi),dta$strat_table_one)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
dta<-dta[dta$strat_table_one!="zExcluded",]
|
||||
dta$strat_table_one<-factor(dta$strat_table_one)
|
||||
tab5 <- CreateTableOne(vars = tbl_norm, strata="strat_table_one",data = dta, factorVars = tbl_cat,includeNA = T)
|
||||
tbl1_5<-print(tab5, contDigits = 1, missing=T,showAllLevels=T ,nonnormal = tbl_non, smd = F,test = T, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library(lubridate)
|
||||
tbl_list<-list(tbl1_1,tbl1_2,tbl1_3,tbl1_4,tbl1_5)
|
||||
for (i in 1:length(tbl_list)){
|
||||
nm<-paste0("tbl1_",i)
|
||||
write.csv(tbl_list[[i]],paste0("/Volumes/Data/depression/",nm,"_",unlist(strsplit(as.character(now()),"[ ]"))[1],".csv"))
|
||||
}
|
||||
```
|
BIN
dep_tableone.pdf
Normal file
BIN
dep_tableone.pdf
Normal file
Binary file not shown.
153
dep_tableone_enriched.Rmd
Normal file
153
dep_tableone_enriched.Rmd
Normal file
@ -0,0 +1,153 @@
|
||||
---
|
||||
title: "Table One - enriched"
|
||||
author: "Andreas Gammelgaard Damsbo"
|
||||
date: "Knitted: `r format(Sys.time(), '%d %B, %Y')`"
|
||||
output: pdf_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE, message = FALSE)
|
||||
```
|
||||
|
||||
|
||||
# Import
|
||||
```{r}
|
||||
dta<-read.csv("/Volumes/Data/depression/dep_dataset.csv")
|
||||
```
|
||||
|
||||
## Formatting
|
||||
```{r}
|
||||
dta$diabetes<-factor(dta$diabetes)
|
||||
dta$pad<-factor(dta$pad)
|
||||
dta$civil<-factor(dta$civil)
|
||||
dta$hypertension<-factor(dta$hypertension)
|
||||
dta$afli<-factor(dta$afli)
|
||||
dta$smoke_ever<-factor(dta$smoke_ever)
|
||||
dta$ami<-factor(dta$ami)
|
||||
dta$tci<-factor(dta$tci)
|
||||
dta$thrombolysis<-factor(dta$thrombolysis)
|
||||
dta$thrombechtomy<-factor(dta$thrombechtomy)
|
||||
dta$rep_any<-factor(dta$rep_any)
|
||||
dta$pad<-factor(dta$pad)
|
||||
dta$nihss_0<-as.numeric(dta$nihss_0)
|
||||
dta$age<-as.numeric(dta$age)
|
||||
dta$rtreat<-factor(dta$rtreat)
|
||||
dta$sex<-factor(dta$sex)
|
||||
dta$pase_0<-as.numeric(dta$pase_0)
|
||||
dta$bmi<-as.numeric(dta$bmi)
|
||||
dta$mdi_6<-as.numeric(dta$mdi_6)
|
||||
dta$inc_time<-as.numeric(dta$inc_time)
|
||||
|
||||
dta$bmi_isna<-is.na(dta$bmi)
|
||||
dta$nihss_0_isna<-is.na(dta$nihss_0)
|
||||
```
|
||||
|
||||
# Defining patients to include for analysis
|
||||
```{r message=FALSE}
|
||||
library(dplyr)
|
||||
```
|
||||
|
||||
|
||||
Only including cases with complete pase_0 and MDI at 1 & 6 months
|
||||
```{r}
|
||||
dta<-dta[!is.na(dta$pase_0),]
|
||||
# &!is.na(dta$mdi_1)&!is.na(dta$mdi_6)
|
||||
```
|
||||
|
||||
## Defining table one stratification
|
||||
```{r}
|
||||
dta$strat_table_one<-factor(case_when(dta$excluded_6%in%c("mi_6","ex_6")~"zExcluded",
|
||||
dta$pase_0_bin=="lower"~"xLower",
|
||||
dta$pase_0_bin=="higher"~"yHigher"))
|
||||
|
||||
summary(dta$strat_table_one)
|
||||
```
|
||||
|
||||
```{r}
|
||||
dta$in_ex<-plyr::mapvalues(dta$strat_table_one,
|
||||
from=c("xLower", "yHigher"),
|
||||
to=c("xIncluded","xIncluded"))
|
||||
# summary(dta$in_ex)
|
||||
```
|
||||
|
||||
|
||||
# Basic analyses
|
||||
```{r}
|
||||
show(mdn<-median(dta$pase_0))
|
||||
hist(dta$pase_0,100)
|
||||
hist(sqrt(dta$pase_0),100)
|
||||
```
|
||||
|
||||
# Table One
|
||||
```{r}
|
||||
library(tableone)
|
||||
```
|
||||
|
||||
```{r}
|
||||
tbl_norm<-c("rtreat","age","sex","bmi","bmi_isna","smoke_ever","civil","diabetes",
|
||||
"hypertension", "afli", "ami", "tci","pad","nihss_0","nihss_0_isna",
|
||||
"thrombolysis", "thrombechtomy","rep_any","inc_time")
|
||||
|
||||
tbl_cat<-c("rtreat","sex","bmi_isna","diabetes", "hypertension", "smoke_ever","civil",
|
||||
"ami", "tci","nihss_0_isna", "thrombolysis",
|
||||
"thrombechtomy","rep_any")
|
||||
|
||||
tbl_non<-c("age","nihss_0","inc_time")
|
||||
```
|
||||
|
||||
```{r}
|
||||
tab1 <- CreateTableOne(vars = tbl_norm, data = dta,
|
||||
factorVars = tbl_cat,includeNA = TRUE)
|
||||
|
||||
tbl1_1<-print(tab1, contDigits = 1, missing=T,showAllLevels=T ,
|
||||
nonnormal = tbl_non, smd = FALSE, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
tab2 <- CreateTableOne(vars = tbl_norm, strata="pase_0_bin",
|
||||
data = dta, factorVars = tbl_cat,includeNA = T)
|
||||
|
||||
tbl1_2<-print(tab2, contDigits = 1, missing=T,showAllLevels=T,
|
||||
nonnormal = tbl_non, smd = F,test = T, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
tab3 <- CreateTableOne(vars = tbl_norm, strata="strat_table_one",
|
||||
data = dta, factorVars = tbl_cat,includeNA = T)
|
||||
tbl1_3<-print(tab3, contDigits = 1, missing=T,showAllLevels=T,
|
||||
nonnormal = tbl_non, smd = F,test = T, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
tab4 <- CreateTableOne(vars = tbl_norm, strata="in_ex",
|
||||
data = dta, factorVars = tbl_cat,includeNA = T)
|
||||
|
||||
tbl1_4<-print(tab4, contDigits = 1, missing=T,showAllLevels=T,
|
||||
nonnormal = tbl_non, smd = F,test = T, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
table(is.na(dta$nihss_0),dta$strat_table_one)
|
||||
table(is.na(dta$bmi),dta$strat_table_one)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
dta<-dta[dta$strat_table_one!="zExcluded",]
|
||||
dta$strat_table_one<-factor(dta$strat_table_one)
|
||||
tab5 <- CreateTableOne(vars = tbl_norm, strata="strat_table_one",
|
||||
data = dta, factorVars = tbl_cat,includeNA = T)
|
||||
|
||||
tbl1_5<-print(tab5, contDigits = 1, missing=T,showAllLevels=T,
|
||||
nonnormal = tbl_non, smd = F,test = T, quote = F, noSpaces = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
tbl_list<-list(tbl1_1,tbl1_2,tbl1_3,tbl1_4,tbl1_5)
|
||||
for (i in 1:length(tbl_list)){
|
||||
nm<-paste0("tbl1_",i)
|
||||
write.csv(tbl_list[[i]],
|
||||
paste0("/Volumes/Data/depression/",nm,"_enr_",
|
||||
lubridate::today(),".csv"))
|
||||
}
|
||||
```
|
BIN
dep_tableone_enriched.pdf
Normal file
BIN
dep_tableone_enriched.pdf
Normal file
Binary file not shown.
10
function_chi_test_sum.R
Normal file
10
function_chi_test_sum.R
Normal file
@ -0,0 +1,10 @@
|
||||
chi_test_sum<-function(a,b,aname=NULL,bname=NULL){
|
||||
chi<-chisq.test(table(a,b))
|
||||
|
||||
lst<-list(matrix(chi$observed,nrow=2,byrow = FALSE,dimnames = list(levels(factor(a)),levels(factor(b)))),chi$p.value)
|
||||
names(lst)<-c(paste("observed,",aname,"vs",bname),"pval^")
|
||||
return(lst)
|
||||
}
|
||||
|
||||
|
||||
|
54
function_flow.R
Normal file
54
function_flow.R
Normal file
@ -0,0 +1,54 @@
|
||||
flow_prog<-function(df,sngl,sngl_keep,mltp=NULL){
|
||||
# df is the data frame
|
||||
# sngl is a vector of variable names to exclude from the data frame one by one, prioritised
|
||||
# sngl_keep is a variable of names of the same length as sngl containing the vector names of cases to keep for flow analysis
|
||||
# mltp is a vector of variable names to count, can be one or more
|
||||
# first sngl, then mltp
|
||||
|
||||
# all specified variables should be factors or "factorisable"
|
||||
|
||||
# If categorical variables are decided, define a new variable. More elegant solution is desirable, but this works (!).
|
||||
|
||||
# Dependencies
|
||||
library(dplyr)
|
||||
|
||||
# Small, home made, general summary function with naming
|
||||
summary_list<-function(var){
|
||||
v<-factor(var)
|
||||
x<-levels(v)
|
||||
y<-summary(v)
|
||||
lst<-list(data.frame(matrix(y,nrow=length(x),byrow = TRUE,dimnames = list(x,deparse(substitute(var)))))) # deparse and substitue is used to get the name of the vector for naming the matrix. Nice!
|
||||
return(lst)
|
||||
}
|
||||
|
||||
slist<-list(nrow(df))
|
||||
names(slist)[1]<-"nrow of provided data frame"
|
||||
|
||||
# Handling sngl first
|
||||
for (i in 1:length(sngl)){
|
||||
z<-summary_list(df[,sngl[i]])
|
||||
colnames(z[[1]])<-sngl[i]
|
||||
slist[length(slist)+1] <- z
|
||||
names(slist)[length(slist)]<-sngl[i]
|
||||
df <- df %>% filter(df[,sngl[i]]==sngl_keep[i])
|
||||
}
|
||||
|
||||
# Handling mltp
|
||||
|
||||
if (!is.null(mltp)){
|
||||
for (i in 1:length(mltp)){
|
||||
z<-summary_list(df[,mltp[i]])
|
||||
colnames(z[[1]])<-mltp[i]
|
||||
slist[length(slist)+1] <- z
|
||||
names(slist)[length(slist)]<-paste0(mltp[i],"_for_",last(sngl),"==",last(sngl_keep))
|
||||
}
|
||||
}
|
||||
|
||||
return(slist)
|
||||
}
|
||||
|
||||
|
||||
df<-dta
|
||||
sngl<-c("mors_v1","drop")
|
||||
sngl_keep<-c("no","yes")
|
||||
mltp<-c("open_treat","wants_out","side_effect","side_effect2")
|
32
script_distribution_plot.R
Normal file
32
script_distribution_plot.R
Normal file
@ -0,0 +1,32 @@
|
||||
# Distribution plot
|
||||
library(ggplot2)
|
||||
library(cowplot)
|
||||
|
||||
pase_hist<-ggplot(data=dta, aes(pase_0)) +
|
||||
geom_histogram(binwidth = 10, aes(y =..density..),
|
||||
col="orange",
|
||||
fill="orange",
|
||||
alpha=.2) +
|
||||
geom_density(col="tomato") +
|
||||
geom_vline(xintercept=median(dta$pase_0),col="navy") +
|
||||
scale_x_continuous(limits=c(0,600) ,breaks=seq(0,600,by=50)) +
|
||||
labs(title="Baseline PASE distribution", x="PASE score", y="Count") +
|
||||
theme(axis.title.x=element_blank(),
|
||||
axis.text.x=element_blank(),
|
||||
axis.ticks.x=element_blank(),
|
||||
axis.text.y=element_blank(),
|
||||
axis.ticks.y=element_blank()
|
||||
)
|
||||
|
||||
pase_box<-ggplot(data=dta, aes(pase_0,y=1)) +
|
||||
geom_boxplot(col="olivedrab",
|
||||
fill="olivedrab",
|
||||
alpha=.2) +
|
||||
geom_jitter(col="maroon", alpha=0.3) +
|
||||
scale_x_continuous(limits=c(0,600) , breaks=seq(0,600,by=50)) +
|
||||
labs(title=NULL, x="PASE score", y="Count") +
|
||||
theme(axis.text.y=element_blank(),
|
||||
axis.ticks.y=element_blank()
|
||||
)
|
||||
|
||||
pg<-cowplot::plot_grid(pase_hist, pase_box, align = "v", ncol = 1, rel_heights = c(0.7, 0.3))
|
21
script_flowchart.R
Normal file
21
script_flowchart.R
Normal file
@ -0,0 +1,21 @@
|
||||
# Flowchart
|
||||
|
||||
# Data set defined in dep_regression.Rmd
|
||||
summary_list<-function(var){
|
||||
v<-factor(var)
|
||||
x<-levels(v)
|
||||
y<-summary(v)
|
||||
lst<-list(data.frame(matrix(y,nrow=length(x),byrow = TRUE,dimnames = list(x,deparse(substitute(var)))))) # deparse and substitue is used to get the name of the vector for naming the matrix. Nice!
|
||||
return(lst)
|
||||
}
|
||||
|
||||
slist<-summary_list(dta$drop)
|
||||
|
||||
dta<-dta[dta$drop=="yes",]
|
||||
|
||||
vars_l<-c("open_treat","wants_out","side_effect","side_effect2")
|
||||
for (i in 1:length(vars_l)){
|
||||
z<-summary_list(dta[,vars_l[i]])
|
||||
colnames(z[[1]])<-vars_l[i]
|
||||
slist[length(slist)+1] <- z
|
||||
}
|
56
script_regression_frame.R
Normal file
56
script_regression_frame.R
Normal file
@ -0,0 +1,56 @@
|
||||
envir_b<-ls()
|
||||
## Regression frame
|
||||
dta_frm<-dta[,c(outs,sts,adjs_10m)]
|
||||
colnames(dta_frm)[1]<-"outs"
|
||||
|
||||
lst<-list()
|
||||
## 'rtreat' horisontal, 'pase_0_bin' vertical
|
||||
for (i in 1:length(outs)){
|
||||
for (j in 1:length(sts)){
|
||||
ls_22<-list(print_diff_bygroup(meas="outs",group=sts[j],var=sts[length(sts)+1-j],adj=adjs_10m,data=dta_frm))
|
||||
names(ls_22)<-paste0(outs[i],"_",sts[j],"_ver_",sts[length(sts)+1-j],"_hor")
|
||||
lst<-append(lst,ls_22)
|
||||
}
|
||||
lst<-append(lst,print_pred_stratum(meas="outs",adj=c(sts[1],adjs_10m),strat=sts[2],data=dta_frm,include.stratum = T)[1])
|
||||
}
|
||||
|
||||
# outs (outcome), Active
|
||||
oadr<-lst[[2]][c(6,8)][2,1] # LvsH, Diff raw
|
||||
oada<-lst[[2]][c(6,8)][2,2] # LvsH, Diff adj
|
||||
#oada_1<-oada
|
||||
|
||||
# outs, Placebo
|
||||
opdr<-lst[[2]][c(6,8)][3,1] # LvsH, Diff raw
|
||||
opda<-lst[[2]][c(6,8)][3,2] # LvsH, Diff adj
|
||||
#opda_1<-opda
|
||||
|
||||
spc<-""
|
||||
spc4<-c("","","","")
|
||||
|
||||
mdi1_tbl<-rbind(lst[[1]],matrix(c("Unadjusted mean diff.",spc,oadr,spc,opdr,spc4,
|
||||
"Adjusted mean diff.",spc,oada,spc,opda,spc4),ncol=ncol(lst[[1]]),byrow=T,dimnames = list(c("a","b"),names(lst[[1]]))))
|
||||
|
||||
# write.csv(mdi1_tbl,"mdi1_2x2.csv")
|
||||
|
||||
reg_frm<-cbind(mdi1_tbl[,1],"",mdi1_tbl[,2:6],mdi1_tbl[,8])
|
||||
names(reg_frm)<-c("By_PA", "Rand_Total", "N_Active", "Mean_Active", "N_Placebo", "Mean_Placebo","Unadjusted_mean_diff", "Adjusted_mean_diff")
|
||||
reg_frm[1,1]<-"PASE_total"
|
||||
reg_frm[1,2]<-paste0(round(mean(dta_frm$outs, na.rm = TRUE), 1)," (",round(sd(dta_frm$outs, na.rm = TRUE), 1),")")
|
||||
reg_frm[1,3]<-nrow(dta_frm[dta_frm$rtreat=="Active"&!is.na(dta_frm$outs),])
|
||||
reg_frm[1,4]<-paste0(round(mean(dta_frm$outs[dta_frm$rtreat=="Active"], na.rm = TRUE), 1)," (",round(sd(dta_frm$outs[dta_frm$rtreat=="Active"], na.rm = TRUE), 1),")")
|
||||
reg_frm[1,5]<-nrow(dta_frm[dta_frm$rtreat=="Placebo"&!is.na(dta_frm$outs),])
|
||||
reg_frm[1,6]<-paste0(round(mean(dta_frm$outs[dta_frm$rtreat=="Placebo"], na.rm = TRUE), 1)," (",round(sd(dta_frm$outs[dta_frm$rtreat=="Placebo"], na.rm = TRUE), 1),")")
|
||||
|
||||
## Det var fedt med en universel løsning her, så den vender rigtigt i forhold til vektoren
|
||||
reg_frm[2,2]<-paste0(round(mean(dta_frm$outs[dta_frm$pase_0_bin=="lower"], na.rm = TRUE), 1)," (",round(sd(dta_frm$outs[dta_frm$pase_0_bin=="lower"], na.rm = TRUE), 1),")")
|
||||
reg_frm[3,2]<-paste0(round(mean(dta_frm$outs[dta_frm$pase_0_bin=="higher"], na.rm = TRUE), 1)," (",round(sd(dta_frm$outs[dta_frm$pase_0_bin=="higher"], na.rm = TRUE), 1),")")
|
||||
|
||||
reg_frm[4,2]<-lst[[3]][[1]][7,3]
|
||||
reg_frm[5,2]<-lst[[3]][[1]][7,4]
|
||||
|
||||
reg_frm[1,7]<-lst[[3]][[1]][4,3]
|
||||
reg_frm[1,8]<-lst[[3]][[1]][4,4]
|
||||
|
||||
source("/Volumes/Data/func/remove_all_but.R")
|
||||
|
||||
remove_all_but(c("reg_frm",envir_b))
|
13
talos-pa-depression.Rproj
Normal file
13
talos-pa-depression.Rproj
Normal file
@ -0,0 +1,13 @@
|
||||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Default
|
||||
SaveWorkspace: Default
|
||||
AlwaysSaveHistory: Default
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: Yes
|
||||
NumSpacesForTab: 2
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
1051
tbl_reg_1.RTF
Normal file
1051
tbl_reg_1.RTF
Normal file
File diff suppressed because it is too large
Load Diff
1051
tbl_reg_6.RTF
Normal file
1051
tbl_reg_6.RTF
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user