##  Tests and visualisation
source("data_format.R")

## The Box-Cox power transformation performs comparably to the logarithmic transformation. The latter is much easier to explain.


library(MASS)
## Modelling data: the outcome plus the covariates named in `vars`, with
## pase_0 square-root transformed and the outcome shifted by +1
## (presumably to keep it strictly positive for log / Box-Cox -- confirm).
## A trailing na.omit() step was left disabled in the original pipeline.
dta_bc <- mutate(
  dplyr::select(dta_backup, all_of(c("mdi_6_newobs_enr", vars))),
  pase_0 = sqrt(pase_0),
  mdi_6_newobs_enr = mdi_6_newobs_enr + 1
)

## Box-Cox profile for the full model; lambda is the grid value (bc$x) at
## which the returned profile (bc$y) is largest.
bc <- boxcox(mdi_6_newobs_enr ~ ., data = dta_bc)
lambda <- with(bc, x[which.max(y)])


## Q-Q plots of model residuals to compare the two transformations (Box-Cox and log); the non-transformed response is not plotted below.


## Box-Cox transform of a positive response `y` for a given `lambda`.
## Falls back to log(y) when lambda is numerically zero, where
## (y^lambda - 1) / lambda is undefined (0 / 0).
box_cox_transform <- function(y, lambda, tol = 1e-8) {
  if (abs(lambda) < tol) {
    log(y)
  } else {
    (y^lambda - 1) / lambda
  }
}

## Normal Q-Q plots of the residuals from the full model, once with the
## Box-Cox transformed outcome and once with the log transformed outcome.
## qqnorm() draws the plot itself and returns the plotted coordinates, so
## q1 / q2 are kept for later use and no separate plot() call is needed
## (the previous plot(q1); plot(q2) redrew each plot a second time without
## titles).
q1 <- qqnorm(
  lm(box_cox_transform(mdi_6_newobs_enr, lambda) ~ ., data = dta_bc)$residuals,
  main = "Normal Q-Q plot: Box-Cox transformed MDI"
)

q2 <- qqnorm(
  lm(log(mdi_6_newobs_enr) ~ ., data = dta_bc)$residuals,
  main = "Normal Q-Q plot: log transformed MDI"
)

## NOTE(review): patchwork is attached here but not used by the base-graphics
## plots in this section; kept in case later code relies on it.
library(patchwork)

## Histograms for reference


## pase_0 before and after the square-root transformation.
## dta_bc$pase_0 is already square-rooted when dta_bc is built, so the
## untransformed values are taken from dta_backup (taking sqrt() of
## dta_bc$pase_0 again would show the fourth root instead).
h1 <- hist(
  dta_backup$pase_0,
  breaks = 40,
  main = "pase_0 (untransformed)",
  xlab = "pase_0"
)
hist(
  dta_bc$pase_0,
  breaks = 40,
  main = "pase_0 (square root)",
  xlab = "sqrt(pase_0)"
)

## Log transformed MDI, and observed MDI. Both use the outcome as stored in
## dta_bc, i.e. shifted by +1.
h2 <- hist(
  log(dta_bc$mdi_6_newobs_enr),
  breaks = 40,
  main = "MDI + 1 (log transformed)",
  xlab = "log(mdi_6_newobs_enr)"
)
hist(
  dta_bc$mdi_6_newobs_enr,
  breaks = 40,
  main = "MDI + 1 (observed)",
  xlab = "mdi_6_newobs_enr"
)

## hist() draws each histogram when called, so h1 / h2 do not need to be
## re-plotted here.