## Tests and visualisation
##
## Compares a Box-Cox power transformation of the outcome with a plain
## logarithmic transformation, using Q-Q plots of model residuals, and draws
## reference histograms of the transformed variables.
## Expects `dta_backup` and `vars` to exist after sourcing data_format.R
## (presumably defined there -- confirm).
source("data_format.R")

## MASS masks dplyr::select(), hence the explicit dplyr:: prefixes below.
library(MASS)
library(patchwork)

## Box-Cox transform of a strictly positive vector. At lambda = 0 the power
## form divides by zero; its limit there is the natural logarithm.
box_cox <- function(y, lambda) {
  if (abs(lambda) < sqrt(.Machine$double.eps)) {
    log(y)
  } else {
    (y^lambda - 1) / lambda
  }
}

## Residuals from regressing the (optionally transformed) outcome on every
## other column of `data`.
## The transformed values overwrite the outcome column on purpose: with a call
## such as log(y) on the left-hand side, `.` still expands to include the
## untransformed y as a predictor, which makes the residuals meaningless.
outcome_residuals <- function(data, transform = identity) {
  data$mdi_6_newobs_enr <- transform(data$mdi_6_newobs_enr)
  residuals(lm(mdi_6_newobs_enr ~ ., data = data))
}

## Box-Cox power transformation performs comparably to logarithmic
## transformation. The latter is much easier to explain.
## Both need a strictly positive response; the +1 shift presumably guards
## against zero scores (confirm against the scale definition).
## Rows with missing values are kept here (na.omit() is deliberately not
## applied); lm() drops them under the default na.action.
dta_bc <- dta_backup |>
  dplyr::select(dplyr::all_of(c("mdi_6_newobs_enr", vars))) |>
  dplyr::mutate(
    pase_0 = sqrt(pase_0),
    mdi_6_newobs_enr = mdi_6_newobs_enr + 1
  )

## Profile log-likelihood over lambda; keep the lambda that maximises it.
bc <- boxcox(mdi_6_newobs_enr ~ ., data = dta_bc)
lambda <- bc$x[which.max(bc$y)]

## Q-Q plots to compare the two different approaches, and the non-transformed
q0 <- qqnorm(
  outcome_residuals(dta_bc),
  main = "Normal Q-Q Plot: untransformed"
)
q1 <- qqnorm(
  outcome_residuals(dta_bc, function(y) box_cox(y, lambda)),
  main = "Normal Q-Q Plot: Box-Cox"
)
q2 <- qqnorm(
  outcome_residuals(dta_bc, log),
  main = "Normal Q-Q Plot: log"
)

## qqnorm() has already drawn each plot; these redraw the stored coordinates.
plot(q0)
plot(q1)
plot(q2)

## Histograms for reference
## pase_0 is already square-root transformed inside dta_bc, so the raw values
## are taken from dta_backup rather than applying sqrt() a second time.
h1 <- hist(dta_bc$pase_0, breaks = 40)
hist(dta_backup$pase_0, breaks = 40)

## Log transformed MDI, and observed MDI (both on the +1 shifted scale)
h2 <- hist(log(dta_bc$mdi_6_newobs_enr), breaks = 40)
hist(dta_bc$mdi_6_newobs_enr, breaks = 40)

plot(h1)
plot(h2)