36 lines
933 B
R
36 lines
933 B
R
|
## Tests and visualisation
|
||
|
source("data_format.R")
|
||
|
|
||
|
## Box-Cox power transformation performs comparably to logarithmic transformation. The latter is much easier to explain.
|
||
|
|
||
|
|
||
|
library(MASS)
|
||
|
# Modelling data for the transformation comparison: the outcome together with
# the covariates named in `vars`, taken from `dta_backup`.
#   * pase_0 is put on the square-root scale.
#   * The outcome is shifted up by one (presumably so that zero scores remain
#     valid input for the log / Box-Cox transforms below - confirm).
# Rows with missing values are kept here; the trailing na.omit() step is left
# disabled, as it was commented out.
dta_bc <- mutate(
  dplyr::select(dta_backup, all_of(c("mdi_6_newobs_enr", vars))),
  pase_0 = sqrt(pase_0),
  mdi_6_newobs_enr = mdi_6_newobs_enr + 1
)
|
||
|
|
||
|
# Profile the Box-Cox log-likelihood for the outcome regressed on every other
# column of dta_bc. boxcox() returns the lambda grid in $x and the matching
# log-likelihood values in $y (and draws the profile as a side effect).
bc <- boxcox(mdi_6_newobs_enr ~ ., data = dta_bc)

# Keep the lambda at which the profiled log-likelihood is highest.
lambda <- with(bc, x[which.max(y)])
|
||
|
|
||
|
|
||
|
## Q-Q plots to compare the two different approaches, and the non-transformed
|
||
|
|
||
|
|
||
|
# Residual Q-Q plots for the same linear model fitted to three versions of the
# outcome: Box-Cox transformed (using `lambda`), log transformed, and
# untransformed. The outcome in dta_bc carries a +1 shift; for the
# untransformed fit this only moves the intercept, so its residuals equal
# those of the unshifted outcome.
res_bc <- residuals(
  lm(((mdi_6_newobs_enr^lambda - 1) / lambda) ~ ., data = dta_bc)
)
res_log <- residuals(lm(log(mdi_6_newobs_enr) ~ ., data = dta_bc))
res_raw <- residuals(lm(mdi_6_newobs_enr ~ ., data = dta_bc))

library(patchwork)

# Draw the three panels side by side, each titled and with a reference line.
# qqnorm() already draws its plot, so the returned coordinates (q1, q2, q3)
# are kept for later use but not re-plotted: plot(q1) would only redraw the
# same points as a plain scatter plot.
old_par <- par(mfrow = c(1, 3))
q1 <- qqnorm(res_bc, main = "Box-Cox transformed")
qqline(res_bc)
q2 <- qqnorm(res_log, main = "Log transformed")
qqline(res_log)
q3 <- qqnorm(res_raw, main = "Untransformed")
qqline(res_raw)
par(old_par) # restore the previous graphics layout
|
||
|
|
||
|
## Histograms for reference
|
||
|
|
||
|
|
||
|
# Histograms for reference: each variable on its observed scale next to the
# scale used in the models. Observed values are read from `dta_backup`,
# because `dta_bc` already holds sqrt(pase_0) and the outcome shifted by one;
# taking both panels from `dta_bc` would compare the square root with the
# fourth root of pase_0 and show MDI + 1 instead of the observed MDI.
# h1 (observed pase_0) and h2 (log-transformed MDI) keep the histogram
# objects; hist() already draws each panel, so they are not re-plotted.
old_par <- par(mfrow = c(2, 2))
h1 <- hist(dta_backup$pase_0, breaks = 40, main = "pase_0, observed")
hist(dta_bc$pase_0, breaks = 40, main = "pase_0, square root")
hist(dta_backup$mdi_6_newobs_enr, breaks = 40, main = "MDI, observed")
h2 <- hist(log(dta_bc$mdi_6_newobs_enr), breaks = 40, main = "MDI + 1, log")
par(old_par) # restore the previous graphics layout
|