## ItMLiHSmar2022
## assign_full.R, child script
## Full model building and analysis for the assignment
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
## ====================================================================
## Step 0: data import and wrangling
## ====================================================================
# source("data_format.R")
## ====================================================================
## Step 1: settings
## ====================================================================
K<-10
n<-nrow(X)
set.seed(321)
# Using caret's createFolds() to stratify, so both outcome levels appear in every fold;
# with list = FALSE it returns a fold-membership vector (returnTrain only applies to lists).
# Named "folds" to avoid masking base::c().
folds<-createFolds(y=y, k = K, list = FALSE)
B<-list()
auc_train<-auc_test<-c()
catinfo<-levels(y)
# Initialise zero-filled 2x2 confusion matrices carrying the outcome levels as dimnames
cMatTrain<-cMatTest<-table(factor(c(0,0),levels=catinfo),factor(c(0,0),levels=catinfo))
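# Optional sanity check: every column below should contain both outcome levels
# table(y, folds)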
## ====================================================================
## Step 2: cross validation
## ====================================================================
set.seed(321)
## Iterate over partitions
for (idx1 in 1:K){
# Status
cat('Processing fold', idx1, 'of', K,'\n')
# Get training and test sets for this fold
I_train = folds!=idx1 ## logical selection vector: TRUE marks training rows
I_test = !I_train
Xtrain = X[I_train,]
ytrain = y[I_train]
Xtest = X[I_test,]
ytest = y[I_test]
# Z-score standardisation
source("standardise.R")
list2env(standardise(Xtrain,Xtest,type="cs"),.GlobalEnv)
## Outputs XtrainSt and XtestSt
## Standardised by centering and scaling
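## standardise() comes from standardise.R (not shown here). A hypothetical
## sketch of the assumed interface -- centre/scale numeric columns using the
## *training* statistics only, so no information leaks from the test fold:
# standardise <- function(Xtrain, Xtest, type = "cs") {
#   num <- vapply(Xtrain, is.numeric, logical(1))
#   mu <- vapply(Xtrain[num], mean, numeric(1))
#   s <- vapply(Xtrain[num], sd, numeric(1))
#   Xtrain[num] <- Map(function(x, m, d) (x - m)/d, Xtrain[num], mu, s)
#   Xtest[num] <- Map(function(x, m, d) (x - m)/d, Xtest[num], mu, s)
#   list(XtrainSt = Xtrain, XtestSt = Xtest)
# }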
## Model matrices (only needed if switching to glmnet; unused with glm below)
# Xmat.train<-model.matrix(~.-1,XtrainSt)
# Xmat.test<-model.matrix(~.-1,XtestSt)
# Inverse-frequency class weights to counter outcome imbalance
ytrain_weight<-as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))
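# e.g. if one level holds 80 of 100 training rows, its observations get weight
# 0.2 and the minority level's get 0.8, so the rarer class counts more in the fit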
# Fit weighted logistic regression model
mod<-glm(ytrain~.,
data=XtrainSt,
weights = ytrain_weight,
family = stats::quasibinomial(link = "logit"))
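# Note: quasibinomial with non-integer weights avoids the "non-integer
# #successes" warning that family = binomial would emit; the coefficient
# estimates are identical, but no AIC/likelihood is reported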
# Keep the fitted model; coefficients and CIs are pooled in Step 3
B[[idx1]] <- mod
# Predict
yhatTrainProb<-predict(mod,
newdata = XtrainSt,
type = "response"
)
yhatTestProb<-predict(mod,
newdata = XtestSt,
type = "response"
)
# Threshold predicted probabilities at 0.5
yhatTrain = round(yhatTrainProb)
yhatTest = round(yhatTestProb)
# Make predictions categorical again (instead of 0/1 coding)
yhatTrainCat = factor(yhatTrain,levels=c("0","1"),labels=catinfo,ordered = TRUE)
yhatTestCat = factor(yhatTest,levels=c("0","1"),labels=catinfo,ordered = TRUE)
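# An explicit cutoff is equivalent here (round() is round-half-to-even at
# exactly 0.5) and makes the threshold easy to change, e.g.:
# yhatTestCat = factor(as.integer(yhatTestProb > 0.5),levels=c("0","1"),labels=catinfo,ordered = TRUE)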
# Accumulate confusion matrices across folds
cMatTrain = cMatTrain + table(ytrain,yhatTrainCat)
cMatTest = cMatTest + table(ytest,yhatTestCat)
# AUROC computed on the predicted probabilities rather than the thresholded
# classes, so the full ranking information is used
suppressMessages(
auc_train[idx1]<-auc(ytrain, yhatTrainProb))
suppressMessages(
auc_test[idx1]<-auc(ytest, yhatTestProb))
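# pROC guesses levels/direction each fold (hence suppressMessages); both can
# be pinned explicitly if preferred, e.g. (assuming the first level is the control):
# auc_test[idx1]<-auc(roc(ytest, yhatTestProb, levels=catinfo, direction="<"))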
}
## ====================================================================
## Step 3: creating table of coefficients for inference
## ====================================================================
# Profile-likelihood 95% CIs for each fold model, on the OR scale
confs<-lapply(1:K, function(x){
cs<-exp(confint(B[[x]]))
lo<-cs[,1]
hi<-cs[,2]
return(list(lo=lo,hi=hi))
})
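# Note: confint() on a glm profiles the likelihood (via MASS), and averaging
# the fold-wise bounds when building ds below is a simple heuristic rather
# than a formally pooled confidence interval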
# Average each coefficient across the K fold models
coefs<-apply(Reduce('cbind', lapply(B,"[[", "coefficients")),1,mean)
var.labels<-c(var.labels,'(Intercept)'="Intercept") ## var.labels presumably comes from data_format.R
## sub() strips the trailing "2" factor-level suffix so names match var.labels
ds<-tibble(name=var.labels[match(sub("2$","",names(coefs)), names(var.labels))],
coefs=coefs,
OR=round(exp(coefs),3),
CIs=paste0("(",
round(apply(Reduce('cbind', lapply(confs,"[[", "lo")),1,mean),3),
",",
round(apply(Reduce('cbind', lapply(confs,"[[", "hi")),1,mean),3),
")")
)
#
full_coef_tbl<-ds%>%
gt(rowname_col = "name") ## rowname_col takes a column name; "name" holds the labels
#
full_coef_tbl
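# Optional cosmetic labels via gt's cols_label(), e.g.:
# full_coef_tbl<-full_coef_tbl%>%
#   cols_label(coefs="log(OR)", OR="OR", CIs="95% CI")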
## ====================================================================
## Step 4: plotting classification performance
## ====================================================================
full_cfm<-confusionMatrix(cMatTest) ## caret summary of the pooled test-fold confusion matrix
full_auc_sum<-summary(auc_test) ## distribution of the per-fold test AUROC
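# Inspect the pooled results interactively:
# full_cfm     ## accuracy, sensitivity, specificity etc. from caret
# full_auc_sum ## min/median/mean/max of the per-fold test AUROC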