PhysicalActivityandStrokeOu.../1 PA Decline/archive/generation_1/regular_fun.R

118 lines
3.7 KiB
R
Raw Normal View History

## ItMLiHSmar2022
## regular_fun.R, child script
## Regularisation model building function
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
##
## Now modified to use in publication
##
regular_fun<-function(X,y,K,lambdas,alpha){
n<-nrow(X)
set.seed(321)
# Using caret function to ensure both levels represented in all folds
c<-createFolds(y=y, k = K, list = FALSE, returnTrain = TRUE)
B<-yhatTestProbKeep<-list()
accTrain<-accTest<-err_train<-err_test<-auc_train<-auc_test<-matrix(nrow = K,ncol = length(lambdas))
catinfo<-levels(y)
cMatTrain<-cMatTest<-table(true=factor(c(0,0),levels=catinfo),pred=factor(c(0,0),levels=catinfo))
## Iterate over partitions
for (idx1 in 1:K){
# Status
cat('Processing fold', idx1, 'of', K,'\n')
# idx1=1
# Get training- and test sets
I_train = c!=idx1 ## Creating selection vector of TRUE/FALSE
I_test = !I_train
Xtrain = X[I_train,]
ytrain = y[I_train]
Xtest = X[I_test,]
ytest = y[I_test]
## Model matrices for glmnet
## Using the complicated approach not to include first level.
# Xmat.train<-model.matrix(~ .-1, data=Xtrain,
# contrasts.arg = lapply(Xtrain[,sapply(Xtrain, is.factor)],
# contrasts, contrasts=T))
# Xmat.test<-model.matrix(~ .-1, data=Xtest,
# contrasts.arg = lapply(Xtest[,sapply(Xtest, is.factor)],
# contrasts, contrasts=T))
# Xmat.train<-model.matrix(~.-1,Xtrain)
# Xmat.test<-model.matrix(~.-1,Xtest)
# Weights
ytrain_weight<-as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))
# ytest_weight<-as.vector(1 / (table(ytest)[ytest] / length(ytest)))
# Fit regularized linear regression model
mod<-glmnet(Xtrain, ytrain,
alpha = alpha, ## Alpha = 1 for lasso
lambda = lambdas, ## Setting lambdas
standardize = TRUE, ## Scales and centers
weights = ytrain_weight,
family = "binomial"
)
# Keep coefficients for plot
B[[idx1]] <- as.matrix(coef(mod))
# Iterate over regularization strengths to compute training- and test
# errors for individual regularization strengths.
for (idx2 in 1:length(lambdas)){
# idx2=1
# Predict
yhatTrainProb<-predict(mod,
s = lambdas[idx2],
newx = data.matrix(Xtrain),
type = "response"
)
yhatTestProb<-predict(mod,
s = lambdas[idx2],
newx = data.matrix(Xtest),
type = "response"
)
# Compute training and test error
yhatTrain = round(yhatTrainProb)
yhatTest = round(yhatTestProb)
# Make predictions categorical again (instead of 0/1 coding)
yhatTrainCat = factor(round(yhatTrainProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
yhatTestCat = factor(round(yhatTestProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
# Evaluate classifier performance
# Accuracy
# accTrain[idx1,idx2] <- sum(yhatTrainCat==ytrain)/length(ytrain)
# accTest [idx1,idx2] <- sum(yhatTestCat==ytest)/length(ytest)
# #
# # Error rate
# err_train[idx1,idx2] = 1 - accTrain[idx1,idx2]
# err_test [idx1,idx2] = 1 - accTest[idx1,idx2]
# AUROC
suppressMessages(
auc_train[idx1,idx2]<-auc(ytrain, yhatTrainCat))
suppressMessages(
auc_test [idx1,idx2]<-auc(ytest, yhatTestCat))
# Compute confusion matrices
cMatTrain = cMatTrain + table(true=ytrain,pred=yhatTrainCat)
cMatTest = cMatTest + table(true=ytest,pred=yhatTestCat)
}
}
ls<-list(mod=mod,B=B,auc_train=auc_train,auc_test=auc_test,cMatTrain=cMatTrain,cMatTest=cMatTest)
return(ls)
}