## ItMLiHSmar2022
## regular_fun.R, child script
## Regularisation model building function
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
##
## Now modified for use in the publication

## Dependencies (loaded here in case the parent script does not)
library(glmnet) # glmnet()
library(caret)  # createFolds()
library(pROC)   # auc()

regular_fun <- function(X, y, K, lambdas, alpha) {
  set.seed(321)

  ## caret's createFolds() stratifies on y, ensuring both outcome
  ## levels are represented in all folds
  folds <- createFolds(y = y, k = K, list = FALSE, returnTrain = TRUE)

  B <- list()
  accTrain <- accTest <- err_train <- err_test <-
    auc_train <- auc_test <- matrix(nrow = K, ncol = length(lambdas))

  catinfo <- levels(y)

  ## Empty 2 x 2 tables to accumulate fold-wise confusion matrices
  cMatTrain <- cMatTest <- table(
    true = factor(character(0), levels = catinfo),
    pred = factor(character(0), levels = catinfo)
  )

  ## Iterate over partitions
  for (idx1 in 1:K) {
    # Status
    cat("Processing fold", idx1, "of", K, "\n")

    # Get training and test sets (logical selection vectors)
    I_train <- folds != idx1
    I_test <- !I_train
    Xtrain <- X[I_train, ]
    ytrain <- y[I_train]
    Xtest <- X[I_test, ]
    ytest <- y[I_test]

    ## Alternative design matrices for glmnet (kept for reference):
    ## full dummy coding so the first factor level is not dropped.
    # Xmat.train <- model.matrix(~ . - 1, data = Xtrain,
    #                            contrasts.arg = lapply(Xtrain[, sapply(Xtrain, is.factor)],
    #                                                   contrasts, contrasts = TRUE))
    # Xmat.test <- model.matrix(~ . - 1, data = Xtest,
    #                           contrasts.arg = lapply(Xtest[, sapply(Xtest, is.factor)],
    #                                                  contrasts, contrasts = TRUE))

    ## Observation weights: weight = 1 - class frequency, so both
    ## outcome classes contribute equal total weight to the fit
    ytrain_weight <- as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))

    ## Fit regularised logistic regression model
    ## (data.matrix() matches the conversion used in predict() below)
    mod <- glmnet(data.matrix(Xtrain), ytrain,
                  alpha = alpha,         ## alpha = 1 for lasso
                  lambda = lambdas,      ## setting lambdas
                  standardize = TRUE,    ## scales and centres predictors
                  weights = ytrain_weight,
                  family = "binomial")

    # Keep coefficients for plotting
    B[[idx1]] <- as.matrix(coef(mod))

    ## Iterate over regularisation strengths to compute training and
    ## test performance for each individual regularisation strength.
    for (idx2 in seq_along(lambdas)) {
      # Predicted probabilities
      yhatTrainProb <- predict(mod,
                               s = lambdas[idx2],
                               newx = data.matrix(Xtrain),
                               type = "response")
      yhatTestProb <- predict(mod,
                              s = lambdas[idx2],
                              newx = data.matrix(Xtest),
                              type = "response")

      ## Dichotomise at 0.5 and map the 0/1 coding back to the original
      ## factor levels (ordered, as required by pROC::auc below)
      yhatTrainCat <- factor(round(yhatTrainProb),
                             levels = c("0", "1"), labels = catinfo,
                             ordered = TRUE)
      yhatTestCat <- factor(round(yhatTestProb),
                            levels = c("0", "1"), labels = catinfo,
                            ordered = TRUE)

      ## Accuracy and error rate (retained for reference; not exported)
      # accTrain[idx1, idx2] <- sum(yhatTrainCat == ytrain) / length(ytrain)
      # accTest[idx1, idx2] <- sum(yhatTestCat == ytest) / length(ytest)
      # err_train[idx1, idx2] <- 1 - accTrain[idx1, idx2]
      # err_test[idx1, idx2] <- 1 - accTest[idx1, idx2]

      ## AUROC of the dichotomised predictions
      auc_train[idx1, idx2] <- suppressMessages(auc(ytrain, yhatTrainCat))
      auc_test[idx1, idx2] <- suppressMessages(auc(ytest, yhatTestCat))

      ## Accumulate confusion matrices across folds
      cMatTrain <- cMatTrain + table(true = ytrain, pred = yhatTrainCat)
      cMatTest <- cMatTest + table(true = ytest, pred = yhatTestCat)
    }
  }

  out <- list(mod = mod, B = B,
              auc_train = auc_train, auc_test = auc_test,
              cMatTrain = cMatTrain, cMatTest = cMatTest)
  return(out)
}
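
## ---------------------------------------------------------------------
## Minimal usage sketch (illustrative only). The simulated data, outcome
## labels, fold count and lambda grid below are assumptions made for the
## example, not values from the original analysis.
# set.seed(42)
# X_demo <- matrix(rnorm(200 * 10), nrow = 200,
#                  dimnames = list(NULL, paste0("x", 1:10)))
# y_demo <- factor(rbinom(200, 1, plogis(X_demo[, 1] - X_demo[, 2])),
#                  labels = c("good", "poor"))
# lambda_grid <- exp(seq(log(1e-4), log(1), length.out = 50))
#
# fit <- regular_fun(X = X_demo, y = y_demo, K = 5,
#                    lambdas = lambda_grid, alpha = 1) # alpha = 1: lasso
#
# ## Mean test AUROC per lambda across folds; pick the best lambda
# mean_auc <- colMeans(fit$auc_test)
# lambda_grid[which.max(mean_auc)]
# fit$cMatTest # confusion matrix pooled over all K test folds
## ---------------------------------------------------------------------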