## ItMLiHSmar2022
## regular_fun.R, child script
## Regularisation model building function
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
##
## Now modified for use in the publication

## Dependencies (loaded here in case the parent script does not)
library(glmnet) # glmnet()
library(caret)  # createFolds()
library(pROC)   # auc()

regular_fun <- function(X, y, K, lambdas, alpha) {
  set.seed(321)

  ## caret's createFolds() stratifies on y, ensuring both outcome
  ## levels are represented in all folds
  folds <- createFolds(y = y, k = K, list = FALSE, returnTrain = TRUE)

  B <- list()
  accTrain <- accTest <- err_train <- err_test <-
    auc_train <- auc_test <- matrix(nrow = K, ncol = length(lambdas))

  catinfo <- levels(y)

  ## Empty 2 x 2 tables to accumulate fold-wise confusion matrices
  cMatTrain <- cMatTest <- table(
    true = factor(character(0), levels = catinfo),
    pred = factor(character(0), levels = catinfo)
  )

  ## Iterate over partitions
  for (idx1 in 1:K) {
    # Status
    cat("Processing fold", idx1, "of", K, "\n")

    # Get training and test sets (logical selection vectors)
    I_train <- folds != idx1
    I_test <- !I_train
    Xtrain <- X[I_train, ]
    ytrain <- y[I_train]
    Xtest <- X[I_test, ]
    ytest <- y[I_test]

    ## Alternative design matrices for glmnet (kept for reference):
    ## full dummy coding so the first factor level is not dropped.
    # Xmat.train <- model.matrix(~ . - 1, data = Xtrain,
    #                            contrasts.arg = lapply(Xtrain[, sapply(Xtrain, is.factor)],
    #                                                   contrasts, contrasts = TRUE))
    # Xmat.test <- model.matrix(~ . - 1, data = Xtest,
    #                           contrasts.arg = lapply(Xtest[, sapply(Xtest, is.factor)],
    #                                                  contrasts, contrasts = TRUE))

    ## Observation weights: weight = 1 - class frequency, so both
    ## outcome classes contribute equal total weight to the fit
    ytrain_weight <- as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))

    ## Fit regularised logistic regression model
    ## (data.matrix() matches the conversion used in predict() below)
    mod <- glmnet(data.matrix(Xtrain), ytrain,
                  alpha = alpha,         ## alpha = 1 for lasso
                  lambda = lambdas,      ## setting lambdas
                  standardize = TRUE,    ## scales and centres predictors
                  weights = ytrain_weight,
                  family = "binomial")

    # Keep coefficients for plotting
    B[[idx1]] <- as.matrix(coef(mod))

    ## Iterate over regularisation strengths to compute training and
    ## test performance for each individual regularisation strength.
    for (idx2 in seq_along(lambdas)) {
      # Predicted probabilities
      yhatTrainProb <- predict(mod,
                               s = lambdas[idx2],
                               newx = data.matrix(Xtrain),
                               type = "response")
      yhatTestProb <- predict(mod,
                              s = lambdas[idx2],
                              newx = data.matrix(Xtest),
                              type = "response")

      ## Dichotomise at 0.5 and map the 0/1 coding back to the original
      ## factor levels (ordered, as required by pROC::auc below)
      yhatTrainCat <- factor(round(yhatTrainProb),
                             levels = c("0", "1"), labels = catinfo,
                             ordered = TRUE)
      yhatTestCat <- factor(round(yhatTestProb),
                            levels = c("0", "1"), labels = catinfo,
                            ordered = TRUE)

      ## Accuracy and error rate (retained for reference; not exported)
      # accTrain[idx1, idx2] <- sum(yhatTrainCat == ytrain) / length(ytrain)
      # accTest[idx1, idx2] <- sum(yhatTestCat == ytest) / length(ytest)
      # err_train[idx1, idx2] <- 1 - accTrain[idx1, idx2]
      # err_test[idx1, idx2] <- 1 - accTest[idx1, idx2]

      ## AUROC of the dichotomised predictions
      auc_train[idx1, idx2] <- suppressMessages(auc(ytrain, yhatTrainCat))
      auc_test[idx1, idx2] <- suppressMessages(auc(ytest, yhatTestCat))

      ## Accumulate confusion matrices across folds
      cMatTrain <- cMatTrain + table(true = ytrain, pred = yhatTrainCat)
      cMatTest <- cMatTest + table(true = ytest, pred = yhatTestCat)
    }
  }

  out <- list(mod = mod, B = B,
              auc_train = auc_train, auc_test = auc_test,
              cMatTrain = cMatTrain, cMatTest = cMatTest)
  return(out)
}
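
## ---------------------------------------------------------------------
## Minimal usage sketch (illustrative only). The simulated data, outcome
## labels, fold count and lambda grid below are assumptions made for the
## example, not values from the original analysis.
# set.seed(42)
# X_demo <- matrix(rnorm(200 * 10), nrow = 200,
#                  dimnames = list(NULL, paste0("x", 1:10)))
# y_demo <- factor(rbinom(200, 1, plogis(X_demo[, 1] - X_demo[, 2])),
#                  labels = c("good", "poor"))
# lambda_grid <- exp(seq(log(1e-4), log(1), length.out = 50))
#
# fit <- regular_fun(X = X_demo, y = y_demo, K = 5,
#                    lambdas = lambda_grid, alpha = 1) # alpha = 1: lasso
#
# ## Mean test AUROC per lambda across folds; pick the best lambda
# mean_auc <- colMeans(fit$auc_test)
# lambda_grid[which.max(mean_auc)]
# fit$cMatTest # confusion matrix pooled over all K test folds
## ---------------------------------------------------------------------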