PhysicalActivityandStrokeOu.../1 PA Decline/archive/generation_1/regular_fun.R

## ItMLiHSmar2022
## regular_fun.R, child script
## Regularisation model building function
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
## 
## Now modified to use in publication
## 

regular_fun<-function(X,y,K,lambdas,alpha){
n<-nrow(X)
set.seed(321)

# Using caret function to ensure both levels represented in all folds
c<-createFolds(y=y, k = K, list = FALSE, returnTrain = TRUE) 

B<-yhatTestProbKeep<-list()
accTrain<-accTest<-err_train<-err_test<-auc_train<-auc_test<-matrix(nrow = K,ncol = length(lambdas))

catinfo<-levels(y)

cMatTrain<-cMatTest<-table(true=factor(c(0,0),levels=catinfo),pred=factor(c(0,0),levels=catinfo))


## Iterate over partitions
for (idx1 in 1:K){
  
  # Status
  cat('Processing fold', idx1, 'of', K,'\n')
  
  # idx1=1
  # Get training- and test sets
  I_train = c!=idx1 ## Creating selection vector of TRUE/FALSE
  I_test = !I_train
  
  Xtrain = X[I_train,]
  ytrain = y[I_train]
  Xtest = X[I_test,]
  ytest = y[I_test]
  
  
  ## Model matrices for glmnet
  ## Using the complicated approach not to include first level.
  # Xmat.train<-model.matrix(~ .-1, data=Xtrain, 
  #              contrasts.arg = lapply(Xtrain[,sapply(Xtrain, is.factor)], 
  #                                     contrasts, contrasts=T))
  # Xmat.test<-model.matrix(~ .-1, data=Xtest, 
  #                contrasts.arg = lapply(Xtest[,sapply(Xtest, is.factor)], 
  #                                       contrasts, contrasts=T))
  
  # Xmat.train<-model.matrix(~.-1,Xtrain)
  # Xmat.test<-model.matrix(~.-1,Xtest)
  
  # Weights
  ytrain_weight<-as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))
  # ytest_weight<-as.vector(1 / (table(ytest)[ytest] / length(ytest)))
  
  # Fit regularized linear regression model
  mod<-glmnet(Xtrain, ytrain, 
              alpha = alpha,        ## Alpha = 1 for lasso
              lambda = lambdas, ## Setting lambdas
              standardize = TRUE, ## Scales and centers
              weights = ytrain_weight,
              family = "binomial"
              )
  
  # Keep coefficients for plot
  B[[idx1]] <- as.matrix(coef(mod))
  
  # Iterate over regularization strengths to compute training- and test
  # errors for individual regularization strengths.
  for (idx2 in 1:length(lambdas)){
    # idx2=1
    
    # Predict
    yhatTrainProb<-predict(mod, 
                           s = lambdas[idx2], 
                           newx = data.matrix(Xtrain),
                           type = "response"
    )
    
    yhatTestProb<-predict(mod, 
                          s = lambdas[idx2], 
                          newx = data.matrix(Xtest),
                          type = "response"
    )
    
    # Compute training and test error
    yhatTrain = round(yhatTrainProb)
    yhatTest = round(yhatTestProb)
    
    # Make predictions categorical again (instead of 0/1 coding)
    yhatTrainCat = factor(round(yhatTrainProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
    yhatTestCat = factor(round(yhatTestProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)
    
    # Evaluate classifier performance
    # Accuracy
    # accTrain[idx1,idx2] <- sum(yhatTrainCat==ytrain)/length(ytrain)
    # accTest [idx1,idx2] <- sum(yhatTestCat==ytest)/length(ytest)
    # #
    # # Error rate
    # err_train[idx1,idx2] =  1 - accTrain[idx1,idx2]
    # err_test [idx1,idx2] =   1 - accTest[idx1,idx2]
    
    # AUROC
    suppressMessages(
      auc_train[idx1,idx2]<-auc(ytrain, yhatTrainCat))
    suppressMessages(
      auc_test [idx1,idx2]<-auc(ytest, yhatTestCat))
    
    # Compute confusion matrices
    cMatTrain = cMatTrain + table(true=ytrain,pred=yhatTrainCat)
    cMatTest = cMatTest + table(true=ytest,pred=yhatTestCat)
  }
}
ls<-list(mod=mod,B=B,auc_train=auc_train,auc_test=auc_test,cMatTrain=cMatTrain,cMatTest=cMatTest)
return(ls)
}
all analyses of project one moved to new generation (old saved as gen_1) 2022-12-02 16:43:31 +01:00			`## ItMLiHSmar2022`
			`## regular_fun.R, child script`
			`## Regularisation model building function`
			`## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk`
			`##`
			`## Now modified to use in publication`
			`##`

			`regular_fun<-function(X,y,K,lambdas,alpha){`
			`n<-nrow(X)`
			`set.seed(321)`

			`# Using caret function to ensure both levels represented in all folds`
			`c<-createFolds(y=y, k = K, list = FALSE, returnTrain = TRUE)`

			`B<-yhatTestProbKeep<-list()`
			`accTrain<-accTest<-err_train<-err_test<-auc_train<-auc_test<-matrix(nrow = K,ncol = length(lambdas))`

			`catinfo<-levels(y)`

			`cMatTrain<-cMatTest<-table(true=factor(c(0,0),levels=catinfo),pred=factor(c(0,0),levels=catinfo))`


			`## Iterate over partitions`
			`for (idx1 in 1:K){`

			`# Status`
			`cat('Processing fold', idx1, 'of', K,'\n')`

			`# idx1=1`
			`# Get training- and test sets`
			`I_train = c!=idx1 ## Creating selection vector of TRUE/FALSE`
			`I_test = !I_train`

			`Xtrain = X[I_train,]`
			`ytrain = y[I_train]`
			`Xtest = X[I_test,]`
			`ytest = y[I_test]`


			`## Model matrices for glmnet`
			`## Using the complicated approach not to include first level.`
			`# Xmat.train<-model.matrix(~ .-1, data=Xtrain,`
			`# contrasts.arg = lapply(Xtrain[,sapply(Xtrain, is.factor)],`
			`# contrasts, contrasts=T))`
			`# Xmat.test<-model.matrix(~ .-1, data=Xtest,`
			`# contrasts.arg = lapply(Xtest[,sapply(Xtest, is.factor)],`
			`# contrasts, contrasts=T))`

			`# Xmat.train<-model.matrix(~.-1,Xtrain)`
			`# Xmat.test<-model.matrix(~.-1,Xtest)`

			`# Weights`
			`ytrain_weight<-as.vector(1 - (table(ytrain)[ytrain] / length(ytrain)))`
			`# ytest_weight<-as.vector(1 / (table(ytest)[ytest] / length(ytest)))`

			`# Fit regularized linear regression model`
			`mod<-glmnet(Xtrain, ytrain,`
			`alpha = alpha, ## Alpha = 1 for lasso`
			`lambda = lambdas, ## Setting lambdas`
			`standardize = TRUE, ## Scales and centers`
			`weights = ytrain_weight,`
			`family = "binomial"`
			`)`

			`# Keep coefficients for plot`
			`B[[idx1]] <- as.matrix(coef(mod))`

			`# Iterate over regularization strengths to compute training- and test`
			`# errors for individual regularization strengths.`
			`for (idx2 in 1:length(lambdas)){`
			`# idx2=1`

			`# Predict`
			`yhatTrainProb<-predict(mod,`
			`s = lambdas[idx2],`
			`newx = data.matrix(Xtrain),`
			`type = "response"`
			`)`

			`yhatTestProb<-predict(mod,`
			`s = lambdas[idx2],`
			`newx = data.matrix(Xtest),`
			`type = "response"`
			`)`

			`# Compute training and test error`
			`yhatTrain = round(yhatTrainProb)`
			`yhatTest = round(yhatTestProb)`

			`# Make predictions categorical again (instead of 0/1 coding)`
			`yhatTrainCat = factor(round(yhatTrainProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)`
			`yhatTestCat = factor(round(yhatTestProb),levels=c("0","1"),labels=catinfo,ordered = TRUE)`

			`# Evaluate classifier performance`
			`# Accuracy`
			`# accTrain[idx1,idx2] <- sum(yhatTrainCat==ytrain)/length(ytrain)`
			`# accTest [idx1,idx2] <- sum(yhatTestCat==ytest)/length(ytest)`
			`# #`
			`# # Error rate`
			`# err_train[idx1,idx2] = 1 - accTrain[idx1,idx2]`
			`# err_test [idx1,idx2] = 1 - accTest[idx1,idx2]`

			`# AUROC`
			`suppressMessages(`
			`auc_train[idx1,idx2]<-auc(ytrain, yhatTrainCat))`
			`suppressMessages(`
			`auc_test [idx1,idx2]<-auc(ytest, yhatTestCat))`

			`# Compute confusion matrices`
			`cMatTrain = cMatTrain + table(true=ytrain,pred=yhatTrainCat)`
			`cMatTest = cMatTest + table(true=ytest,pred=yhatTestCat)`
			`}`
			`}`
			`ls<-list(mod=mod,B=B,auc_train=auc_train,auc_test=auc_test,cMatTrain=cMatTrain,cMatTest=cMatTest)`
			`return(ls)`
			`}`