## ItMLiHSmar2022
## standardise.R, child script
## Data standardisation, returns list
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk

## Standardise the numeric columns of a train/test pair using statistics
## computed on the training data only.
##
## Arguments:
##   train  Data frame of training data. Column means (and SDs) are computed
##          from its numeric columns.
##   test   Data frame of test data. Columns are selected by position using
##          the numeric/non-numeric mask derived from `train`, so it is
##          assumed to have the same columns in the same order as `train`
##          (NOTE(review): not checked here -- confirm against callers).
##   type   "c"  = centre only (subtract the training mean);
##          "cs" = centre and scale (Z-score using training mean and SD).
##
## Returns:
##   A list with elements `XtrainSt` and `XtestSt`: the treated data with
##   non-numeric columns passed through untouched and columns restored to the
##   original order of `train`.
##
## Errors:
##   Stops with an explicit message if `type` is not "c" or "cs".
standardise <- function(train, test, type) {
  # From:
  # https://datascience.stackexchange.com/questions/13971/standardization-normalization-test-data-in-r

  # Fail early with a clear message instead of an "object not found" error
  # further down when neither branch assigns the normalised data.
  if (!is.character(type) || length(type) != 1L || !(type %in% c("c", "cs"))) {
    stop(
      "`type` must be \"c\" (centred) or \"cs\" (centred and scaled)",
      call. = FALSE
    )
  }

  # Deciding which columns to standardise (only numeric). Derived from the
  # `train` argument, not from a global object.
  sel <- vapply(train, is.numeric, logical(1))
  cnm <- colnames(train) # Saving column names for ordering

  # Nothing to standardise: hand the data back unchanged.
  if (!any(sel)) {
    return(list(XtrainSt = train, XtestSt = test))
  }

  # Subsetting; drop = FALSE keeps single-column selections as data frames so
  # column names survive and sweep() still sees two dimensions.
  ## Data to treat
  train_num <- train[, sel, drop = FALSE]
  test_num <- test[, sel, drop = FALSE]
  ## Data to save
  train_keep <- train[, !sel, drop = FALSE]
  test_keep <- test[, !sel, drop = FALSE]

  # Mean of the training data; also applied to the test data so that no
  # information from the test set enters the transformation.
  train_mean <- vapply(train_num, mean, numeric(1))

  # Centring: sweep() defaults to "-" and subtracts the mean column-wise.
  norm_train <- sweep(train_num, 2L, train_mean)
  norm_test <- sweep(test_num, 2L, train_mean)

  if (type == "cs") {
    # Additionally scale by the training SD (Z-score standardisation).
    train_sd <- vapply(train_num, sd, numeric(1))
    norm_train <- sweep(norm_train, 2L, train_sd, "/")
    norm_test <- sweep(norm_test, 2L, train_sd, "/")
  }

  # Reordering columns to original
  list(
    XtrainSt = cbind(norm_train, train_keep)[, cnm, drop = FALSE],
    XtestSt = cbind(norm_test, test_keep)[, cnm, drop = FALSE]
  )
}