## PhysicalActivityandStrokeOu.../1 PA Decline/archive/generation_1/standardise.R
##
## 42 lines
## 1.3 KiB
## R

## ItMLiHSmar2022
## standardise.R, child script
## Data standardisation, returns list
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
# Standardise the numeric columns of `train` and `test` using statistics
# (column means and, optionally, standard deviations) computed on `train` only.
#
# Arguments:
#   train  Data frame. Its numeric columns are transformed and supply the
#          means/SDs that are applied to both data sets.
#   test   Data frame. The numeric-column mask derived from `train` is applied
#          to it by position, so it is expected to share `train`'s column
#          layout and names.
#   type   Single string: "c" centres only, "cs" centres and scales (z-score).
#
# Returns:
#   A list with elements `XtrainSt` and `XtestSt`, the transformed data frames
#   with columns in the original order of `train`. Non-numeric columns are
#   passed through unchanged.
#
# Stops with an error if `type` is not "c" or "cs".
standardise <- function(train, test, type) {
  # From:
  # https://datascience.stackexchange.com/questions/13971/standardization-normalization-test-data-in-r

  # Fail early with a clear message instead of an "object not found" error
  # further down when `type` matches neither branch.
  if (!is.character(type) || length(type) != 1L || !type %in% c("c", "cs")) {
    stop("`type` must be either \"c\" or \"cs\"", call. = FALSE)
  }

  # Deciding which columns to standardise (only numeric), based on `train`
  sel <- vapply(train, is.numeric, logical(1))
  # Saving column names for restoring the original order
  cnm <- colnames(train)

  # Nothing to standardise: hand the data back in `train`'s column order
  if (!any(sel)) {
    return(list(
      XtrainSt = train[, cnm, drop = FALSE],
      XtestSt = test[, cnm, drop = FALSE]
    ))
  }

  # Subsetting; drop = FALSE keeps a data frame even when a single column is
  # selected, which sweep() and the name-based reordering below rely on.
  ## Data to treat
  train_num <- train[, sel, drop = FALSE]
  test_num <- test[, sel, drop = FALSE]
  ## Data to save
  train_keep <- train[, !sel, drop = FALSE]
  test_keep <- test[, !sel, drop = FALSE]

  # Centre both sets on the TRAIN means (sweep's default FUN is "-")
  train_mean <- vapply(train_num, mean, numeric(1))
  train_out <- sweep(train_num, 2L, train_mean)
  test_out <- sweep(test_num, 2L, train_mean)

  if (type == "cs") {
    ## centred AND scaled (Z-score standardisation), again using TRAIN SDs
    train_sd <- vapply(train_num, sd, numeric(1))
    train_out <- sweep(train_out, 2L, train_sd, "/")
    test_out <- sweep(test_out, 2L, train_sd, "/")
  }

  # Re-attach untouched columns and reorder columns to the original layout
  list(
    XtrainSt = cbind(train_out, train_keep)[, cnm, drop = FALSE],
    XtestSt = cbind(test_out, test_keep)[, cnm, drop = FALSE]
  )
}