42 lines
1.3 KiB
R
42 lines
1.3 KiB
R
|
## ItMLiHSmar2022
|
||
|
## standardise.R, child script
|
||
|
## Data standardisation, returns list
|
||
|
## Andreas Gammelgaard Damsbo, agdamsbo@clin.au.dk
|
||
|
|
||
|
## Standardise the numeric columns of a train/test pair of data frames.
##
## The column means (and, for type "cs", the standard deviations) are computed
## on `train` only and then applied to both `train` and `test`, so no
## information from the test set is used. Non-numeric columns are passed
## through untouched, and the original column order of `train` is restored in
## both outputs.
##
## Arguments:
##   train  data frame used to compute the mean/SD and to be standardised.
##   test   data frame with the same columns (same order) as `train`; it is
##          standardised with the statistics of `train`.
##   type   "c"  = centre only (subtract the train mean),
##          "cs" = centre and scale (Z-score: subtract mean, divide by SD).
##
## Returns:
##   A list with elements `XtrainSt` and `XtestSt` (data frames).
##
## Notes:
##   - Means and SDs are computed without `na.rm`, so a numeric column with
##     missing values in `train` yields NA for that whole column.
##   - A numeric column that is constant in `train` has SD 0, so type "cs"
##     produces NaN/Inf for it.
standardise <- function(train, test, type) {
  # From:
  # https://datascience.stackexchange.com/questions/13971/standardization-normalization-test-data-in-r

  # Validate early so that failures are explicit instead of surfacing later as
  # "object 'norm.trainData' not found".
  if (!is.data.frame(train) || !is.data.frame(test)) {
    stop("`train` and `test` must be data frames.", call. = FALSE)
  }
  if (!is.character(type) || length(type) != 1L || !type %in% c("c", "cs")) {
    stop("`type` must be either \"c\" or \"cs\".", call. = FALSE)
  }

  # Deciding which columns to standardise (only numeric). This must look at
  # the `train` argument, not at a global object.
  sel <- vapply(train, is.numeric, logical(1))
  cnm <- colnames(train) # Saving column names for ordering

  # Nothing to standardise: hand the data back in the original column order.
  if (!any(sel)) {
    return(list(
      XtrainSt = train[, cnm, drop = FALSE],
      XtestSt = test[, cnm, drop = FALSE]
    ))
  }

  # Subsetting; drop = FALSE keeps a data frame even when only one column is
  # selected, which sweep() and the column-wise statistics rely on.

  ## Data to treat
  train.tr <- train[, sel, drop = FALSE]
  test.tr <- test[, sel, drop = FALSE]

  ## Data to save
  train.sv <- train[, !sel, drop = FALSE]
  test.sv <- test[, !sel, drop = FALSE]

  # Centre both sets with the mean of the train data
  # (sweep() defaults to "-", i.e. column-wise subtraction).
  trainMean <- vapply(train.tr, mean, numeric(1))
  norm.trainData <- sweep(train.tr, 2L, trainMean)
  norm.testData <- sweep(test.tr, 2L, trainMean)

  if (type == "cs") {
    ## centred AND scaled (Z-score standardisation), using the train SD
    trainSd <- vapply(train.tr, sd, numeric(1))
    norm.trainData <- sweep(norm.trainData, 2L, trainSd, "/")
    norm.testData <- sweep(norm.testData, 2L, trainSd, "/")
  }

  # Reordering columns to original
  list(
    XtrainSt = cbind(norm.trainData, train.sv)[, cnm, drop = FALSE],
    XtestSt = cbind(norm.testData, test.sv)[, cnm, drop = FALSE]
  )
}
|
||
|
|