PhysicalActivityandStrokeOu.../apps/Assignment/group_assign.R

189 lines
5.9 KiB
R
Raw Normal View History

group_assignment <-
function(ds,
cap_classes = NULL,
excess_space = NULL,
pre_assign = NULL) {
require(dplyr)
require(tidyr)
require(ROI)
require(ROI.plugin.symphony)
require(ompr)
require(ompr.roi)
if (!is.data.frame(ds)){
stop("Supplied data has to be a data frame, with each row
are subjects and columns are groups, with the first column being
subject identifiers")}
## This program very much trust the user to supply correctly formatted data
cost <- t(ds[,-1]) #Transpose converts to matrix
colnames(cost) <- ds[,1]
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
## Adding the option to introduce a bit of head room to the classes by
## the groups to a little bigger than the smallest possible
## Default is to allow for an extra 20 % fill
if (is.null(excess_space)) {
excess <- 1.2
} else {
excess <- excess_space
}
# generous round up of capacities
if (is.null(cap_classes)) {
capacity <- rep(ceiling(excess*num_sub/num_groups), num_groups)
# } else if (!is.numeric(cap_classes)) {
# stop("cap_classes has to be numeric")
} else if (length(cap_classes)==1){
capacity <- ceiling(rep(cap_classes,num_groups)*excess)
} else if (length(cap_classes)==num_groups){
capacity <- ceiling(cap_classes*excess)
} else {
stop("cap_classes has to be either length 1 or same as number of groups")
}
## This test should be a little more elegant
## pre_assign should be a data.frame or matrix with an ID and assignment column
with_pre_assign <- FALSE
if (!is.null(pre_assign)){
# Setting flag for later and export list
with_pre_assign <- TRUE
# Splitting to list for later merging
pre <- split(pre_assign[,1],factor(pre_assign[,2],levels = seq_len(num_groups)))
# Subtracting capacity numbers, to reflect already filled spots
capacity <- capacity-lengths(pre)
# Making sure pre_assigned are removed from main data set
ds <- ds[!ds[[1]] %in% pre_assign[[1]],]
cost <- t(ds[,-1])
colnames(cost) <- ds[,1]
num_groups <- dim(cost)[1]
num_sub <- dim(cost)[2]
}
## Simple NA handling. Better to handle NAs yourself!
cost[is.na(cost)] <- num_groups
i_m <- seq_len(num_groups)
j_m <- seq_len(num_sub)
m <- MIPModel() %>%
add_variable(grp[i, j],
i = i_m,
j = j_m,
type = "binary") %>%
## The first constraint says that group size should not exceed capacity
add_constraint(sum_expr(grp[i, j], j = j_m) <= capacity[i],
i = i_m) %>%
## The second constraint says each subject can only be in one group
add_constraint(sum_expr(grp[i, j], i = i_m) == 1, j = j_m) %>%
## The objective is set to minimize the cost of the assignments
## Giving subjects the group with the highest possible ranking
set_objective(sum_expr(
cost[i, j] * grp[i, j],
i = i_m,
j = j_m
),
"min") %>%
solve_model(with_ROI(solver = "symphony", verbosity = 1))
## Getting assignments
solution <- get_solution(m, grp[i, j]) %>% filter(value > 0)
assign <- solution |> select(i,j)
if (!is.null(rownames(cost))){
assign$i <- rownames(cost)[assign$i]
}
if (!is.null(colnames(cost))){
assign$j <- colnames(cost)[assign$j]
}
## Splitting into groups based on assignment
assign_ls <- split(assign$j,assign$i)
## Extracting subject cost for the final assignment for evaluation
if (is.null(rownames(cost))){
rownames(cost) <- seq_len(nrow(cost))
}
if (is.null(colnames(cost))){
colnames(cost) <- seq_len(ncol(cost))
}
eval <- lapply(seq_len(length(assign_ls)),function(i){
ndx <- match(names(assign_ls)[i],rownames(cost))
cost[ndx,assign_ls[[i]]]
})
names(eval) <- names(assign_ls)
if (with_pre_assign){
names(pre) <- names(assign_ls)
assign_all <- mapply(c, assign_ls, pre, SIMPLIFY=FALSE)
out <- list(all_assigned=assign_all)
} else {
out <- list(all_assigned=assign_ls)
}
export <- do.call(rbind,lapply(seq_along(out[[1]]),function(i){
cbind("ID"=out[[1]][[i]],"Group"=names(out[[1]])[i])
}))
out <- append(out,
list(evaluation=eval,
assigned=assign_ls,
solution = solution,
capacity = capacity,
excess = excess,
pre_assign = with_pre_assign,
cost_scale = levels(factor(cost)),
input=ds,
export=export))
# exists("excess")
return(out)
}
## Assessment performance overview
## The function plots costs of assignment for each subject in every group
assignment_plot <- function(lst){
dl <- lst[[2]]
cost_scale <- unique(lst[[8]])
cap <- lst[[5]]
cnts_ls <- lapply(dl,function(i){
factor(i,levels=cost_scale)
})
require(ggplot2)
require(patchwork)
require(viridisLite)
y_max <- max(lengths(dl))
wrap_plots(lapply(seq_along(dl),function(i){
ttl <- names(dl)[i]
ns <- length(dl[[i]])
cnts <- cnts_ls[[i]]
ggplot() + geom_bar(aes(cnts,fill=cnts)) +
scale_x_discrete(name = NULL, breaks=cost_scale, drop=FALSE) +
scale_y_continuous(name = NULL, limits = c(0,y_max)) +
scale_fill_manual(values = viridisLite::viridis(length(cost_scale), direction = -1)) +
guides(fill=FALSE) + labs(title=paste0(ttl," (fill=",round(ns/cap[[i]],1),";m=",round(mean(dl[[i]]),1) ,")"))
}))
}
## Helper function for Shiny
file_extension <- function(filenames) {
sub(pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "", filenames, perl = TRUE)
}