group assignment functions

This commit is contained in:
Andreas Gammelgaard Damsbo 2023-09-08 15:47:39 -07:00
parent a4c31eab6a
commit 6fae593845

172
side projects/assignment.R Normal file
View File

@ -0,0 +1,172 @@
## Assign subjects to groups by minimising the summed cost of all placements
##
## The problem is formulated as a binary assignment model (ompr) and solved
## with the SYMPHONY solver through ROI.
##
## Arguments:
##   ds            Data frame with one row per subject. The first column holds
##                 the subject identifiers, every following column holds the
##                 cost of placing the subject in that group (lower is better).
##   cap_classes   Optional group capacities; either one value used for all
##                 groups or one value per group. When supplied,
##                 `excess_space` is not used.
##   excess_space  Multiplier applied to the smallest possible even group size
##                 when `cap_classes` is NULL. Defaults to 1.2 (20 % head room).
##   pre_assign    Optional data frame or matrix; first column subject
##                 identifiers, second column the index (1..number of groups)
##                 of the group the subject is locked into. These subjects are
##                 taken out of the optimisation and occupy capacity in their
##                 group.
##
## Value: a list with
##   "Group assignment"  subject identifiers per group (including pre-assigned
##                       subjects when `pre_assign` is given),
##   "Cost evaluation"   the cost each optimised subject got, per group,
##   "Group assigned"    only when `pre_assign` is given: the subjects placed
##                       by the optimisation, per group.
## All three are named by the group columns of `ds`, in column order, and
## contain an (empty) entry for groups that received no subjects.
group_assignment <-
  function(ds,
           cap_classes = NULL,
           excess_space = NULL,
           pre_assign = NULL) {
    if (!is.data.frame(ds)) {
      stop(
        "Supplied data has to be a data frame, with each row\n",
        "are subjects and columns are groups, with the first column being\n",
        "subject identifiers"
      )
    }

    ## Plain data frame, so `[` and `[[` behave the same for tibbles
    ds <- as.data.frame(ds)
    if (ncol(ds) < 2L) {
      stop("ds needs an identifier column followed by at least one group column")
    }
    if (!all(vapply(ds[-1], is.numeric, logical(1)))) {
      stop("All group columns in ds have to contain numeric costs")
    }
    if (any(vapply(ds[-1], anyNA, logical(1)))) {
      stop("ds contains missing costs; replace NA with an explicit cost first")
    }

    group_names <- names(ds)[-1]
    num_groups <- length(group_names)
    all_ids <- as.character(ds[[1]])
    ## Head count used for the default capacity: everybody, pre-assigned or not
    num_total <- length(all_ids)

    ## Pre-assigned subjects are locked to a group: they are split per group
    ## index for the final merge and removed from the data to be optimised,
    ## otherwise the solver would place them a second time.
    with_pre_assign <- !is.null(pre_assign)
    if (with_pre_assign) {
      pre_assign <- as.data.frame(pre_assign, stringsAsFactors = FALSE)
      if (ncol(pre_assign) < 2L) {
        stop("pre_assign needs an identifier column and a group column")
      }
      pre_ids <- as.character(pre_assign[[1]])
      pre_grp <- factor(pre_assign[[2]], levels = seq_len(num_groups))
      if (anyNA(pre_grp)) {
        stop(
          "pre_assign refers to groups outside 1..", num_groups,
          "; the group column has to hold group indices"
        )
      }
      if (anyDuplicated(pre_ids) > 0L) {
        stop("pre_assign lists the same subject more than once")
      }
      pre <- split(pre_ids, pre_grp)
      num_total <- num_total + sum(!(pre_ids %in% all_ids))
      ds <- ds[!(all_ids %in% pre_ids), , drop = FALSE]
    }

    ids <- as.character(ds[[1]])
    num_sub <- length(ids)

    ## Adding the option to introduce a bit of head room to the groups by
    ## making them a little bigger than the smallest possible size.
    ## Default is to allow for an extra 20 % fill.
    if (is.null(cap_classes)) {
      excess <- if (is.null(excess_space)) 1.2 else excess_space
      capacity <- rep(ceiling(excess * num_total / num_groups), num_groups)
    } else {
      if (!is.numeric(cap_classes) || anyNA(cap_classes)) {
        stop("cap_classes has to be numeric without missing values")
      }
      if (length(cap_classes) == 1L) {
        capacity <- rep(as.numeric(cap_classes), num_groups)
      } else if (length(cap_classes) == num_groups) {
        capacity <- as.numeric(cap_classes)
      } else {
        stop(
          "cap_classes has to be of length 1 or have one value per group (",
          num_groups, ")"
        )
      }
    }

    if (with_pre_assign) {
      ## Seats already taken by pre-assigned subjects are not available
      capacity <- capacity - unname(lengths(pre))
      if (any(capacity < 0)) {
        warning("Some groups are pre-assigned beyond their capacity; ",
                "no further subjects will be placed in them")
        capacity <- pmax(capacity, 0)
      }
    }
    if (sum(capacity) < num_sub) {
      stop(
        "Total remaining group capacity (", sum(capacity),
        ") is smaller than the number of subjects to assign (", num_sub, ")"
      )
    }

    if (num_sub > 0L) {
      ## library() fails loudly on a missing package, require() would only
      ## return FALSE and let the function break further down.
      for (pkg in c("dplyr", "tidyr", "ROI", "ROI.plugin.symphony",
                    "ompr", "ompr.roi")) {
        library(pkg, character.only = TRUE)
      }

      ## Groups in rows, subjects in columns
      cost <- t(as.matrix(ds[-1]))
      i_m <- seq_len(num_groups)
      j_m <- seq_len(num_sub)

      m <- MIPModel() |>
        add_variable(grp[i, j], i = i_m, j = j_m, type = "binary") |>
        ## The first constraint says that group size should not exceed capacity
        add_constraint(sum_expr(grp[i, j], j = j_m) <= capacity[i], i = i_m) |>
        ## The second constraint says each subject is in exactly one group
        add_constraint(sum_expr(grp[i, j], i = i_m) == 1, j = j_m) |>
        ## The objective is to minimise the cost of the assignments, giving
        ## subjects the group with the highest possible ranking
        set_objective(
          sum_expr(cost[i, j] * grp[i, j], i = i_m, j = j_m),
          "min"
        ) |>
        solve_model(with_ROI(solver = "symphony", verbosity = 1))

      ## Getting assignments; 0.5 as cut-off guards against numerical noise
      ## in the binary solution values
      sol <- get_solution(m, grp[i, j])
      sol <- sol[sol$value > 0.5, c("i", "j"), drop = FALSE]
      if (nrow(sol) != num_sub) {
        stop("The solver did not return a complete assignment of all subjects")
      }
      cost_vals <- cost[cbind(sol$i, sol$j)]
    } else {
      ## Nothing left to optimise (no subjects, or all of them pre-assigned)
      sol <- data.frame(i = integer(0), j = integer(0))
      cost_vals <- numeric(0)
    }

    ## Splitting on the group index with fixed levels keeps every group, in
    ## the column order of ds, so the result lines up with `pre`
    grp_fac <- factor(sol$i, levels = seq_len(num_groups))
    assign_ls <- split(ids[sol$j], grp_fac)
    names(assign_ls) <- group_names

    ## Extracting subject cost for the final assignment for evaluation
    cost_eval <- split(stats::setNames(cost_vals, ids[sol$j]), grp_fac)
    names(cost_eval) <- group_names

    if (with_pre_assign) {
      assign_all <- Map(c, assign_ls, pre)
      list("Group assignment" = assign_all,
           "Cost evaluation" = cost_eval,
           "Group assigned" = assign_ls)
    } else {
      list("Group assignment" = assign_ls,
           "Cost evaluation" = cost_eval)
    }
  }
## Assignment performance overview
##
## Plots, for every group, how many of its subjects ended up with each cost.
##
## Arguments:
##   dl          Non-empty list with one vector of assignment costs per group,
##               e.g. the "Cost evaluation" element returned by
##               group_assignment(). List names are used as panel titles.
##   cost_scale  Vector of the possible cost values; used as factor levels and
##               x axis breaks, so unused costs are still shown.
##
## Value: the bar charts (one per group) combined with patchwork::wrap_plots().
assignment_plot <- function(dl, cost_scale) {
  if (length(dl) == 0L) {
    stop("dl has to contain at least one group of costs")
  }

  ## library() stops on a missing package; require() would only return FALSE
  library(ggplot2)
  library(patchwork)

  ## Shared upper limit so the panels are comparable across groups
  y_max <- max(lengths(dl))

  panels <- lapply(seq_along(dl), function(i) {
    ttl <- names(dl)[i]
    ns <- length(dl[[i]])
    cnts <- factor(dl[[i]], levels = cost_scale)
    ggplot() +
      geom_bar(aes(cnts, fill = cnts)) +
      scale_x_discrete(name = NULL, breaks = cost_scale, drop = FALSE) +
      scale_y_continuous(name = NULL, limits = c(0, y_max)) +
      ## "none" is the supported way to drop a legend; FALSE is deprecated
      guides(fill = "none") +
      labs(title = paste0(ttl, " (n=", ns, ")"))
  })

  wrap_plots(panels)
}
## Sample data set is generated with rownames and colnames
# ds <- do.call(cbind,lapply(1:133,function(i){
# sample(c(1,2,2,3,4,rep(0,12)),size=17)
# }))
# rownames(ds) <- letters[seq_len(nrow(ds))]
# colnames(ds) <- paste0("sub",seq_len(ncol(ds)))
## Clearing NAs and applying the max cost instead
# ds[is.na(ds)] <- 17
## I believe this would actually be the organic data set
# df <- data.frame("ID"=colnames(ds),t(ds))
#
# df[as.matrix(df)==0] <- 17
#
# assigned <- df |>
# group_assignment(cap_classes = rep(8, 17),excess_space = 1)
#
#
# assigned$`Group assignment`
#
# assigned$`Cost evaluation` |> assignment_plot(1:5)
## Special cases to consider
## - duplicate scores
## - missing scores
## - Pre-assignment of special cases - SOLVED
##
## I believe we are ready for a shiny app!
# pre_grouped <- data.frame("ID"=sample(df$ID,10),"group"=sample(1:17,10))
# assigned <- df |>
# group_assignment(excess_space = 1.05,
# pre_assign = pre_grouped)
#
# lengths(assigned[[1]])