group assignment functions
This commit is contained in:
parent
a4c31eab6a
commit
6fae593845
172
side projects/assignment.R
Normal file
172
side projects/assignment.R
Normal file
@ -0,0 +1,172 @@
|
||||
group_assignment <-
|
||||
function(ds,
|
||||
cap_classes = NULL,
|
||||
excess_space = NULL,
|
||||
pre_assign = NULL) {
|
||||
require(dplyr)
|
||||
require(tidyr)
|
||||
require(ROI)
|
||||
require(ROI.plugin.symphony)
|
||||
require(ompr)
|
||||
require(ompr.roi)
|
||||
|
||||
if (!is.data.frame(ds)){
|
||||
stop("Supplied data has to be a data frame, with each row
|
||||
are subjects and columns are groups, with the first column being
|
||||
subject identifiers")}
|
||||
|
||||
## This program very much trust the user to supply correctly formatted data
|
||||
cost <- t(ds[-1]) #Transpose converts to matrix
|
||||
|
||||
num_groups <- dim(cost)[1]
|
||||
num_sub <- dim(cost)[2]
|
||||
|
||||
## Adding the option to introduce a bit of head room to the classes by
|
||||
## the groups to a little bigger than the smallest possible
|
||||
## Default is to allow for an extra 20 % fill
|
||||
if (is.null(cap_classes)) {
|
||||
if (is.null(excess_space)) {
|
||||
excess <- 1.2
|
||||
} else {
|
||||
excess <- excess_space
|
||||
}
|
||||
capacity <- rep(ceiling(excess*num_sub/num_groups), num_groups)
|
||||
} else {
|
||||
capacity <- cap_classes
|
||||
}
|
||||
|
||||
## This test should be a little more elegant
|
||||
## pre_assign should be a data.frame or matrix with an ID and assignment column
|
||||
with_pre_assign <- FALSE
|
||||
if (!is.null(pre_assign)){
|
||||
with_pre_assign <- TRUE
|
||||
pre <- split(pre_assign[,1],factor(pre_assign[,2],levels = seq_len(num_groups)))
|
||||
capacity <- capacity-lengths(pre)
|
||||
}
|
||||
|
||||
|
||||
i_m <- seq_len(num_groups)
|
||||
j_m <- seq_len(num_sub)
|
||||
|
||||
m <- MIPModel() %>%
|
||||
add_variable(grp[i, j],
|
||||
i = i_m,
|
||||
j = j_m,
|
||||
type = "binary") %>%
|
||||
## The first constraint says that group size should not exceed capacity
|
||||
add_constraint(sum_expr(grp[i, j], j = j_m) <= capacity[i],
|
||||
i = i_m) %>%
|
||||
## The second constraint says each subject can only be in one group
|
||||
add_constraint(sum_expr(grp[i, j], i = i_m) == 1, j = j_m) %>%
|
||||
## The objective is set to minimize the cost of the assignments
|
||||
## Giving subjects the group with the highest possible ranking
|
||||
set_objective(sum_expr(
|
||||
cost[i, j] * grp[i, j],
|
||||
i = i_m,
|
||||
j = j_m
|
||||
),
|
||||
"min") %>%
|
||||
solve_model(with_ROI(solver = "symphony", verbosity = 1))
|
||||
|
||||
## Getting assignments
|
||||
assign <- get_solution(m, grp[i, j]) %>%
|
||||
filter(value > 0) |> select(i,j)
|
||||
|
||||
if (!is.null(rownames(cost))){
|
||||
assign$i <- rownames(cost)[assign$i]
|
||||
}
|
||||
|
||||
if (!is.null(colnames(cost))){
|
||||
assign$j <- colnames(cost)[assign$j]
|
||||
}
|
||||
|
||||
## Splitting into groups based on assignment
|
||||
assign_ls <- split(assign$j,assign$i)
|
||||
|
||||
|
||||
## Extracting subject cost for the final assignment for evaluation
|
||||
if (is.null(rownames(cost))){
|
||||
rownames(cost) <- seq_len(nrow(cost))
|
||||
}
|
||||
|
||||
if (is.null(colnames(cost))){
|
||||
colnames(cost) <- seq_len(ncol(cost))
|
||||
}
|
||||
|
||||
eval <- lapply(seq_len(length(assign_ls)),function(i){
|
||||
ndx <- match(names(assign_ls)[i],rownames(cost))
|
||||
cost[ndx,assign_ls[[i]]]
|
||||
})
|
||||
names(eval) <- names(assign_ls)
|
||||
|
||||
if (with_pre_assign){
|
||||
names(pre) <- names(assign_ls)
|
||||
assign_all <- mapply(c, assign_ls, pre, SIMPLIFY=FALSE)
|
||||
|
||||
return(list("Group assignment"=assign_all,
|
||||
"Cost evaluation"=eval,
|
||||
"Group assigned"=assign_ls))
|
||||
} else {
|
||||
return(list("Group assignment"=assign_ls,
|
||||
"Cost evaluation"=eval))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
## Assessment performance overview
|
||||
## The function plots costs of assignment for each subject in every group
|
||||
assignment_plot <- function(dl,cost_scale){
|
||||
require(ggplot2)
|
||||
require(patchwork)
|
||||
|
||||
y_max <- max(lengths(dl))
|
||||
|
||||
wrap_plots(lapply(seq_along(dl),function(i){
|
||||
ttl <- names(dl)[i]
|
||||
ns <- length(dl[[i]])
|
||||
cnts <- factor(dl[[i]],levels=cost_scale)
|
||||
ggplot() + geom_bar(aes(cnts,fill=cnts)) +
|
||||
scale_x_discrete(name = NULL, breaks=cost_scale, drop=FALSE) +
|
||||
scale_y_continuous(name = NULL, limits = c(0,y_max)) +
|
||||
# coord_cartesian(ylim=c(0,1)) +
|
||||
guides(fill=FALSE) + labs(title=paste0(ttl," (n=",ns,")"))
|
||||
}))
|
||||
}
|
||||
|
||||
## Sample data set is generated with rownames and colnames
|
||||
# ds <- do.call(cbind,lapply(1:133,function(i){
|
||||
# sample(c(1,2,2,3,4,rep(0,12)),size=17)
|
||||
# }))
|
||||
# rownames(ds) <- letters[seq_len(nrow(ds))]
|
||||
# colnames(ds) <- paste0("sub",seq_len(ncol(ds)))
|
||||
|
||||
## Clearing NAs and applying the max cost instead
|
||||
# ds[is.na(ds)] <- 17
|
||||
|
||||
## I believe this would actually be the organic data set
|
||||
# df <- data.frame("ID"=colnames(ds),t(ds))
|
||||
#
|
||||
# df[as.matrix(df)==0] <- 17
|
||||
#
|
||||
# assigned <- df |>
|
||||
# group_assignment(cap_classes = rep(8, 17),excess_space = 1)
|
||||
#
|
||||
#
|
||||
# assigned$`Group assignment`
|
||||
#
|
||||
# assigned$`Cost evaluation` |> assignment_plot(1:5)
|
||||
|
||||
|
||||
## Special cases to consider
|
||||
## - duplicate scores
|
||||
## - missing scores
|
||||
## - Pre-assignment of special cases - SOLVED
|
||||
##
|
||||
## I believe we are ready for a shiny app!
|
||||
|
||||
# pre_grouped <- data.frame("ID"=sample(df$ID,10),"group"=sample(1:17,10))
|
||||
# assigned <- df |>
|
||||
# group_assignment(excess_space = 1.05,
|
||||
# pre_assign = pre_grouped)
|
||||
#
|
||||
# lengths(assigned[[1]])
|
Loading…
Reference in New Issue
Block a user