group_assignment <- function(ds, cap_classes = NULL, excess_space = NULL, pre_assign = NULL) { require(dplyr) require(tidyr) require(ROI) require(ROI.plugin.symphony) require(ompr) require(ompr.roi) if (!is.data.frame(ds)){ stop("Supplied data has to be a data frame, with each row are subjects and columns are groups, with the first column being subject identifiers")} ## This program very much trust the user to supply correctly formatted data cost <- t(ds[,-1]) #Transpose converts to matrix colnames(cost) <- ds[,1] num_groups <- dim(cost)[1] num_sub <- dim(cost)[2] ## Adding the option to introduce a bit of head room to the classes by ## the groups to a little bigger than the smallest possible ## Default is to allow for an extra 20 % fill if (is.null(excess_space)) { excess <- 1.2 } else { excess <- excess_space } # generous round up of capacities if (is.null(cap_classes)) { capacity <- rep(ceiling(excess*num_sub/num_groups), num_groups) # } else if (!is.numeric(cap_classes)) { # stop("cap_classes has to be numeric") } else if (length(cap_classes)==1){ capacity <- ceiling(rep(cap_classes,num_groups)*excess) } else if (length(cap_classes)==num_groups){ capacity <- ceiling(cap_classes*excess) } else { stop("cap_classes has to be either length 1 or same as number of groups") } ## This test should be a little more elegant ## pre_assign should be a data.frame or matrix with an ID and assignment column with_pre_assign <- FALSE if (!is.null(pre_assign)){ # Setting flag for later and export list with_pre_assign <- TRUE # Splitting to list for later merging pre <- split(pre_assign[,1],factor(pre_assign[,2],levels = seq_len(num_groups))) # Subtracting capacity numbers, to reflect already filled spots capacity <- capacity-lengths(pre) # Making sure pre_assigned are removed from main data set ds <- ds[!ds[[1]] %in% pre_assign[[1]],] cost <- t(ds[,-1]) colnames(cost) <- ds[,1] num_groups <- dim(cost)[1] num_sub <- dim(cost)[2] } ## Simple NA handling. Better to handle NAs yourself! cost[is.na(cost)] <- num_groups i_m <- seq_len(num_groups) j_m <- seq_len(num_sub) m <- MIPModel() %>% add_variable(grp[i, j], i = i_m, j = j_m, type = "binary") %>% ## The first constraint says that group size should not exceed capacity add_constraint(sum_expr(grp[i, j], j = j_m) <= capacity[i], i = i_m) %>% ## The second constraint says each subject can only be in one group add_constraint(sum_expr(grp[i, j], i = i_m) == 1, j = j_m) %>% ## The objective is set to minimize the cost of the assignments ## Giving subjects the group with the highest possible ranking set_objective(sum_expr( cost[i, j] * grp[i, j], i = i_m, j = j_m ), "min") %>% solve_model(with_ROI(solver = "symphony", verbosity = 1)) ## Getting assignments solution <- get_solution(m, grp[i, j]) %>% filter(value > 0) assign <- solution |> select(i,j) if (!is.null(rownames(cost))){ assign$i <- rownames(cost)[assign$i] } if (!is.null(colnames(cost))){ assign$j <- colnames(cost)[assign$j] } ## Splitting into groups based on assignment assign_ls <- split(assign$j,assign$i) ## Extracting subject cost for the final assignment for evaluation if (is.null(rownames(cost))){ rownames(cost) <- seq_len(nrow(cost)) } if (is.null(colnames(cost))){ colnames(cost) <- seq_len(ncol(cost)) } eval <- lapply(seq_len(length(assign_ls)),function(i){ ndx <- match(names(assign_ls)[i],rownames(cost)) cost[ndx,assign_ls[[i]]] }) names(eval) <- names(assign_ls) if (with_pre_assign){ names(pre) <- names(assign_ls) assign_all <- mapply(c, assign_ls, pre, SIMPLIFY=FALSE) out <- list(all_assigned=assign_all) } else { out <- list(all_assigned=assign_ls) } export <- do.call(rbind,lapply(seq_along(out[[1]]),function(i){ cbind("ID"=out[[1]][[i]],"Group"=names(out[[1]])[i]) })) out <- append(out, list(evaluation=eval, assigned=assign_ls, solution = solution, capacity = capacity, excess = excess, pre_assign = with_pre_assign, cost_scale = levels(factor(cost)), input=ds, export=export)) # exists("excess") return(out) } ## Assessment performance overview ## The function plots costs of assignment for each subject in every group assignment_plot <- function(lst){ dl <- lst[[2]] cost_scale <- unique(lst[[8]]) cap <- lst[[5]] cnts_ls <- lapply(dl,function(i){ factor(i,levels=cost_scale) }) require(ggplot2) require(patchwork) require(viridisLite) y_max <- max(lengths(dl)) wrap_plots(lapply(seq_along(dl),function(i){ ttl <- names(dl)[i] ns <- length(dl[[i]]) cnts <- cnts_ls[[i]] ggplot() + geom_bar(aes(cnts,fill=cnts)) + scale_x_discrete(name = NULL, breaks=cost_scale, drop=FALSE) + scale_y_continuous(name = NULL, limits = c(0,y_max)) + scale_fill_manual(values = viridisLite::viridis(length(cost_scale), direction = -1)) + guides(fill=FALSE) + labs(title=paste0(ttl," (fill=",round(ns/cap[[i]],1),";m=",round(mean(dl[[i]]),1) ,")")) })) } ## Helper function for Shiny file_extension <- function(filenames) { sub(pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "", filenames, perl = TRUE) }