group assignment functions

2023-09-08 15:47:39 -07:00 · 2023-09-08 15:47:39 -07:00 · 6fae593845
commit 6fae593845
parent a4c31eab6a
1 changed files with 172 additions and 0 deletions
--- a/projects/assignment.R
+++ b/projects/assignment.R
@ -0,0 +1,172 @@
 ## Assign subjects to groups by minimising the summed cost of the assignment.
 ##
 ## Arguments:
 ##   ds           data frame with one row per subject. The first column holds
 ##                the subject identifiers, every following column holds the
 ##                cost of placing the subject in that group (lower is better).
 ##   cap_classes  optional numeric capacities, one per group in the column
 ##                order of ds; a single value is recycled to all groups.
 ##                When NULL the capacity is derived from excess_space.
 ##   excess_space optional fill factor, only used when cap_classes is NULL:
 ##                each group gets ceiling(excess_space * subjects / groups)
 ##                seats. Defaults to 1.2, i.e. 20 % head room.
 ##   pre_assign   optional data frame or matrix with subject identifiers in
 ##                the first column and the group index (1..number of groups,
 ##                in the column order of ds) in the second column.
 ##
 ## Value: a named list, each element holding one entry per group in the
 ## column order of ds (groups without members are kept as empty vectors):
 ##   "Group assignment"  subject identifiers per group (solver result plus
 ##                       pre-assigned subjects when pre_assign is given)
 ##   "Cost evaluation"   realised cost of every solver-assigned subject
 ##   "Group assigned"    solver result only; returned with pre_assign only
 ##
 ## Side effect: attaches dplyr, tidyr, ROI, ROI.plugin.symphony, ompr and
 ## ompr.roi when the solver has to run, and fails if one of them is missing.
 group_assignment <-
   function(ds,
            cap_classes = NULL,
            excess_space = NULL,
            pre_assign = NULL) {
     if (!is.data.frame(ds)) {
       stop("Supplied data has to be a data frame, with each row
           are subjects and columns are groups, with the first column being
           subject identifiers")
     }
     if (ncol(ds) < 2) {
       stop("Supplied data needs an identifier column followed by at least ",
            "one group column", call. = FALSE)
     }
     ## Subjects are labelled by the identifier column, not by the row names,
     ## so that they can be matched against pre_assign
     subject_ids <- as.character(ds[[1]])
     cost <- t(ds[-1]) # Transpose converts to matrix: groups x subjects
     if (!is.numeric(cost) || anyNA(cost)) {
       stop("All group columns have to be numeric costs without missing ",
            "values", call. = FALSE)
     }
     colnames(cost) <- subject_ids
     group_names <- rownames(cost)
     num_groups <- nrow(cost)
     num_sub <- ncol(cost)
     ## Capacity is based on the full head count, pre-assigned included.
     ## Default is to allow for an extra 20 % fill
     if (is.null(cap_classes)) {
       excess <- if (is.null(excess_space)) 1.2 else excess_space
       capacity <- rep(ceiling(excess * num_sub / num_groups), num_groups)
     } else {
       if (!is.numeric(cap_classes) ||
           !(length(cap_classes) %in% c(1L, num_groups))) {
         stop("cap_classes has to be numeric with one capacity per group (",
              num_groups, ") or a single value", call. = FALSE)
       }
       capacity <- rep_len(cap_classes, num_groups)
     }
     with_pre_assign <- !is.null(pre_assign)
     if (with_pre_assign) {
       ## as.data.frame() makes column extraction work for matrices too
       pre_df <- as.data.frame(pre_assign)
       if (ncol(pre_df) < 2) {
         stop("pre_assign needs an identifier column and a group column",
              call. = FALSE)
       }
       pre_ids <- as.character(pre_df[[1]])
       pre_grp <- factor(pre_df[[2]], levels = seq_len(num_groups))
       if (anyNA(pre_grp)) {
         stop("pre_assign refers to groups outside 1..", num_groups,
              call. = FALSE)
       }
       ## One entry per group, in column order, so the later merge lines up
       pre <- split(pre_ids, pre_grp)
       names(pre) <- group_names
       ## Seats taken by pre-assigned subjects are no longer available
       capacity <- capacity - unname(lengths(pre))
       if (any(capacity < 0)) {
         stop("Pre-assigned subjects exceed the capacity of group(s): ",
              paste(group_names[capacity < 0], collapse = ", "),
              call. = FALSE)
       }
       ## Pre-assigned subjects must not be handed to the solver again
       keep <- !(subject_ids %in% pre_ids)
       subject_ids <- subject_ids[keep]
       cost <- cost[, keep, drop = FALSE]
       num_sub <- ncol(cost)
     }
     if (sum(capacity) < num_sub) {
       stop("Total group capacity (", sum(capacity), ") is smaller than the ",
            "number of subjects to assign (", num_sub, ")", call. = FALSE)
     }
     if (num_sub > 0) {
       ## library() instead of require(): a missing package is an error here,
       ## not a FALSE return followed by a confusing failure further down
       for (pkg in c("dplyr", "tidyr", "ROI", "ROI.plugin.symphony",
                     "ompr", "ompr.roi")) {
         library(pkg, character.only = TRUE)
       }
       i_m <- seq_len(num_groups)
       j_m <- seq_len(num_sub)
       m <- MIPModel() %>%
         add_variable(grp[i, j],
                      i = i_m,
                      j = j_m,
                      type = "binary") %>%
         ## The first constraint says that group size should not exceed capacity
         add_constraint(sum_expr(grp[i, j], j = j_m) <= capacity[i],
                        i = i_m) %>%
         ## The second constraint says each subject can only be in one group
         add_constraint(sum_expr(grp[i, j], i = i_m) == 1, j = j_m) %>%
         ## The objective is set to minimize the cost of the assignments
         ## Giving subjects the group with the highest possible ranking
         set_objective(sum_expr(
           cost[i, j] * grp[i, j],
           i = i_m,
           j = j_m
         ),
         "min") %>%
         solve_model(with_ROI(solver = "symphony", verbosity = 1))
       ## Getting assignments; 0.5 separates the binary 0/1 values without
       ## depending on exact floating point output from the solver
       solution <- get_solution(m, grp[i, j])
       picked <- solution[solution$value > 0.5, c("i", "j")]
       if (nrow(picked) != num_sub) {
         stop("The solver did not return exactly one group per subject",
              call. = FALSE)
       }
     } else {
       ## Nobody left to place (e.g. everyone was pre-assigned): skip the solver
       picked <- data.frame(i = integer(0), j = integer(0))
     }
     ## Splitting into groups based on assignment. Indices are used instead
     ## of names so that group order and empty groups are preserved
     members <- split(picked$j, factor(picked$i, levels = seq_len(num_groups)))
     names(members) <- group_names
     assign_ls <- lapply(members, function(idx) subject_ids[idx])
     ## Extracting subject cost for the final assignment for evaluation
     cost_eval <- lapply(seq_len(num_groups), function(g) {
       cost[g, members[[g]]]
     })
     names(cost_eval) <- group_names
     if (with_pre_assign) {
       assign_all <- Map(c, assign_ls, pre)
       list("Group assignment" = assign_all,
            "Cost evaluation" = cost_eval,
            "Group assigned" = assign_ls)
     } else {
       list("Group assignment" = assign_ls,
            "Cost evaluation" = cost_eval)
     }
   }
 ## Assignment performance overview
 ## Draws one bar chart per group with the number of members at each cost
 ## level and combines the charts into a single patchwork.
 ##
 ## Arguments:
 ##   dl          list with one vector of assignment costs per group; element
 ##               names are used in the panel titles together with the group
 ##               size
 ##   cost_scale  cost levels to show on the x axis, e.g. 1:5; the costs are
 ##               converted to a factor with exactly these levels
 ##
 ## Value: the combined plot returned by patchwork::wrap_plots().
 ##
 ## Side effect: attaches ggplot2 and patchwork, and fails if one is missing.
 assignment_plot <- function(dl, cost_scale) {
   ## max() of an empty input would give -Inf axis limits
   if (length(dl) == 0) {
     stop("`dl` must contain at least one group of costs", call. = FALSE)
   }
   ## library() instead of require(): a missing package is an error here
   library(ggplot2)
   library(patchwork)
   ## Shared y axis so that the panels are directly comparable
   y_max <- max(lengths(dl))
   panels <- lapply(seq_along(dl), function(i) {
     ttl <- names(dl)[i]
     ns <- length(dl[[i]])
     cnts <- data.frame(cost = factor(dl[[i]], levels = cost_scale))
     ggplot(cnts, aes(x = cost, fill = cost)) +
       geom_bar() +
       ## drop = FALSE keeps cost levels without any members on the axis
       scale_x_discrete(name = NULL, breaks = cost_scale, drop = FALSE) +
       scale_y_continuous(name = NULL, limits = c(0, y_max)) +
       ## "none" replaces the deprecated guides(fill = FALSE)
       guides(fill = "none") +
       labs(title = paste0(ttl, " (n=", ns, ")"))
   })
   wrap_plots(panels)
 }
 ## Sample data set is generated with rownames and colnames
 # ds <- do.call(cbind,lapply(1:133,function(i){
 #   sample(c(1,2,2,3,4,rep(0,12)),size=17)
 # })) 
 # rownames(ds) <- letters[seq_len(nrow(ds))]
 # colnames(ds) <- paste0("sub",seq_len(ncol(ds)))
 ## Clearing NAs and applying the max cost instead
 # ds[is.na(ds)] <- 17
 ## I believe this would actually be the organic data set
 # df <- data.frame("ID"=colnames(ds),t(ds))
 # 
 # df[as.matrix(df)==0] <- 17
 # 
 # assigned <- df |> 
 #   group_assignment(cap_classes = rep(8, 17),excess_space = 1)
 # 
 # 
 # assigned$`Group assignment`
 # 
 # assigned$`Cost evaluation` |> assignment_plot(1:5)
 ## Special cases to consider
 ## - duplicate scores
 ## - missing scores
 ## - Pre-assignment of special cases - SOLVED
 ## 
 ## I believe we are ready for a shiny app!
 # pre_grouped <- data.frame("ID"=sample(df$ID,10),"group"=sample(1:17,10))
 # assigned <- df |> 
 #   group_assignment(excess_space = 1.05,
 #                    pre_assign = pre_grouped)
 # 
 # lengths(assigned[[1]])