group assignment functions

2023-09-08 15:47:39 -07:00 · 2023-09-08 15:47:39 -07:00 · 6fae593845
commit 6fae593845
parent a4c31eab6a
1 changed files with 172 additions and 0 deletions
--- a/projects/assignment.R
+++ b/projects/assignment.R
@ -0,0 +1,172 @@
+## Assign subjects to groups so that the summed cost of all assignments is
+## minimal, while no group grows beyond its capacity.
+##
+## The problem is stated as a binary integer program with ompr and solved by
+## SYMPHONY through ROI.
+##
+## Arguments:
+##   ds            Data frame with one row per subject. The first column
+##                 holds the subject identifiers; every other column is a
+##                 group and holds the cost of placing the subject in that
+##                 group (lower is better).
+##   cap_classes   Optional numeric vector of group capacities, one per group
+##                 in column order; a single value is recycled to all groups.
+##                 When supplied, `excess_space` is ignored.
+##   excess_space  Optional multiplier applied to the even group size
+##                 (subjects / groups) when `cap_classes` is NULL. Defaults
+##                 to 1.2, i.e. 20 % head room.
+##   pre_assign    Optional data frame or matrix. Column 1 holds subject
+##                 identifiers and column 2 the index (1..number of groups)
+##                 of the group each of them is placed in up front. These
+##                 subjects use up capacity in their group; any of them found
+##                 in the first column of `ds` is left out of the
+##                 optimisation so nobody is assigned twice.
+##
+## Value: a named list.
+##   "Group assignment"  One element per group, in column order, holding its
+##                       members. Optimised subjects are labelled by the row
+##                       names of `ds` when it has any, otherwise by row
+##                       position; pre-assigned subjects by their identifier.
+##   "Cost evaluation"   One element per group with the cost each optimised
+##                       member has for that group.
+##   "Group assigned"    Only when `pre_assign` is given: the optimised
+##                       members alone, without the pre-assigned ones.
+##
+## Side effects: attaches dplyr, tidyr, ROI, ROI.plugin.symphony, ompr and
+## ompr.roi, and stops if one of them is not installed.
+group_assignment <-
+  function(ds,
+           cap_classes = NULL,
+           excess_space = NULL,
+           pre_assign = NULL) {
+    if (!is.data.frame(ds)) {
+      stop("Supplied data has to be a data frame, where rows are subjects ",
+           "and columns are groups, with the first column being subject ",
+           "identifiers", call. = FALSE)
+    }
+    if (ncol(ds) < 2L || nrow(ds) < 1L) {
+      stop("`ds` needs an identifier column, at least one group column ",
+           "and at least one subject", call. = FALSE)
+    }
+
+    ## Transposing converts to a matrix: groups in rows, subjects in columns
+    cost <- t(ds[-1])
+    if (!is.numeric(cost) || anyNA(cost)) {
+      stop("All group columns in `ds` have to be numeric without missing ",
+           "values", call. = FALSE)
+    }
+
+    group_names <- rownames(cost)
+    num_groups <- nrow(cost)
+    num_total <- ncol(cost)
+
+    ## `cost` only has column names when `ds` carries real row names;
+    ## otherwise subjects are labelled by their row position in `ds`
+    sub_labels <- colnames(cost)
+    if (is.null(sub_labels)) {
+      sub_labels <- seq_len(num_total)
+    }
+
+    ## Default capacity is the even group size with a bit of head room
+    ## (20 % unless `excess_space` says otherwise), rounded up
+    if (is.null(cap_classes)) {
+      excess <- if (is.null(excess_space)) 1.2 else excess_space
+      capacity <- rep(ceiling(excess * num_total / num_groups), num_groups)
+    } else {
+      if (!is.numeric(cap_classes) ||
+          !(length(cap_classes) %in% c(1L, num_groups))) {
+        stop("`cap_classes` has to be numeric with one value per group (",
+             num_groups, ") or a single value", call. = FALSE)
+      }
+      capacity <- rep_len(unname(cap_classes), num_groups)
+    }
+
+    with_pre_assign <- !is.null(pre_assign)
+    if (with_pre_assign) {
+      if (is.data.frame(pre_assign)) {
+        pre_ids <- pre_assign[[1]]
+        pre_grp <- pre_assign[[2]]
+      } else {
+        pre_ids <- pre_assign[, 1]
+        pre_grp <- pre_assign[, 2]
+      }
+
+      ## Anything that is not a valid group index becomes NA here; fail
+      ## loudly instead of letting split() drop those subjects silently
+      pre_grp <- factor(pre_grp, levels = seq_len(num_groups))
+      if (anyNA(pre_grp)) {
+        stop("Column 2 of `pre_assign` has to hold group indices between ",
+             "1 and ", num_groups, call. = FALSE)
+      }
+
+      ## One element per group, in column order, also for groups without
+      ## any pre-assigned subject
+      pre <- split(pre_ids, pre_grp)
+      names(pre) <- group_names
+      capacity <- capacity - unname(lengths(pre))
+
+      ## Pre-assigned subjects already have a seat, so they must not take
+      ## part in the optimisation as well
+      keep <- !(as.character(ds[[1]]) %in% as.character(pre_ids))
+      cost <- cost[, keep, drop = FALSE]
+      sub_labels <- sub_labels[keep]
+    }
+
+    num_sub <- ncol(cost)
+    if (num_sub < 1L) {
+      stop("No subjects are left to assign after removing the ",
+           "pre-assigned ones", call. = FALSE)
+    }
+    if (anyNA(capacity) || any(capacity < 0)) {
+      stop("Group capacity is missing or negative; check `cap_classes`, ",
+           "`excess_space` and the number of pre-assigned subjects per ",
+           "group", call. = FALSE)
+    }
+    if (sum(capacity) < num_sub) {
+      stop("Total group capacity (", sum(capacity), ") is smaller than the ",
+           "number of subjects to assign (", num_sub, ")", call. = FALSE)
+    }
+
+    ## library() rather than require(): a missing package stops right here
+    ## instead of surfacing later as an unrelated "could not find function"
+    for (pkg in c("dplyr", "tidyr", "ROI", "ROI.plugin.symphony",
+                  "ompr", "ompr.roi")) {
+      library(pkg, character.only = TRUE)
+    }
+
+    i_m <- seq_len(num_groups)
+    j_m <- seq_len(num_sub)
+
+    m <- MIPModel() |>
+      add_variable(grp[i, j], i = i_m, j = j_m, type = "binary") |>
+      ## Group size must not exceed capacity
+      add_constraint(sum_expr(grp[i, j], j = j_m) <= capacity[i], i = i_m) |>
+      ## Every subject is placed in exactly one group
+      add_constraint(sum_expr(grp[i, j], i = i_m) == 1, j = j_m) |>
+      ## Minimise the total cost, i.e. give subjects the group they ranked
+      ## best wherever possible
+      set_objective(
+        sum_expr(cost[i, j] * grp[i, j], i = i_m, j = j_m),
+        "min"
+      ) |>
+      solve_model(with_ROI(solver = "symphony", verbosity = 1))
+
+    ## Binary variables may come back with round-off, hence 0.5 and not 0
+    sol <- get_solution(m, grp[i, j])
+    chosen <- sol[which(sol$value > 0.5), c("i", "j")]
+    if (nrow(chosen) != num_sub) {
+      stop("The solver did not return a complete assignment",
+           call. = FALSE)
+    }
+
+    ## Fixing the factor levels keeps every group, in column order, even
+    ## when the solver leaves a group empty
+    members <- split(chosen$j, factor(chosen$i, levels = i_m))
+    names(members) <- group_names
+    assign_ls <- lapply(members, function(idx) sub_labels[idx])
+
+    ## Cost of every optimised subject for the group it ended up in
+    colnames(cost) <- sub_labels
+    cost_eval <- lapply(i_m, function(g) cost[g, members[[g]]])
+    names(cost_eval) <- group_names
+
+    if (with_pre_assign) {
+      ## `assign_ls` and `pre` both hold all groups in the same order, so
+      ## an element-wise merge cannot mix groups up
+      list("Group assignment" = Map(c, assign_ls, pre),
+           "Cost evaluation" = cost_eval,
+           "Group assigned" = assign_ls)
+    } else {
+      list("Group assignment" = assign_ls,
+           "Cost evaluation" = cost_eval)
+    }
+  }
+
+
+## Assessment performance overview
+## The function plots costs of assignment for each subject in every group
+## Draw one bar chart per group showing how many of its members were
+## assigned at each cost level.
+##
+## Arguments:
+##   dl          Non-empty list with one vector of costs per group, e.g. the
+##               "Cost evaluation" element returned by group_assignment().
+##               Element names are used in the panel titles.
+##   cost_scale  Vector of the cost values to show on the x axis. It is used
+##               as the factor levels, so every level gets a slot on the
+##               axis even when no subject has that cost.
+##
+## Value: the result of patchwork::wrap_plots() on the per-group plots. All
+## panels share the same y limit (the size of the largest group) so bar
+## heights can be compared across groups.
+##
+## Side effects: attaches ggplot2 and patchwork, and stops if one of them is
+## not installed.
+assignment_plot <- function(dl, cost_scale) {
+  if (length(dl) == 0L) {
+    stop("`dl` has to be a non-empty list of cost vectors", call. = FALSE)
+  }
+
+  ## library() rather than require(): a missing package stops right here
+  for (pkg in c("ggplot2", "patchwork")) {
+    library(pkg, character.only = TRUE)
+  }
+
+  y_max <- max(lengths(dl))
+
+  panels <- lapply(seq_along(dl), function(i) {
+    ttl <- names(dl)[i]
+    ns <- length(dl[[i]])
+    cnts <- factor(dl[[i]], levels = cost_scale)
+    ggplot() +
+      geom_bar(aes(cnts, fill = cnts)) +
+      ## drop = FALSE keeps unused cost levels on the axis
+      scale_x_discrete(name = NULL, breaks = cost_scale, drop = FALSE) +
+      scale_y_continuous(name = NULL, limits = c(0, y_max)) +
+      ## "none" replaces the deprecated `fill = FALSE` for hiding a legend
+      guides(fill = "none") +
+      labs(title = paste0(ttl, " (n=", ns, ")"))
+  })
+
+  wrap_plots(panels)
+}
+
+## Sample data set is generated with rownames and colnames
+# ds <- do.call(cbind,lapply(1:133,function(i){
+#   sample(c(1,2,2,3,4,rep(0,12)),size=17)
+# })) 
+# rownames(ds) <- letters[seq_len(nrow(ds))]
+# colnames(ds) <- paste0("sub",seq_len(ncol(ds)))
+
+## Clearing NAs and applying the max cost instead
+# ds[is.na(ds)] <- 17
+
+## I believe this would actually be the organic data set
+# df <- data.frame("ID"=colnames(ds),t(ds))
+# 
+# df[as.matrix(df)==0] <- 17
+# 
+# assigned <- df |> 
+#   group_assignment(cap_classes = rep(8, 17),excess_space = 1)
+# 
+# 
+# assigned$`Group assignment`
+# 
+# assigned$`Cost evaluation` |> assignment_plot(1:5)
+
+
+## Special cases to consider
+## - duplicate scores
+## - missing scores
+## - Pre-assignment of special cases - SOLVED
+## 
+## I believe we are ready for a shiny app!
+
+# pre_grouped <- data.frame("ID"=sample(df$ID,10),"group"=sample(1:17,10))
+# assigned <- df |> 
+#   group_assignment(excess_space = 1.05,
+#                    pre_assign = pre_grouped)
+# 
+# lengths(assigned[[1]])