PhysicalActivityandStrokeOu.../apps/Assignment/group_assign.R

group_assignment <-
  function(ds,
           cap_classes = NULL,
           excess_space = NULL,
           pre_assign = NULL) {
    require(dplyr)
    require(tidyr)
    require(ROI)
    require(ROI.plugin.symphony)
    require(ompr)
    require(ompr.roi)

    if (!is.data.frame(ds)){
      stop("Supplied data has to be a data frame, with each row
           are subjects and columns are groups, with the first column being
           subject identifiers")}

    ## This program very much trust the user to supply correctly formatted data
    cost <- t(ds[,-1]) #Transpose converts to matrix
    colnames(cost) <- ds[,1]

    num_groups <- dim(cost)[1]
    num_sub <- dim(cost)[2]

    ## Adding the option to introduce a bit of head room to the classes by
    ## the groups to a little bigger than the smallest possible
    ## Default is to allow for an extra 20 % fill
    if (is.null(excess_space)) {
      excess <- 1.2
    } else {
      excess <- excess_space
    }

    # generous round up of capacities
    if (is.null(cap_classes)) {
      capacity <- rep(ceiling(excess*num_sub/num_groups), num_groups)
    # } else if (!is.numeric(cap_classes)) {
    #   stop("cap_classes has to be numeric")
    } else if (length(cap_classes)==1){
     capacity <- ceiling(rep(cap_classes,num_groups)*excess)
    } else if (length(cap_classes)==num_groups){
      capacity <- ceiling(cap_classes*excess)
    } else {
      stop("cap_classes has to be either length 1 or same as number of groups")
    }

    ## This test should be a little more elegant
    ## pre_assign should be a data.frame or matrix with an ID and assignment column
    with_pre_assign <- FALSE
    if (!is.null(pre_assign)){
      # Setting flag for later and export list
      with_pre_assign <- TRUE
      # Splitting to list for later merging
      pre <- split(pre_assign[,1],factor(pre_assign[,2],levels = seq_len(num_groups)))
      # Subtracting capacity numbers, to reflect already filled spots
      capacity <- capacity-lengths(pre)
      # Making sure pre_assigned are removed from main data set
      ds <- ds[!ds[[1]] %in% pre_assign[[1]],]

      cost <- t(ds[,-1])
      colnames(cost) <- ds[,1]

      num_groups <- dim(cost)[1]
      num_sub <- dim(cost)[2]
    }

    ## Simple NA handling. Better to handle NAs yourself!
    cost[is.na(cost)] <- num_groups

    i_m <- seq_len(num_groups)
    j_m <- seq_len(num_sub)

    m <- MIPModel() %>%
      add_variable(grp[i, j],
                   i = i_m,
                   j = j_m,
                   type = "binary") %>%
      ## The first constraint says that group size should not exceed capacity
      add_constraint(sum_expr(grp[i, j], j = j_m) <= capacity[i],
                     i = i_m) %>%
      ## The second constraint says each subject can only be in one group
      add_constraint(sum_expr(grp[i, j], i = i_m) == 1, j = j_m) %>%
      ## The objective is set to minimize the cost of the assignments
      ## Giving subjects the group with the highest possible ranking
      set_objective(sum_expr(
        cost[i, j] * grp[i, j],
        i = i_m,
        j = j_m
      ),
      "min") %>%
      solve_model(with_ROI(solver = "symphony", verbosity = 1))

    ## Getting assignments
    solution <- get_solution(m, grp[i, j]) %>% filter(value > 0)

    assign <- solution |> select(i,j)

    if (!is.null(rownames(cost))){
      assign$i <- rownames(cost)[assign$i]
    }

    if (!is.null(colnames(cost))){
      assign$j <- colnames(cost)[assign$j]
    }

    ## Splitting into groups based on assignment
    assign_ls <- split(assign$j,assign$i)


    ## Extracting subject cost for the final assignment for evaluation
    if (is.null(rownames(cost))){
      rownames(cost) <- seq_len(nrow(cost))
    }

    if (is.null(colnames(cost))){
      colnames(cost) <- seq_len(ncol(cost))
    }

    eval <- lapply(seq_len(length(assign_ls)),function(i){
      ndx <- match(names(assign_ls)[i],rownames(cost))
      cost[ndx,assign_ls[[i]]]
    })
    names(eval) <- names(assign_ls)

    if (with_pre_assign){
      names(pre) <- names(assign_ls)
      assign_all <- mapply(c, assign_ls, pre, SIMPLIFY=FALSE)

      out <- list(all_assigned=assign_all)
    } else {
      out <- list(all_assigned=assign_ls)
    }

    export <- do.call(rbind,lapply(seq_along(out[[1]]),function(i){
      cbind("ID"=out[[1]][[i]],"Group"=names(out[[1]])[i])
    }))

    out <- append(out,
                  list(evaluation=eval,
                       assigned=assign_ls,
                       solution = solution,
                       capacity = capacity,
                       excess = excess,
                       pre_assign = with_pre_assign,
                       cost_scale = levels(factor(cost)),
                       input=ds,
                       export=export))
    # exists("excess")
    return(out)
  }


## Assessment performance overview
## The function plots costs of assignment for each subject in every group
assignment_plot <- function(lst){

  dl <- lst[[2]]
  cost_scale <- unique(lst[[8]])
  cap <- lst[[5]]
  cnts_ls <- lapply(dl,function(i){
    factor(i,levels=cost_scale)
  })
  require(ggplot2)
  require(patchwork)
  require(viridisLite)

  y_max <- max(lengths(dl))

  wrap_plots(lapply(seq_along(dl),function(i){
    ttl <- names(dl)[i]
    ns <- length(dl[[i]])
    cnts <- cnts_ls[[i]]
    ggplot() + geom_bar(aes(cnts,fill=cnts)) +
      scale_x_discrete(name = NULL, breaks=cost_scale, drop=FALSE) +
      scale_y_continuous(name = NULL, limits = c(0,y_max)) +
      scale_fill_manual(values = viridisLite::viridis(length(cost_scale), direction = -1)) +
      guides(fill=FALSE) + labs(title=paste0(ttl," (fill=",round(ns/cap[[i]],1),";m=",round(mean(dl[[i]]),1) ,")"))
  }))
}


## Helper function for Shiny
file_extension <- function(filenames) {
  sub(pattern = "^(.*\\.|[^.]+)(?=[^.]*)", replacement = "", filenames, perl = TRUE)
}