PhysicalActivityandStrokeOu.../apps/Assignment/group_assign.R

#' Assign subjects to groups by minimising the total assignment cost
#'
#' Builds a binary assignment model with 'ompr' and solves it with the
#' SYMPHONY solver through 'ROI': every subject is placed in exactly one
#' group, no group receives more subjects than its capacity, and the sum of
#' the costs of the chosen subject/group cells is minimised.
#'
#' @param ds Data frame. The first column holds subject identifiers; every
#'   other column is one group and holds the cost of placing the subject in
#'   that group (lower cost is preferred). Missing costs are replaced by the
#'   number of groups.
#' @param cap_classes Optional numeric capacities, either a single value used
#'   for all groups or one value per group (in column order). When `NULL`,
#'   subjects are spread evenly across groups.
#' @param excess_space Optional multiplier applied to the capacities to give
#'   the solver head room. Defaults to 1.2 (20 % extra).
#' @param pre_assign Optional data frame or matrix of fixed placements: first
#'   column subject ID, second column the group, given either as the group's
#'   position among the group columns (1 = first group) or as its name.
#'   Pre-assigned subjects are removed from `ds` and their seats are
#'   subtracted from the capacities before solving.
#' @return A named list, in this order: `all_assigned` (solver result merged
#'   with pre-assignments), `evaluation` (cost of each solver-assigned subject
#'   per group), `assigned` (solver result only), `solution` (raw solver rows
#'   with value > 0), `capacity`, `excess`, `pre_assign` (logical flag),
#'   `cost_scale`, `input` (the data actually solved) and `export` (character
#'   matrix with columns ID and Group). Per-group elements follow the column
#'   order of `ds` and include groups that received no subjects.
group_assignment <-
  function(ds,
           cap_classes = NULL,
           excess_space = NULL,
           pre_assign = NULL) {
    if (!is.data.frame(ds)){
      stop("Supplied data has to be a data frame, with each row
           are subjects and columns are groups, with the first column being
           subject identifiers")}
    if (ncol(ds) < 2) {
      stop("ds has to contain an ID column and at least one group column")
    }

    ## Groups in rows, subjects in columns. `drop = FALSE` keeps the group name
    ## when there is a single group column and `[[1]]` also works for tibbles.
    cost_matrix <- function(d) {
      mat <- t(d[, -1, drop = FALSE]) # Transpose converts to matrix
      colnames(mat) <- d[[1]]
      mat
    }

    cost <- cost_matrix(ds)
    group_names <- rownames(cost)
    num_groups <- nrow(cost)
    num_sub <- ncol(cost)

    ## Head room for the groups; default is to allow for an extra 20 % fill
    excess <- if (is.null(excess_space)) 1.2 else excess_space

    ## Generous round up of capacities
    if (is.null(cap_classes)) {
      capacity <- rep(ceiling(excess * num_sub / num_groups), num_groups)
    } else if (length(cap_classes) == 1) {
      capacity <- ceiling(rep(cap_classes, num_groups) * excess)
    } else if (length(cap_classes) == num_groups) {
      capacity <- ceiling(cap_classes * excess)
    } else {
      stop("cap_classes has to be either length 1 or same as number of groups")
    }

    with_pre_assign <- !is.null(pre_assign)
    if (with_pre_assign) {
      ## Coerce once so matrices, data frames and tibbles are handled alike
      pre_assign <- as.data.frame(pre_assign, stringsAsFactors = FALSE)
      if (ncol(pre_assign) < 2) {
        stop("pre_assign has to contain an ID column and an assignment column")
      }
      pre_ids <- pre_assign[[1]]
      if (is.factor(pre_ids)) {
        ## c() on a factor would leak integer codes into the merged ID lists
        pre_ids <- as.character(pre_ids)
      }

      ## Resolve the group either by position or by name; refuse anything
      ## else instead of silently dropping the subject from all outputs.
      pre_grp <- as.character(pre_assign[[2]])
      grp_ndx <- match(pre_grp, as.character(seq_len(num_groups)))
      unmatched <- is.na(grp_ndx)
      grp_ndx[unmatched] <- match(pre_grp[unmatched], group_names)
      if (anyNA(grp_ndx)) {
        stop(
          "pre_assign contains groups that are neither a group number nor a group name: ",
          paste(unique(pre_grp[is.na(grp_ndx)]), collapse = ", "),
          call. = FALSE
        )
      }

      ## One element per group (possibly empty), in group column order
      pre <- split(pre_ids, factor(grp_ndx, levels = seq_len(num_groups)))

      ## Subtracting capacity numbers, to reflect already filled spots
      capacity <- capacity - lengths(pre)
      if (any(capacity < 0)) {
        ## A negative bound would make the whole model infeasible
        warning(
          "More subjects pre-assigned than capacity allows in some groups; ",
          "remaining capacity set to 0 for those groups",
          call. = FALSE
        )
        capacity <- pmax(capacity, 0)
      }

      ## Making sure pre-assigned subjects are removed from the main data set
      ds <- ds[!ds[[1]] %in% pre_ids, , drop = FALSE]
      cost <- cost_matrix(ds)
      num_sub <- ncol(cost)
    }

    if (sum(capacity) < num_sub) {
      warning(
        "Total capacity (", sum(capacity), ") is smaller than the number of ",
        "subjects to assign (", num_sub, "); the model is infeasible",
        call. = FALSE
      )
    }

    ## Simple NA handling. Better to handle NAs yourself!
    cost[is.na(cost)] <- num_groups

    ## Attach the solver stack only once the input has been validated.
    ## library() errors on a missing package, require() would only warn.
    for (pkg in c("dplyr", "tidyr", "ROI", "ROI.plugin.symphony",
                  "ompr", "ompr.roi")) {
      library(pkg, character.only = TRUE)
    }

    i_m <- seq_len(num_groups)
    j_m <- seq_len(num_sub)

    m <- MIPModel() %>%
      add_variable(grp[i, j],
                   i = i_m,
                   j = j_m,
                   type = "binary") %>%
      ## The first constraint says that group size should not exceed capacity
      add_constraint(sum_expr(grp[i, j], j = j_m) <= capacity[i],
                     i = i_m) %>%
      ## The second constraint says each subject can only be in one group
      add_constraint(sum_expr(grp[i, j], i = i_m) == 1, j = j_m) %>%
      ## The objective is set to minimize the cost of the assignments
      set_objective(sum_expr(
        cost[i, j] * grp[i, j],
        i = i_m,
        j = j_m
      ),
      "min") %>%
      solve_model(with_ROI(solver = "symphony", verbosity = 1))

    ## Getting assignments
    solution <- get_solution(m, grp[i, j]) %>% filter(value > 0)

    ## Splitting into groups. Fixing the factor levels keeps the groups in
    ## column order and keeps empty groups, so the result lines up position
    ## by position with `capacity` and with the pre-assignment list.
    assign_ls <- split(
      colnames(cost)[solution$j],
      factor(group_names[solution$i], levels = group_names)
    )

    ## Cost of the final assignment per group, for evaluation. Indexing by
    ## position avoids wrong look-ups when subject IDs are not unique.
    evaluation <- lapply(i_m, function(g) {
      cost[g, solution$j[solution$i == g]]
    })
    names(evaluation) <- group_names

    if (with_pre_assign) {
      names(pre) <- group_names
      all_assigned <- mapply(c, assign_ls, pre, SIMPLIFY = FALSE)
    } else {
      all_assigned <- assign_ls
    }

    ## Long format ID/Group table; built in one go so empty groups are harmless
    export <- cbind(
      "ID" = as.character(unlist(all_assigned, use.names = FALSE)),
      "Group" = rep(names(all_assigned), lengths(all_assigned))
    )

    ## Element order matters: assignment_plot() reads this list by position
    list(
      all_assigned = all_assigned,
      evaluation = evaluation,
      assigned = assign_ls,
      solution = solution,
      capacity = capacity,
      excess = excess,
      pre_assign = with_pre_assign,
      cost_scale = levels(factor(cost)),
      input = ds,
      export = export
    )
  }


## Assessment performance overview
## The function plots costs of assignment for each subject in every group
#' Plot the distribution of assignment costs for every group
#'
#' Draws one bar chart per group showing how many subjects were assigned at
#' each cost level, and combines the panels with 'patchwork'. Each panel title
#' shows the group name, the fill rate (subjects divided by capacity) and the
#' mean cost, both rounded to one decimal.
#'
#' @param lst Result list from `group_assignment()`. Read by position:
#'   element 2 (per-group costs), element 5 (capacities) and element 8 (cost
#'   scale).
#' @return The combined plot object returned by `patchwork::wrap_plots()`.
assignment_plot <- function(lst){
  ## library() errors on a missing package, require() would only warn
  library(ggplot2)
  library(patchwork)
  library(viridisLite)

  dl <- lst[[2]]
  cost_scale <- unique(lst[[8]])
  cap <- lst[[5]]

  ## Naming the colours ties each cost level to one fixed colour. Unnamed
  ## values are handed out to whichever levels occur in a panel, so the same
  ## cost could otherwise be coloured differently from panel to panel.
  fill_colours <- viridisLite::viridis(length(cost_scale), direction = -1)
  names(fill_colours) <- cost_scale

  ## Shared y axis so panels are directly comparable
  y_max <- max(lengths(dl))

  wrap_plots(lapply(seq_along(dl), function(i) {
    ttl <- names(dl)[i]
    ns <- length(dl[[i]])
    cnts <- factor(dl[[i]], levels = cost_scale)
    ggplot() + geom_bar(aes(cnts, fill = cnts)) +
      scale_x_discrete(name = NULL, breaks = cost_scale, drop = FALSE) +
      scale_y_continuous(name = NULL, limits = c(0, y_max)) +
      scale_fill_manual(values = fill_colours) +
      ## `guides(fill = FALSE)` is deprecated; "none" hides the legend
      guides(fill = "none") +
      labs(title = paste0(ttl, " (fill=", round(ns / cap[[i]], 1), ";m=", round(mean(dl[[i]]), 1), ")"))
  }))
}


## Helper function for Shiny
file_extension <- function(filenames) {
  # Strip everything up to and including the last dot. A name without any
  # dot is consumed entirely by the second alternative, giving "".
  stem_pattern <- "^(.*\\.|[^.]+)(?=[^.]*)"
  extensions <- sub(stem_pattern, "", filenames, perl = TRUE)
  extensions
}
Originally just a small idea, but it took way more time. 2023-09-11 14:29:17 -07:00			`group_assignment <-`
			`function(ds,`
			`cap_classes = NULL,`
			`excess_space = NULL,`
			`pre_assign = NULL) {`
			`require(dplyr)`
			`require(tidyr)`
			`require(ROI)`
			`require(ROI.plugin.symphony)`
			`require(ompr)`
			`require(ompr.roi)`

			`if (!is.data.frame(ds)){`
			`stop("Supplied data has to be a data frame, with each row`
			`are subjects and columns are groups, with the first column being`
			`subject identifiers")}`

			`## This program very much trust the user to supply correctly formatted data`
			`cost <- t(ds[,-1]) #Transpose converts to matrix`
			`colnames(cost) <- ds[,1]`

			`num_groups <- dim(cost)[1]`
			`num_sub <- dim(cost)[2]`

			`## Adding the option to introduce a bit of head room to the classes by`
			`## the groups to a little bigger than the smallest possible`
			`## Default is to allow for an extra 20 % fill`
			`if (is.null(excess_space)) {`
			`excess <- 1.2`
			`} else {`
			`excess <- excess_space`
			`}`

			`# generous round up of capacities`
			`if (is.null(cap_classes)) {`
			`capacity <- rep(ceiling(excess*num_sub/num_groups), num_groups)`
			`# } else if (!is.numeric(cap_classes)) {`
			`# stop("cap_classes has to be numeric")`
			`} else if (length(cap_classes)==1){`
			`capacity <- ceiling(rep(cap_classes,num_groups)*excess)`
			`} else if (length(cap_classes)==num_groups){`
			`capacity <- ceiling(cap_classes*excess)`
			`} else {`
			`stop("cap_classes has to be either length 1 or same as number of groups")`
			`}`

			`## This test should be a little more elegant`
			`## pre_assign should be a data.frame or matrix with an ID and assignment column`
			`with_pre_assign <- FALSE`
			`if (!is.null(pre_assign)){`
			`# Setting flag for later and export list`
			`with_pre_assign <- TRUE`
			`# Splitting to list for later merging`
			`pre <- split(pre_assign[,1],factor(pre_assign[,2],levels = seq_len(num_groups)))`
			`# Subtracting capacity numbers, to reflect already filled spots`
			`capacity <- capacity-lengths(pre)`
			`# Making sure pre_assigned are removed from main data set`
			`ds <- ds[!ds[[1]] %in% pre_assign[[1]],]`

			`cost <- t(ds[,-1])`
			`colnames(cost) <- ds[,1]`

			`num_groups <- dim(cost)[1]`
			`num_sub <- dim(cost)[2]`
			`}`

			`## Simple NA handling. Better to handle NAs yourself!`
			`cost[is.na(cost)] <- num_groups`

			`i_m <- seq_len(num_groups)`
			`j_m <- seq_len(num_sub)`

			`m <- MIPModel() %>%`
			`add_variable(grp[i, j],`
			`i = i_m,`
			`j = j_m,`
			`type = "binary") %>%`
			`## The first constraint says that group size should not exceed capacity`
			`add_constraint(sum_expr(grp[i, j], j = j_m) <= capacity[i],`
			`i = i_m) %>%`
			`## The second constraint says each subject can only be in one group`
			`add_constraint(sum_expr(grp[i, j], i = i_m) == 1, j = j_m) %>%`
			`## The objective is set to minimize the cost of the assignments`
			`## Giving subjects the group with the highest possible ranking`
			`set_objective(sum_expr(`
			`cost[i, j] * grp[i, j],`
			`i = i_m,`
			`j = j_m`
			`),`
			`"min") %>%`
			`solve_model(with_ROI(solver = "symphony", verbosity = 1))`

			`## Getting assignments`
			`solution <- get_solution(m, grp[i, j]) %>% filter(value > 0)`

			`assign <- solution \|> select(i,j)`

			`if (!is.null(rownames(cost))){`
			`assign$i <- rownames(cost)[assign$i]`
			`}`

			`if (!is.null(colnames(cost))){`
			`assign$j <- colnames(cost)[assign$j]`
			`}`

			`## Splitting into groups based on assignment`
			`assign_ls <- split(assign$j,assign$i)`


			`## Extracting subject cost for the final assignment for evaluation`
			`if (is.null(rownames(cost))){`
			`rownames(cost) <- seq_len(nrow(cost))`
			`}`

			`if (is.null(colnames(cost))){`
			`colnames(cost) <- seq_len(ncol(cost))`
			`}`

			`eval <- lapply(seq_len(length(assign_ls)),function(i){`
			`ndx <- match(names(assign_ls)[i],rownames(cost))`
			`cost[ndx,assign_ls[[i]]]`
			`})`
			`names(eval) <- names(assign_ls)`

			`if (with_pre_assign){`
			`names(pre) <- names(assign_ls)`
			`assign_all <- mapply(c, assign_ls, pre, SIMPLIFY=FALSE)`

			`out <- list(all_assigned=assign_all)`
			`} else {`
			`out <- list(all_assigned=assign_ls)`
			`}`

			`export <- do.call(rbind,lapply(seq_along(out[[1]]),function(i){`
			`cbind("ID"=out[[1]][[i]],"Group"=names(out[[1]])[i])`
			`}))`

			`out <- append(out,`
			`list(evaluation=eval,`
			`assigned=assign_ls,`
			`solution = solution,`
			`capacity = capacity,`
			`excess = excess,`
			`pre_assign = with_pre_assign,`
			`cost_scale = levels(factor(cost)),`
			`input=ds,`
			`export=export))`
			`# exists("excess")`
			`return(out)`
			`}`


			`## Assessment performance overview`
			`## The function plots costs of assignment for each subject in every group`
			`assignment_plot <- function(lst){`

			`dl <- lst[[2]]`
			`cost_scale <- unique(lst[[8]])`
			`cap <- lst[[5]]`
			`cnts_ls <- lapply(dl,function(i){`
			`factor(i,levels=cost_scale)`
			`})`
			`require(ggplot2)`
			`require(patchwork)`
			`require(viridisLite)`

			`y_max <- max(lengths(dl))`

			`wrap_plots(lapply(seq_along(dl),function(i){`
			`ttl <- names(dl)[i]`
			`ns <- length(dl[[i]])`
			`cnts <- cnts_ls[[i]]`
			`ggplot() + geom_bar(aes(cnts,fill=cnts)) +`
			`scale_x_discrete(name = NULL, breaks=cost_scale, drop=FALSE) +`
			`scale_y_continuous(name = NULL, limits = c(0,y_max)) +`
			`scale_fill_manual(values = viridisLite::viridis(length(cost_scale), direction = -1)) +`
			`guides(fill=FALSE) + labs(title=paste0(ttl," (fill=",round(ns/cap[[i]],1),";m=",round(mean(dl[[i]]),1) ,")"))`
			`}))`
			`}`


			`## Helper function for Shiny`
			`file_extension <- function(filenames) {`
			`sub(pattern = "^(.*\\.\|[^.]+)(?=[^.]*)", replacement = "", filenames, perl = TRUE)`
			`}`