stRoke/R/win_prob.R



#' @title Calculates the probability of winning
#' @description Calculates the probability of winning (winP). In the referenced
#' article Zou et al (2022) proposes a method for calculating probability of
#' winning with a confidence interval an p-value testing.
#' @param data A data frame containing the response and group variable.
#' @param response The name of the response variable.
#' Takes first column if empty.
#' @param group The name of the group variable.
#' Takes second column if empty.
#' @param alpha The alpha level for the hypothesis test. Default is 0.05.
#' @param beta The beta level for the sample size calculation. Default is 0.2.
#' @param group.ratio The ratio of group sizes. Default is 1.
#' @param sample.size Flag to include sample size calculation. Default is FALSE.
#' @param print.tables Flag to print cumulative tables. Default is FALSE.
#' @param dec Numeric for decimals to print. Default is 3.
#' @return A list containing the win_prob statistics.
#' @export
#' @importFrom stats pnorm qnorm xtabs
#' @source \doi{10.1161/STROKEAHA.121.037744}
#' @examples
#' win_prob(data=stRoke::talos,response="mrs_6",group="rtreat")
win_prob <-
  function(data,
           response = NULL,
           group = NULL,
           alpha = 0.05,
           beta = 0.2,
           group.ratio = 1,
           sample.size = FALSE,
           print.tables = FALSE,
           dec = 3) {
    if (is.null(response)) {
      response <- names(data)[1]
    }

    if (is.null(group)) {
      group <- names(data)[2]
    }

    group_levels <- levels(factor(data[, group]))

    if (length(group_levels) != 2) {
      stop("The group has to contain 2, and only 2 levels.")
    }

    if (!is.numeric(group.ratio)) {
      stop("Group ratio must be a numeric")
    }

    if (!is.logical(sample.size)) {
      stop("Sample size must be a logical")
    }

    # Internal helper function for calculating ranks
    freq_rank <- function(data, x = "Freq") {
      lapply(data, function(i) {
        rank <- c()
        n_i <- nrow(i)
        for (j in seq_len(n_i)) {
          if (j < n_i) {
            rank[j] <-
              ((i[j, x] + 1) / 2 + (sum(i[seq_len(n_i)[(j + 1):n_i], x])))
          }
          if (j == n_i) {
            rank[j] <- (i[j, x] + 1) / 2
          }
        }
        cbind(i, rank)
      })
    }

    overall <-
      freq_rank(list(data.frame(xtabs(data = data[c(response)]))))[[1]]

    tbl <- xtabs(data = data[c(response, group)])

    tbl_df <- data.frame(tbl)

    prop_df <- data.frame(proportions(tbl, group))

    df <- cbind(tbl_df, prop = prop_df[, "Freq"])

    list_cum <- split(df, df[, group])

    list_cum <- lapply(list_cum, function(i) {
      data.frame(i, overall_rank = overall$rank)
    })

    list_cum <- freq_rank(list_cum)

    sum_a <- sum(df$Freq[df$rtreat == group_levels[1]])
    sum_b <- sum(df$Freq[df$rtreat == group_levels[2]])

    list_cum[[1]]$win_frac <-
      with(list_cum[[1]], overall_rank - rank) / sum_b
    list_cum[[2]]$win_frac <-
      with(list_cum[[2]], overall_rank - rank) / sum_a

    winP_a <- sum(with(list_cum[[1]], prop * win_frac))
    winP_b <- sum(with(list_cum[[2]], prop * win_frac))

    var_win_frac_a <-
      sum(with(list_cum[[1]], prop * win_frac ^ 2)) - winP_a ^ 2
    var_win_frac_b <-
      sum(with(list_cum[[2]], prop * win_frac ^ 2)) - winP_b ^ 2

    var_win_prob <- var_win_frac_a / sum_a + var_win_frac_b / sum_b

    se_win_prob <- sqrt(var_win_prob)

    ci_up <-
      exp(log(winP_a / (1 - winP_a)) - qnorm(1 - alpha / 2) * se_win_prob /
            (winP_a / (1 - winP_a))) /
      (1 + exp(log(winP_a / (1 - winP_a)) - qnorm(1 - alpha / 2) *
                 se_win_prob / (winP_a / (1 - winP_a))))

    ci_lo <-
      exp(log(winP_b / (1 - winP_b)) + qnorm(1 - alpha / 2) * se_win_prob /
            (winP_b / (1 - winP_b))) /
      (1 + exp(log(winP_b / (1 - winP_b)) + qnorm(1 - alpha / 2) *
                 se_win_prob / (winP_b / (1 - winP_b))))

    test_stat <-
      abs(log(winP_a / (1 - winP_a))) / (se_win_prob / (winP_a * (1 - winP_a)))
    p_val <- 2 * (1 - pnorm(test_stat))

    nnt <- 1 / (winP_a - 0.5)

    ss_n <- NA

    if (sample.size) {
      ss_n <-
        ceiling((group.ratio + 1) / group.ratio *
                  (qnorm(1 - alpha / 2) + qnorm(1 - beta)) ^ 2 *
                  (var_win_frac_a + group.ratio * var_win_frac_b) /
                  (winP_a * (1 - winP_a) * log(winP_a / (1 - winP_a))) ^ 2
        )
    }

    out <- list(
      list_cum = list_cum,
      group_levels = group_levels,
      sum_a = sum_a,
      sum_b = sum_b,
      winP_a = winP_a,
      winP_b = winP_b,
      var_win_frac_a = var_win_frac_a,
      var_win_frac_b = var_win_frac_b,
      var_win_prob = var_win_prob,
      se_win_prob = se_win_prob,
      conf.int = c(ci_lo, ci_up),
      test_stat = test_stat,
      p_val = p_val,
      nnt = nnt,
      ss_n = ss_n,
      param.record = list(
        data = data,
        response = response,
        group = group,
        alpha = alpha,
        beta = beta,
        group.ratio = group.ratio,
        sample.size = sample.size,
        print.tables = print.tables,
        dec = dec
      )
    )
    class(out) <- c("win_Prob", class(out))
    return(out)
  }

#' @title Prints win_prob results
#' @param x win_prob results.
#' @param ... ignored for now
#' @return Prints win_prob statistics.
#' @export
print.win_Prob <- function (x, ...) {
  args <- list(...)

  cat("\t Zou et al's winP (doi: 10.1161/STROKEAHA.121.037744) \n\n")
  cat(
    sprintf(
      "Probability of a random observation in %s group
      will have a higher response score than a random
      observation in %s group:\n\n",
      x$group_levels[2],
      x$group_levels[1]
    )
  )

  cat("  ")

  cat(
    sprintf(
      "\t   winP: %2.3f (%2.3f, %2.3f)      p=%1.4f",
      x$winP_a,
      x$conf.int[1],
      x$conf.int[2],
      x$p_val
    )
  )

  cat("\n")

  cat("--------------------------------------------\n\n")

  cat(sprintf("The numbers needed to treat (NNT) are: %s\n\n",
              ceiling(x$nnt)))

  cat("\n")

  if (x$param.record$sample.size) {
    cat("--------------------------------------------\n\n")

    cat(
      sprintf(
        "\tWith %s/%s ratio = %s and beta = %s
              the sample size needed is: %s\n\n",
        x$group_levels[1],
        x$group_levels[2],
        x$param.record$group.ratio,
        x$param.record$beta,
        x$ss_n
      )
    )
  }

  cat("\n")

  if (x$param.record$print.tables) {
    cat("--------------------------------------------\n\n")

    lc <- lapply(x$list_cum, function(i, t = c("prop", "win_frac")) {
      i[t] <- round(i[t], x$param.record$dec)
      i[, !names(i) == x$param.record$group]
    })

    for (i in x$group_levels) {
      tab <- knitr::kable(lc[[i]], row.names = FALSE)
      cat(sprintf("Results for the %s group:\n", i))
      cat(sprintf("\t%s\n",
                  tab))
      cat("\n")
    }
  }
  return(invisible(x))
}