stRoke/R/win_prob.R

244 lines
6.9 KiB
R

#' @title Calculates the probability of winning
#' @description Calculates the probability of winning (winP)
#' @param data A data frame containing the response and group variable.
#' @param response The name of the response variable.
#' Takes first column if empty.
#' @param group The name of the group variable.
#' Takes second column if empty.
#' @param alpha The alpha level for the hypothesis test. Default is 0.05.
#' @param beta The beta level for the sample size calculation. Default is 0.2.
#' @param group.ratio The ratio of group sizes. Default is 1.
#' @param sample.size Flag to include sample size calculation. Default is FALSE.
#' @param print.tables Flag to print cumulative tables. Default is FALSE.
#' @param dec Numeric for decimals to print. Default is 3.
#' @return A list containing the win_prob statistic, the confidence interval,
#' and the p-value.
#' @export
#' @importFrom stats pnorm qnorm xtabs
#' @source \doi{10.1161/STROKEAHA.121.037744}
#' @examples
#' win_prob(data=stRoke::talos,response="mrs_6",group="rtreat")
win_prob <-
function(data,
response = NULL,
group = NULL,
alpha = 0.05,
beta = 0.2,
group.ratio = 1,
sample.size = FALSE,
print.tables = FALSE,
dec = 3) {
if (is.null(response)){
response <- names(data)[1]
}
if (is.null(group)){
group <- names(data)[2]
}
group_levels <- levels(factor(data[, group]))
if (length(group_levels) != 2) {
stop("The group has to contain 2, and only 2 levels.")
}
if (!is.numeric(group.ratio)){
stop("Group ratio must be a numeric")
}
if (!is.logical(sample.size)){
stop("Sample size must be a logical")
}
# Internal helper function for calculating ranks
freq_rank <- function(data, x = "Freq") {
lapply(data, function(i) {
rank <- c()
n_i <- nrow(i)
for (j in seq_len(n_i)) {
if (j < n_i) {
rank[j] <- ((i[j, x] + 1) / 2 + (sum(i[seq_len(n_i)[(j + 1):n_i], x])))
}
if (j == n_i) {
rank[j] <- (i[j, x] + 1) / 2
}
}
cbind(i, rank)
})
}
overall <-
freq_rank(list(data.frame(xtabs(data = data[c(response)]))))[[1]]
tbl <- xtabs(data = data[c(response, group)])
tbl_df <- data.frame(tbl)
prop_df <- data.frame(proportions(tbl, group))
df <- cbind(tbl_df, prop = prop_df[, "Freq"])
list_cum <- split(df, df[, group])
list_cum <- lapply(list_cum, function(i) {
data.frame(i, overall_rank = overall$rank)
})
list_cum <- freq_rank(list_cum)
sum_a <- sum(df$Freq[df$rtreat == group_levels[1]])
sum_b <- sum(df$Freq[df$rtreat == group_levels[2]])
list_cum[[1]]$win_frac <-
with(list_cum[[1]], overall_rank - rank) / sum_b
list_cum[[2]]$win_frac <-
with(list_cum[[2]], overall_rank - rank) / sum_a
winP_a <- sum(with(list_cum[[1]], prop * win_frac))
winP_b <- sum(with(list_cum[[2]], prop * win_frac))
var_win_frac_a <-
sum(with(list_cum[[1]], prop * win_frac ^ 2)) - winP_a ^ 2
var_win_frac_b <-
sum(with(list_cum[[2]], prop * win_frac ^ 2)) - winP_b ^ 2
var_win_prob <- var_win_frac_a / sum_a + var_win_frac_b / sum_b
se_win_prob <- sqrt(var_win_prob)
ci_up <-
exp(log(winP_a / (1 - winP_a)) - qnorm(1 - alpha / 2) * se_win_prob /
(winP_a / (1 - winP_a))) /
(1 + exp(log(winP_a / (1 - winP_a)) - qnorm(1 - alpha / 2) *
se_win_prob / (winP_a / (1 - winP_a))))
ci_lo <-
exp(log(winP_b / (1 - winP_b)) + qnorm(1 - alpha / 2) * se_win_prob /
(winP_b / (1 - winP_b))) /
(1 + exp(log(winP_b / (1 - winP_b)) + qnorm(1 - alpha / 2) *
se_win_prob / (winP_b / (1 - winP_b))))
test_stat <-
abs(log(winP_a / (1 - winP_a))) / (se_win_prob / (winP_a * (1 - winP_a)))
p_val <- 2 * (1 - pnorm(test_stat))
nnt <- 1 / (winP_a - 0.5)
ss_n <- NA
if (sample.size) {
ss_n <-
ceiling((group.ratio + 1) / group.ratio *
(qnorm(1 - alpha / 2) + qnorm(1 - beta)) ^ 2 *
(var_win_frac_a + group.ratio * var_win_frac_b) /
(winP_a * (1 - winP_a) * log(winP_a / (1 - winP_a))) ^ 2
)
}
out <- list(list_cum=list_cum,
group_levels=group_levels,
sum_a=sum_a,
sum_b=sum_b,
winP_a=winP_a,
winP_b=winP_b,
var_win_frac_a=var_win_frac_a,
var_win_frac_b=var_win_frac_b,
var_win_prob=var_win_prob,
se_win_prob=se_win_prob,
conf.int=c(ci_lo,ci_up),
test_stat=test_stat,
p_val=p_val,
nnt=nnt,
ss_n=ss_n,
param.record = list(
data = data,
response = response,
group = group,
alpha = alpha,
beta = beta,
group.ratio = group.ratio,
sample.size = sample.size,
print.tables = print.tables,
dec = dec
)
)
class(out) <- c("win_Prop", class(out))
return(out)
}
print.win_Prop <- function (x, ...) {
args <- list(...)
cat("\t Zou et al's winP (doi: 10.1161/STROKEAHA.121.037744) \n\n")
cat(
sprintf(
"Probability of a random observation in %s group \nwill have a higher response score than a random\nobservation in %s group:\n\n",
x$group_levels[2],
x$group_levels[1]
)
)
cat(" ")
cat(
sprintf(
"\t winP: %2.3f (%2.3f, %2.3f) p=%1.4f",
x$winP_a,
x$conf.int[1],
x$conf.int[2],
x$p_val
)
)
cat("\n")
cat("--------------------------------------------\n\n")
cat(sprintf("The numbers needed to treat (NNT) are: %s\n\n",
ceiling(x$nnt)))
cat("\n")
if (x$param.record$sample.size) {
cat("--------------------------------------------\n\n")
cat(
sprintf(
"\tWith %s/%s ratio = %s and beta = %s
the sample size needed is: %s\n\n",
x$group_levels[1],
x$group_levels[2],
x$param.record$group.ratio,
x$param.record$beta,
x$ss_n
)
)
}
cat("\n")
if (x$param.record$print.tables) {
cat("--------------------------------------------\n\n")
lc <- lapply(x$list_cum,function(i,t = c("prop","win_frac")){
i[t] <- round(i[t],x$param.record$dec)
i[,!names(i) == x$param.record$group]
})
for (i in x$group_levels) {
tab <- knitr::kable(lc[[i]], row.names = FALSE)
cat(sprintf("Results for the %s group:\n", i))
cat(sprintf("\t%s\n",
tab))
cat("\n")
}
}
return(invisible(x))
}