Gains Curve — create_gains_curve • rtichoke

Create a Gains Curve

Usage

create_gains_curve(
  probs,
  reals,
  by = 0.01,
  stratified_by = "probability_threshold",
  chosen_threshold = NA,
  interactive = TRUE,
  color_values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#07004D", "#E6AB02",
    "#FE5F55", "#54494B", "#006E90", "#BC96E6", "#52050A", "#1F271B", "#BE7C4D",
    "#63768D", "#08A045", "#320A28", "#82FF9E", "#2176FF", "#D1603D", "#585123"),
  size = NULL
)

Arguments

probs: a list of vectors of estimated probabilities (one for each model or one for each population)
reals: a list of vectors of binary outcomes (one for each population)
by: a number giving the increment of the sequence (default 0.01).
stratified_by: performance metrics can be stratified by probability threshold ("probability_threshold", the default) or alternatively by Predicted Positives Condition Rate ("ppcr")
chosen_threshold: a chosen threshold to display (for non-interactive plots)
interactive: whether the plot should be interactive (default TRUE)
color_values: color palette
size: the size of the curve

Examples

if (FALSE) {

create_gains_curve(
  probs = list(example_dat$estimated_probabilities),
  reals = list(example_dat$outcome)
)

create_gains_curve(
  probs = list(example_dat$estimated_probabilities),
  reals = list(example_dat$outcome),
  stratified_by = "ppcr"
)

create_gains_curve(
  probs = list(
    "First Model" = example_dat$estimated_probabilities,
    "Second Model" = example_dat$random_guess
  ),
  reals = list(example_dat$outcome)
)


create_gains_curve(
  probs = list(
    "First Model" = example_dat$estimated_probabilities,
    "Second Model" = example_dat$random_guess
  ),
  reals = list(example_dat$outcome),
  stratified_by = "ppcr"
)


create_gains_curve(
  probs = list(
    "train" = example_dat %>%
      dplyr::filter(type_of_set == "train") %>%
      dplyr::pull(estimated_probabilities),
    "test" = example_dat %>% dplyr::filter(type_of_set == "test") %>%
      dplyr::pull(estimated_probabilities)
  ),
  reals = list(
    "train" = example_dat %>% dplyr::filter(type_of_set == "train") %>%
      dplyr::pull(outcome),
    "test" = example_dat %>% dplyr::filter(type_of_set == "test") %>%
      dplyr::pull(outcome)
  )
)

create_gains_curve(
  probs = list(
    "train" = example_dat %>%
      dplyr::filter(type_of_set == "train") %>%
      dplyr::pull(estimated_probabilities),
    "test" = example_dat %>% dplyr::filter(type_of_set == "test") %>%
      dplyr::pull(estimated_probabilities)
  ),
  reals = list(
    "train" = example_dat %>% dplyr::filter(type_of_set == "train") %>%
      dplyr::pull(outcome),
    "test" = example_dat %>% dplyr::filter(type_of_set == "test") %>%
      dplyr::pull(outcome)
  ),
  stratified_by = "ppcr"
)
}