Model Selection

R setup

library(eggla)
library(data.table)
library(scales)
library(ggplot2)
library(ggtext)
library(patchwork)

library(forcats)
library(ggbeeswarm)

# Colour-blind-friendly palette, written as #RRGGBBAA hex codes
# (the trailing "FF" makes every colour fully opaque).
okabe_ito_palette <- c(
  "#E69F00FF", # orange
  "#56B4E9FF", # sky blue
  "#009E73FF", # bluish green
  "#F0E442FF", # yellow
  "#0072B2FF", # blue
  "#D55E00FF", # vermillion
  "#CC79A7FF", # reddish purple
  "#999999FF"  # grey
)

Select the “best” model

Note: The following code assumes that the models-performance.csv files produced by vignette("models-diagnostics") are located in the "models-diagnostics" sub-directory of params[["working_directory"]].

# Directory expected to hold the outputs of the diagnostics step.
output_directory <- file.path(
  params[["working_directory"]],
  "models-diagnostics"
)

# Locate every performance table, searching sub-directories as well.
files <- list.files(
  output_directory,
  pattern = ".*models-performance.csv$",
  recursive = TRUE,
  full.names = TRUE
)

# Read each table and tag its rows with a cohort label: the file name
# truncated at its first "-".
perfs_dt <- rbindlist(lapply(files, function(path) {
  cohort_perfs <- fread(path)
  cohort_perfs[, cohort := sub("-.*", "", basename(path))]
}))

# Keep only the dataset/model pairs whose row count equals twice the number
# of files found (presumably two rows, e.g. one per sex, in every file --
# TODO confirm).
perfs_dt <- perfs_dt[
  , .SD[.N == length(files) * 2],
  by = c("dataset", "model")
]

# Performance metrics compared across models.
metrics <- c(
  "AIC", "BIC",
  "R2_conditional", "R2_marginal",
  "ICC", "RMSE", "Sigma"
)

# Centre and scale each metric within cohort, overwriting the columns
# by reference.
perfs_dt[
  , (metrics) := lapply(.SD, function(values) scale(values)[, 1]),
  by = "cohort",
  .SDcols = metrics
]
# Collapse the distinct values of `x` into a human-readable enumeration.
#
# The unique values are sorted, then joined with ", " except for the last
# pair, which is joined with " and " (e.g. "a, b and c").
#
# @param x An atomic vector (or factor) of values to enumerate.
# @return A single character string: "" when `x` has no value, the value
#   itself when there is only one, and the enumeration otherwise.
s_collapse <- function(x) {
  x <- as.character(sort(unique(x)))
  n <- length(x)
  # With zero or one value there is nothing to join; pasting an empty head
  # would otherwise leave a dangling " and " in front of the result.
  if (n <= 1L) {
    return(paste(x, collapse = ""))
  }
  paste(
    paste(x[-n], collapse = ", "),
    x[n],
    sep = " and "
  )
}

# Base plot shared by all panels: the x/y aesthetics are added per panel.
# Colour is driven by the model label truncated at its first " --".
p <- ggplot(
  data = perfs_dt,
  mapping = aes(colour = sub(" --.*", "", model))
) +
  # Outliers are hidden on the boxplot since the beeswarm layer already
  # draws every observation.
  geom_boxplot(outlier.shape = NA) +
  geom_beeswarm(shape = 1) +
  # Overlay the mean of each group as a large diamond.
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 18,
    size = 4,
    colour = okabe_ito_palette[6]
  ) +
  # Stagger the x-axis labels over two rows.
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  # Render all text elements as markdown/HTML so that tags such as "<br>"
  # in labels are interpreted; the colour legend is dropped.
  theme(
    legend.position = "none",
    axis.text.x = element_markdown(),
    axis.text.y = element_markdown(),
    axis.title.x = element_markdown(),
    axis.title.y = element_markdown(),
    plot.title = element_markdown(),
    plot.subtitle = element_markdown(face = "italic"),
    plot.caption = element_markdown(face = "italic")
  )

# Stack three panels built from the shared base plot `p`, one per metric.
# In each panel the models are ordered along the x-axis by the plotted
# metric, and " -- " in the model label is turned into a line break.
wrap_plots(
  # Panel A: AIC, models ordered by increasing AIC.
  p +
    aes(
      x = fct_reorder(sub(" -- ", "<br>", model), AIC),
      y = AIC
    ) +
    labs(x = NULL),
  # Panel B: RMSE, models ordered by increasing RMSE.
  p +
    aes(
      x = fct_reorder(sub(" -- ", "<br>", model), RMSE),
      y = RMSE
    ) +
    labs(x = NULL),
  # Panel C: conditional R2; the sign is flipped so that models are
  # ordered by decreasing R2.
  p +
    aes(
      x = fct_reorder(sub(" -- ", "<br>", model), -1 * R2_conditional),
      y = R2_conditional
    ) +
    labs(x = NULL),
  nrow = 3,
  ncol = 1
) +
  plot_annotation(
    title = "Working Models for Male And Female",
    subtitle = sprintf(
      "Cohorts: %s<br>Datasets: %s",
      s_collapse(perfs_dt[["cohort"]]),
      s_collapse(perfs_dt[["dataset"]])
    ),
    tag_levels = "A",
    # The annotation is rendered with its own theme, not the theme of the
    # individual panels: without a markdown subtitle element the "<br>" tag
    # would be printed literally instead of breaking the line.
    theme = theme(plot.subtitle = element_markdown())
  )

Mickaël Canouil, Ph.D.

R setup

Select the “best” model