Model Selection
Mickaël Canouil, Ph.D. (mickael.canouil@cnrs.fr)
Source:vignettes/articles/model-selection.Rmd
model-selection.Rmd
Select the “best” model
Note: The following code assumes that the models-performance.csv files produced by vignette("models-diagnostics") are located in the "models-diagnostics" directory under params[["working_directory"]].
# Directory read below; built from the working directory given in `params`.
output_directory <- file.path(params[["working_directory"]], "models-diagnostics")

# Collect every performance table, searching sub-directories as well.
# The dot before "csv" is escaped so only a literal ".csv" extension matches.
files <- list.files(
  path = output_directory,
  pattern = "models-performance\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)

# Fail early with an explicit message instead of a cryptic grouping error
# further down when nothing was found.
if (length(files) == 0L) {
  stop(
    "No 'models-performance.csv' file found in '", output_directory, "'.",
    call. = FALSE
  )
}

# Stack all tables, tagging each row with its cohort (the part of the file
# name before the first "-"), then keep only the dataset/model combinations
# that have exactly two rows per file.
# NOTE(review): the factor 2 presumably stands for the two sexes -- confirm.
perfs_dt <- rbindlist(lapply(
  X = files,
  FUN = function(x) fread(x)[j = cohort := sub("-.*", "", basename(x))]
))[j = .SD[.N == length(files) * 2], by = c("dataset", "model")]

# Performance metrics to standardise.
metrics <- c("AIC", "BIC", "R2_conditional", "R2_marginal", "ICC", "RMSE", "Sigma")

# Centre and scale each metric within cohort, updating `perfs_dt` by reference.
# `scale()` returns a one-column matrix, hence the `[, 1]`.
perfs_dt[
  j = c(metrics) := lapply(.SD, function(x) scale(x)[, 1]),
  by = "cohort",
  .SDcols = metrics
]
# Collapse the unique, sorted values of `x` into a human-readable enumeration.
#
# Args:
#   x: An atomic vector (e.g. character); duplicates are dropped and `sort()`
#      removes missing values.
#
# Returns:
#   A single string: "" when there is no value, "a" for one value,
#   "a and b" for two values, and "a, b and c" for three or more.
s_collapse <- function(x) {
  x <- sort(unique(x))
  n <- length(x)
  # With fewer than two values there is nothing to join with " and ";
  # without this guard the result would be " and a" (or " and " when empty).
  if (n < 2L) {
    return(paste(x, collapse = ""))
  }
  paste(
    paste(x[-n], collapse = ", "),
    x[n],
    sep = " and "
  )
}
# Base plot shared by every panel: the x/y aesthetics are added per panel.
# Colour is derived from the part of the model label before " --".
p <- ggplot(
  data = perfs_dt,
  mapping = aes(colour = sub(" --.*", "", model))
) +
  # Outliers are hidden on the boxplot because every point is drawn by the
  # beeswarm layer.
  geom_boxplot(outlier.shape = NA) +
  geom_beeswarm(shape = 1) +
  # Overlay the mean as a filled diamond.
  stat_summary(
    fun = mean,
    geom = "point",
    colour = okabe_ito_palette[6],
    shape = 18,
    size = 4
  ) +
  # Stagger x-axis labels on two rows.
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  # Render all text elements as markdown and drop the legend.
  theme(
    legend.position = "none",
    axis.title.x = element_markdown(),
    axis.title.y = element_markdown(),
    axis.text.x = element_markdown(),
    axis.text.y = element_markdown(),
    plot.title = element_markdown(),
    plot.subtitle = element_markdown(face = "italic"),
    plot.caption = element_markdown(face = "italic")
  )
# Stack three panels built from `p`, one per metric. In each panel the models
# are ordered along x by the plotted metric; the R2 panel orders by the
# negated value.
wrap_plots(
  list(
    # Panel A: AIC.
    p +
      aes(
        x = fct_reorder(sub(" -- ", "<br>", model), AIC),
        y = AIC
      ) +
      labs(x = NULL),
    # Panel B: RMSE.
    p +
      aes(
        x = fct_reorder(sub(" -- ", "<br>", model), RMSE),
        y = RMSE
      ) +
      labs(x = NULL),
    # Panel C: conditional R2.
    p +
      aes(
        x = fct_reorder(sub(" -- ", "<br>", model), -R2_conditional),
        y = R2_conditional
      ) +
      labs(x = NULL)
  ),
  ncol = 1,
  nrow = 3
) +
  # Shared title, a subtitle listing the cohorts and datasets, and A/B/C tags.
  plot_annotation(
    title = "Working Models for Male And Female",
    subtitle = sprintf(
      "Cohorts: %s<br>Datasets: %s",
      s_collapse(perfs_dt[["cohort"]]),
      s_collapse(perfs_dt[["dataset"]])
    ),
    tag_levels = "A"
  )