Get Started with 'eggla'
Mickaël Canouil, Ph.D. (mickael.canouil@cnrs.fr)
Source:vignettes/eggla.Rmd
eggla.Rmd
Run Using Docker
Create run_eggla.R
script
Copy and edit the following code to a new file (e.g.,
/home/mcanouil/eggla/run_eggla.R
) on the server that will
run the analysis with the appropriate parameters.
# Fit the eggla growth models from inside the Docker container.
# NOTE: every path in this script is resolved *inside* the container, where the
# host directory is mounted at /tmp/eggla (see `--volume` in the `docker run`
# command); host paths such as /home/<user>/... are not visible there.
library(eggla)
library(data.table)
# Use the mounted directory so that whatever is written there is still available
# on the host once the container has been removed (`--rm`).
working_directory <- "/tmp/eggla" # To be modified by user
res <- try(run_eggla_lmm(
  data = fread("/tmp/eggla/bmigrowth.csv"), # to be changed with the path (inside the container) of the file containing the data
  id_variable = "ID",
  age_days_variable = NULL, # computed based on "age_years_variable" if not provided. Only used for QC.
  age_years_variable = "age",
  weight_kilograms_variable = "weight",
  height_centimetres_variable = "height",
  sex_variable = "sex",
  covariates = NULL,
  male_coded_zero = FALSE,
  random_complexity = "auto",
  use_car1 = FALSE,
  parallel = FALSE, # to parallelise Daymont QC
  parallel_n_chunks = 1, # to parallelise Daymont QC
  working_directory = working_directory
))
Run the script using a Docker container
Download the (latest) Docker image, or a specific version, from the GitHub Registry and run it with the following command:
# Start a detached, self-removing container that runs the analysis script
# from the host directory mounted at /tmp/eggla.
docker run \
  --detach \
  --rm \
  --name eggla-docker-analysis \
  --user vscode \
  --volume /home/mcanouil/eggla:/tmp/eggla \
  ghcr.io/mcanouil/eggla:latest \
  Rscript /tmp/eggla/run_eggla.R
Note: the left-hand side (LHS) of
/home/mcanouil/eggla:/tmp/eggla
is the directory on the
server; the right-hand side (RHS) is the path at which that directory is
mounted inside the Docker container.
Create run_eggla_gwas.R
Script
Copy and edit the following code to a new file (e.g.,
/home/mcanouil/eggla/run_eggla_gwas.R
) on the server that
will run the analysis with the appropriate parameters.
# Run the GWAS step from inside the Docker container.
# NOTE: the working directory must be the path of the mounted volume *inside*
# the container (/tmp/eggla in the `docker run` command), not the host path.
working_directory <- "/tmp/eggla"
setwd(working_directory) # all relative paths below are resolved from here
library(eggla)
chr_in_parallel <- FALSE # or 11 to run 11 chromosomes in 11 processes/cores
if (!isFALSE(chr_in_parallel)) {
  # Never request more workers than there are available cores.
  future::plan("multicore", workers = min(chr_in_parallel, future::availableCores()))
}
run_eggla_gwas(
  data = "./bmigrowth.csv",
  # Anchored so that only files ending in ".zip" are picked up.
  results_zip = list.files(path = ".", pattern = "\\.zip$", full.names = TRUE),
  id_column = "ID",
  traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"),
  covariates = c("sex"),
  # Both dots are escaped so that only ".vcf" and ".vcf.gz" files match.
  vcfs = list.files(
    path = file.path(".", "vcf"),
    pattern = "\\.vcf$|\\.vcf\\.gz$",
    full.names = TRUE
  ),
  vep_file = NULL,
  working_directory = ".",
  bin_path = list(
    bcftools = "/usr/bin/bcftools",
    plink2 = "/usr/bin/plink2"
  ),
  threads = 1
)
Run the script using a Docker container
Download the (latest) Docker image, or a specific version, from the GitHub Registry and run it with the following command:
# Start a detached, self-removing container that runs the GWAS script
# from the host directory mounted at /tmp/eggla.
docker run \
  --detach \
  --rm \
  --name eggla-docker-analysis \
  --user vscode \
  --volume /home/mcanouil/eggla:/tmp/eggla \
  ghcr.io/mcanouil/eggla:latest \
  Rscript /tmp/eggla/run_eggla_gwas.R
Note: the left-hand side (LHS) of
/home/mcanouil/eggla:/tmp/eggla
is the directory on the
server; the right-hand side (RHS) is the path at which that directory is
mounted inside the Docker container.
Run Non-Interactively (In Bash/Shell)
Create run_eggla.sh
script
Copy and edit the following code to a new file (e.g.,
run_eggla.sh
) on the server that will run the analysis with
the appropriate parameters.
#!/bin/bash
# Install eggla and its pinned dependencies into a project-local R library
# ("./R"), then fit the growth models on "bmigrowth.csv".
home_analysis="/home/mcanouil/eggla" # to be changed to the folder in which "egg_analysis" is to be performed
# -p: do not fail when the directory already exists; quotes protect paths containing spaces.
mkdir -p "$home_analysis"
cd "$home_analysis" || exit

# Step 1: install the packages.
# "-" tells Rscript to read the script from standard input (the here-document);
# the quoted 'EOF' delimiter prevents the shell from expanding "$" or
# backslashes inside the R code.
Rscript - <<'EOF'
temp_library <- file.path(".", "R")
# showWarnings = FALSE: the library may already exist from a previous run.
dir.create(temp_library, recursive = TRUE, showWarnings = FALSE)
.libPaths(temp_library)
utils::install.packages(
  pkgs = "pak",
  lib = temp_library,
  repos = sprintf(
    "https://r-lib.github.io/p/pak/stable/%s/%s/%s",
    .Platform[["pkgType"]], R.Version()[["os"]], R.Version()[["arch"]]
  )
)
library(pak)
utils::download.file(
  url = "https://raw.githubusercontent.com/mcanouil/eggla/latest/.github/.devcontainer/eggla/R/pkg.lock",
  destfile = "pkg.lock"
)
lockfile_install(lockfile = "pkg.lock", lib = temp_library)
pkg_install("mcanouil/eggla@latest", lib = temp_library, upgrade = FALSE, dependencies = FALSE)
EOF

# Step 2: run the analysis in a fresh R process.
Rscript - <<'EOF'
# A new R process does not remember the library path set during step 1:
# make the project-local library visible again before loading eggla.
.libPaths(file.path(".", "R"))
library(eggla)
library(data.table)
res <- try(run_eggla_lmm(
  data = fread("bmigrowth.csv"), # to be changed with the path of the file containing the data
  id_variable = "ID",
  age_days_variable = NULL, # computed based on "age_years_variable" if not provided. Only used for QC.
  age_years_variable = "age",
  weight_kilograms_variable = "weight",
  height_centimetres_variable = "height",
  sex_variable = "sex",
  covariates = NULL,
  male_coded_zero = FALSE,
  random_complexity = "auto",
  use_car1 = FALSE,
  parallel = FALSE, # to parallelise Daymont QC
  parallel_n_chunks = 1, # to parallelise Daymont QC
  working_directory = "."
))
# Abort with a non-zero exit status when the analysis failed. Nothing is
# deleted: the working directory is "." and also contains the input data.
if (inherits(res, "try-error")) {
  stop(
    "run_eggla_lmm() failed: ", conditionMessage(attr(res, "condition")),
    call. = FALSE
  )
}
EOF
Run the GWASs
Run GWASs on slopes and AUCs for male and female.
#!/bin/bash
# Run the GWAS step on the traits derived from the growth models.
home_analysis="/home/mcanouil/eggla" # to be changed to the folder in which "egg_analysis" is to be performed
# -p: do not fail when the directory already exists; quotes protect paths containing spaces.
mkdir -p "$home_analysis"
cd "$home_analysis" || exit

# "-" tells Rscript to read the script from standard input (the here-document);
# the quoted 'EOF' delimiter passes the R code through verbatim, so regular
# expressions are written exactly as in a regular R script ("\\." not "\\\\.").
Rscript - <<'EOF'
# Make the project-local library ("./R") visible to this R process
# (directories that do not exist are ignored by .libPaths()).
.libPaths(file.path(".", "R"))
library(eggla)
chr_in_parallel <- FALSE # or 11 to run 11 chromosomes in 11 processes/cores
if (!isFALSE(chr_in_parallel)) {
  if (nzchar(system.file(package = "pak"))) {
    pak::pkg_install(c("future", "future.apply"))
  } else {
    # A repository must be given explicitly: a non-interactive session cannot
    # prompt for a CRAN mirror.
    install.packages(c("future", "future.apply"), repos = "https://cloud.r-project.org")
  }
  # Never request more workers than there are available cores.
  future::plan("multicore", workers = min(chr_in_parallel, future::availableCores()))
}
run_eggla_gwas(
  data = "bmigrowth.csv",
  # Anchored so that only files ending in ".zip" are picked up.
  results_zip = list.files(path = ".", pattern = "\\.zip$", full.names = TRUE),
  id_column = "ID",
  traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"),
  covariates = c("sex"),
  # Both dots are escaped so that only ".vcf" and ".vcf.gz" files match.
  vcfs = list.files(
    path = file.path(".", "vcf"),
    pattern = "\\.vcf$|\\.vcf\\.gz$",
    full.names = TRUE
  ),
  vep_file = NULL,
  working_directory = ".",
  bin_path = list(
    bcftools = "/usr/bin/bcftools",
    plink2 = "https://s3.amazonaws.com/plink2-assets/plink2_linux_x86_64_20230109.zip"
    # check before using: https://www.cog-genomics.org/plink/2.0/
  ),
  threads = 1
)
EOF
Run Interactively In Bash/R
Start R and set up the environment
Start R and set up the working directory, using pak
to
restore a predefined version of the packages.
# Create a project-local library ("./R") and register it on the library path.
temp_library <- file.path(".", "R")
dir.create(temp_library, recursive = TRUE)
.libPaths(temp_library)

# Install pak from its own repository; the URL is built from the package type,
# operating system and architecture reported by this R installation.
pak_repository <- sprintf(
  "https://r-lib.github.io/p/pak/stable/%s/%s/%s",
  .Platform[["pkgType"]], R.Version()[["os"]], R.Version()[["arch"]]
)
utils::install.packages(pkgs = "pak", lib = temp_library, repos = pak_repository)
library(pak)

# Download the lock file published with eggla and install from it, then
# install eggla itself without upgrading or re-resolving dependencies.
lock_file <- "pkg.lock"
utils::download.file(
  url = "https://raw.githubusercontent.com/mcanouil/eggla/latest/.github/.devcontainer/eggla/R/pkg.lock",
  destfile = lock_file
)
lockfile_install(lockfile = lock_file, lib = temp_library)
pkg_install(
  "mcanouil/eggla@v1.0.0",
  lib = temp_library,
  upgrade = FALSE,
  dependencies = FALSE
)
Run the analysis
Restart R before running the analysis.
# Fit the eggla growth models on "bmigrowth.csv" from the analysis directory.
# setwd("/home/mcanouil/eggla") # already set if step 1 was done
# R has been restarted, so the library path set during the setup step is gone:
# make the project-local library ("./R") visible again before loading eggla.
.libPaths(file.path(".", "R"))
library(eggla)
library(data.table)
res <- try(
  run_eggla_lmm(
    data = fread("bmigrowth.csv"),
    id_variable = "ID",
    age_days_variable = NULL,
    age_years_variable = "age",
    weight_kilograms_variable = "weight",
    height_centimetres_variable = "height",
    sex_variable = "sex",
    covariates = NULL,
    male_coded_zero = FALSE,
    random_complexity = "auto",
    use_car1 = FALSE,
    parallel = FALSE,
    parallel_n_chunks = 1,
    working_directory = "."
  )
)
# Report the failure explicitly. Nothing is deleted: the working directory is
# "." and also contains the input data.
if (inherits(res, "try-error")) {
  stop(
    "run_eggla_lmm() failed: ", conditionMessage(attr(res, "condition")),
    call. = FALSE
  )
}
Run the GWASs
Run the GWASs on slopes and AUCs for male and female.
# Run the GWAS step on the traits derived from the growth models.
# setwd("/home/mcanouil/eggla") # already set if step 1 was done
# Make the project-local library ("./R") visible in case this is a fresh R
# session (directories that do not exist are ignored by .libPaths()).
.libPaths(file.path(".", "R"))
library(eggla)
chr_in_parallel <- FALSE # or 11 to run 11 chromosomes in 11 processes/cores
if (!isFALSE(chr_in_parallel)) {
  if (nzchar(system.file(package = "pak"))) {
    pak::pkg_install(c("future", "future.apply"))
  } else {
    install.packages(c("future", "future.apply"))
  }
  # Never request more workers than there are available cores.
  future::plan("multicore", workers = min(chr_in_parallel, future::availableCores()))
}
run_eggla_gwas(
  data = "bmigrowth.csv",
  # Anchored so that only files ending in ".zip" are picked up.
  results_zip = list.files(path = ".", pattern = "\\.zip$", full.names = TRUE),
  id_column = "ID",
  traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"),
  covariates = c("sex"),
  # Both dots are escaped so that only ".vcf" and ".vcf.gz" files match.
  vcfs = list.files(
    path = file.path(".", "vcf"),
    pattern = "\\.vcf$|\\.vcf\\.gz$",
    full.names = TRUE
  ),
  vep_file = NULL,
  working_directory = ".",
  bin_path = list(
    bcftools = "/usr/bin/bcftools",
    plink2 = "https://s3.amazonaws.com/plink2-assets/plink2_linux_x86_64_20230109.zip"
    # check before using: https://www.cog-genomics.org/plink/2.0/
  ),
  threads = 1
)