Skip to contents

Run Using Docker

Create run_eggla.R script

Copy and edit the following code to a new file (e.g., /home/mcanouil/eggla/run_eggla.R) on the server that will run the analysis with the appropriate parameters.

# Fit the eggla growth models and write the result archives.
# NOTE: this script is executed *inside* the Docker container, so every path
# below is a container path. The host directory is mounted at "/tmp/eggla"
# (see the `--volume` option of the `docker run` command).
library(eggla)
library(data.table)
# Must point inside the mounted volume: files written elsewhere in the
# container (e.g. in tempdir()) are not visible from the host.
working_directory <- "/tmp/eggla" # To be modified by user
res <- try(run_eggla_lmm(
  data = fread(file.path(working_directory, "bmigrowth.csv")), # to be changed with the path of the file containing the data
  id_variable = "ID",
  age_days_variable = NULL, # computed based on "age_years_variable" if not provided. Only used for QC.
  age_years_variable = "age",
  weight_kilograms_variable = "weight",
  height_centimetres_variable = "height",
  sex_variable = "sex",
  covariates = NULL,
  male_coded_zero = FALSE,
  random_complexity = "auto",
  use_car1 = FALSE,
  parallel = FALSE, # to parallelise Daymont QC
  parallel_n_chunks = 1, # to parallelise Daymont QC
  working_directory = working_directory
))

Run the script using a Docker container

Download the latest Docker image (or a specific version) from the GitHub Container Registry and run it with the following command:

# Start a detached, self-removing container that runs the script found in the
# mounted directory (host /home/mcanouil/eggla -> container /tmp/eggla).
docker run \
  --detach \
  --rm \
  --name eggla-docker-analysis \
  --user vscode \
  --volume /home/mcanouil/eggla:/tmp/eggla \
  ghcr.io/mcanouil/eggla:latest \
  Rscript /tmp/eggla/run_eggla.R

Note: in /home/mcanouil/eggla:/tmp/eggla, the left-hand side (LHS) is the directory on the server, and the right-hand side (RHS) is the path at which that directory appears inside the Docker container.

Retrieve the outputs

/home/mcanouil/eggla/
├── 2021-11-23-female.zip
└── 2021-11-23-male.zip

Create run_eggla_gwas.R Script

Copy and edit the following code to a new file (e.g., /home/mcanouil/eggla/run_eggla_gwas.R) on the server that will run the analysis with the appropriate parameters.

# Run the GWASs on the traits derived from the eggla growth models.
# NOTE: this script is executed *inside* the Docker container, where the host
# directory is mounted at "/tmp/eggla" (see the `--volume` option of the
# `docker run` command); the host path does not exist in the container.
working_directory <- "/tmp/eggla"
setwd(working_directory)
library(eggla)
chr_in_parallel <- FALSE # or 11 to run 11 chromosomes in 11 processes/cores
if (!isFALSE(chr_in_parallel)) {
  future::plan("multicore", workers = min(chr_in_parallel, future::availableCores()))
}
run_eggla_gwas(
  data = "./bmigrowth.csv",
  # "$" anchors the extension so that e.g. "x.zip.bak" is not picked up
  results_zip = list.files(path = ".", pattern = "\\.zip$", full.names = TRUE),
  id_column = "ID",
  traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"),
  covariates = c("sex"),
  # matches "*.vcf" and "*.vcf.gz" (every dot is escaped)
  vcfs = list.files(path = file.path(".", "vcf"), pattern = "\\.vcf(\\.gz)?$", full.names = TRUE),
  vep_file = NULL,
  working_directory = ".",
  bin_path = list(
    bcftools = "/usr/bin/bcftools",
    plink2 = "/usr/bin/plink2"
  ),
  threads = 1
)

Run the script using a Docker container

Download the latest Docker image (or a specific version) from the GitHub Container Registry and run it with the following command:

# Start a detached, self-removing container that runs the GWAS script found in
# the mounted directory (host /home/mcanouil/eggla -> container /tmp/eggla).
docker run \
  --detach \
  --rm \
  --name eggla-docker-analysis \
  --user vscode \
  --volume /home/mcanouil/eggla:/tmp/eggla \
  ghcr.io/mcanouil/eggla:latest \
  Rscript /tmp/eggla/run_eggla_gwas.R

Note: in /home/mcanouil/eggla:/tmp/eggla, the left-hand side (LHS) is the directory on the server, and the right-hand side (RHS) is the path at which that directory appears inside the Docker container.

Retrieve the outputs

/home/mcanouil/eggla/
├── gwas_software.txt
└── gwas.txt.gz

Run Non-Interactively (In Bash/Shell)

Create run_eggla.sh script

Copy and edit the following code to a new file (e.g., run_eggla.sh) on the server that will run the analysis with the appropriate parameters.

#!/bin/bash
# Install eggla and its locked dependencies into a project-local R library
# ("./R"), then fit the growth models on "bmigrowth.csv".

# Stop at the first failing step (e.g. do not run the analysis if the
# installation failed).
set -e

home_analysis="/home/mcanouil/eggla" # to be changed to the folder in which "egg_analysis" is to be performed

# -p: do not fail when the directory already exists
mkdir -p "$home_analysis"

cd "$home_analysis" || exit

# "-" makes Rscript read the script from standard input (the here-document).
# Quoting 'EOF' prevents the shell from expanding anything inside the R code.
Rscript - <<'EOF'
temp_library <- file.path(".", "R")
dir.create(temp_library, recursive = TRUE, showWarnings = FALSE)
.libPaths(temp_library)
utils::install.packages(
  pkgs = "pak",
  lib = temp_library,
  repos = sprintf(
    "https://r-lib.github.io/p/pak/stable/%s/%s/%s",
    .Platform[["pkgType"]], R.Version()[["os"]], R.Version()[["arch"]]
  )
)
library(pak)
utils::download.file(
  url = "https://raw.githubusercontent.com/mcanouil/eggla/latest/.github/.devcontainer/eggla/R/pkg.lock",
  destfile = "pkg.lock"
)
lockfile_install(lockfile = "pkg.lock", lib = temp_library)
pkg_install("mcanouil/eggla@latest", lib = temp_library, upgrade = FALSE, dependencies = FALSE)
EOF

Rscript - <<'EOF'
# This is a new R session: register the project-local library again so that
# the packages installed by the previous step can be loaded.
.libPaths(file.path(".", "R"))
library(eggla)
library(data.table)
res <- try(run_eggla_lmm(
  data = fread("bmigrowth.csv"), # to be changed with the path of the file containing the data
  id_variable = "ID",
  age_days_variable = NULL, # computed based on "age_years_variable" if not provided. Only used for QC.
  age_years_variable = "age",
  weight_kilograms_variable = "weight",
  height_centimetres_variable = "height",
  sex_variable = "sex",
  covariates = NULL,
  male_coded_zero = FALSE,
  random_complexity = "auto",
  use_car1 = FALSE,
  parallel = FALSE, # to parallelise Daymont QC
  parallel_n_chunks = 1, # to parallelise Daymont QC
  working_directory = "."
))
# Report the failure through the exit status. Nothing is deleted: the working
# directory (".") also contains the input data and the installed library.
if (inherits(res, "try-error")) quit(save = "no", status = 1L)
EOF

Run the analysis

# Execute the script created in the previous step
bash run_eggla.sh

Retrieve the outputs

/home/mcanouil/eggla/
├── 2021-11-23-female.zip
└── 2021-11-23-male.zip

Run the GWASs

Run GWASs on slopes and AUCs for male and female.

#!/bin/bash
# Run the GWASs on the traits derived from the eggla growth models, using the
# result archives (*.zip) and the VCF files found in the analysis folder.

home_analysis="/home/mcanouil/eggla" # to be changed to the folder in which "egg_analysis" is to be performed

# -p: do not fail when the directory already exists (e.g. created by the
# previous step)
mkdir -p "$home_analysis"

cd "$home_analysis" || exit

# "-" makes Rscript read the script from standard input (the here-document).
# Quoting 'EOF' prevents the shell from touching the R code, so regular
# expressions are written exactly as in a normal R script.
Rscript - <<'EOF'
# Register the project-local library ("./R"), if it exists, so that packages
# installed there can be loaded.
.libPaths(file.path(".", "R"))
library(eggla)
chr_in_parallel <- FALSE # or 11 to run 11 chromosomes in 11 processes/cores
if (!isFALSE(chr_in_parallel)) {
  if (nzchar(system.file(package = "pak"))) {
    pak::pkg_install(c("future", "future.apply"))
  } else {
    install.packages(c("future", "future.apply"))
  }
  future::plan("multicore", workers = min(chr_in_parallel, future::availableCores()))
}
run_eggla_gwas(
  data = "bmigrowth.csv",
  # "$" anchors the extension so that e.g. "x.zip.bak" is not picked up
  results_zip = list.files(path = ".", pattern = "\\.zip$", full.names = TRUE),
  id_column = "ID",
  traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"),
  covariates = c("sex"),
  # matches "*.vcf" and "*.vcf.gz" (every dot is escaped)
  vcfs = list.files(path = file.path(".", "vcf"), pattern = "\\.vcf(\\.gz)?$", full.names = TRUE),
  vep_file = NULL,
  working_directory = ".",
  bin_path = list(
    bcftools = "/usr/bin/bcftools",
    plink2 = "https://s3.amazonaws.com/plink2-assets/plink2_linux_x86_64_20230109.zip"
    # check before using: https://www.cog-genomics.org/plink/2.0/
  ),
  threads = 1
)
EOF

Retrieve the outputs

/home/mcanouil/eggla/
├── gwas_software.txt
└── gwas.txt.gz

Run Interactively In Bash/R

Create the working directory

home_analysis="/home/mcanouil/eggla" # to be changed to the folder in which "egg_analysis" is to be performed

# -p: do not fail when the directory already exists
mkdir -p "$home_analysis"

# Quoted so that a path containing spaces is handled correctly
cd "$home_analysis" || exit

Start R and set up the environment

Start R and set up the working directory, using pak to restore the predefined versions of the packages.

# Create a project-local library ("./R") and install pak, the locked
# dependencies and eggla into it.
temp_library <- file.path(".", "R")
# showWarnings = FALSE: no warning when the library already exists (re-run)
dir.create(temp_library, recursive = TRUE, showWarnings = FALSE)
.libPaths(temp_library)
utils::install.packages(
  pkgs = "pak",
  lib = temp_library,
  repos = sprintf(
    "https://r-lib.github.io/p/pak/stable/%s/%s/%s",
    .Platform[["pkgType"]], R.Version()[["os"]], R.Version()[["arch"]]
  )
)
library(pak)
utils::download.file(
  url = "https://raw.githubusercontent.com/mcanouil/eggla/latest/.github/.devcontainer/eggla/R/pkg.lock",
  destfile = "pkg.lock"
)
lockfile_install(lockfile = "pkg.lock", lib = temp_library)
# Same reference ("latest") as the lockfile downloaded above, so that eggla and
# its locked dependencies come from the same release.
# NOTE(review): replace "latest" with a version tag in BOTH places to pin a release.
pkg_install("mcanouil/eggla@latest", lib = temp_library, upgrade = FALSE, dependencies = FALSE)

Run the analysis

Restart R before running the analysis.

# setwd("/home/mcanouil/eggla") # already set if step 1 was done
# R was restarted: register the project-local library ("./R") again so that
# the packages installed during the setup step can be loaded.
.libPaths(file.path(".", "R"))
library(eggla)
library(data.table)
res <- try(
  run_eggla_lmm(
    data = fread("bmigrowth.csv"),
    id_variable = "ID",
    age_days_variable = NULL,
    age_years_variable = "age",
    weight_kilograms_variable = "weight",
    height_centimetres_variable = "height",
    sex_variable = "sex",
    covariates = NULL,
    male_coded_zero = FALSE,
    random_complexity = "auto",
    use_car1 = FALSE,
    parallel = FALSE,
    parallel_n_chunks = 1,
    working_directory = "."
  )
)
# Nothing is deleted on failure: the working directory (".") also contains the
# input data and the installed library.
if (inherits(res, "try-error")) {
  stop("run_eggla_lmm() failed, see the error message above.", call. = FALSE)
}

Retrieve the outputs

/home/mcanouil/eggla/
├── 2021-11-23-female.zip
└── 2021-11-23-male.zip

Run the GWASs

Run the GWASs on slopes and AUCs for male and female.

# setwd("/home/mcanouil/eggla") # already set if step 1 was done
# Register the project-local library ("./R"), if it exists, so that packages
# installed there can be loaded in this session.
.libPaths(file.path(".", "R"))
library(eggla)
chr_in_parallel <- FALSE # or 11 to run 11 chromosomes in 11 processes/cores
if (!isFALSE(chr_in_parallel)) {
  if (nzchar(system.file(package = "pak"))) {
    pak::pkg_install(c("future", "future.apply"))
  } else {
    install.packages(c("future", "future.apply"))
  }
  future::plan("multicore", workers = min(chr_in_parallel, future::availableCores()))
}
run_eggla_gwas(
  data = "bmigrowth.csv",
  # "$" anchors the extension so that e.g. "x.zip.bak" is not picked up
  results_zip = list.files(path = ".", pattern = "\\.zip$", full.names = TRUE),
  id_column = "ID",
  traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"),
  covariates = c("sex"),
  vcfs = list.files(
    path = file.path(".", "vcf"),
    # matches "*.vcf" and "*.vcf.gz" (every dot is escaped)
    pattern = "\\.vcf(\\.gz)?$",
    full.names = TRUE
  ),
  vep_file = NULL,
  working_directory = ".",
  bin_path = list(
    bcftools = "/usr/bin/bcftools",
    plink2 = "https://s3.amazonaws.com/plink2-assets/plink2_linux_x86_64_20230109.zip"
    # check before using: https://www.cog-genomics.org/plink/2.0/
  ),
  threads = 1
)

Retrieve the outputs

/home/mcanouil/eggla/
├── gwas_software.txt
└── gwas.txt.gz