Test for association between a set of SNPS/genes and continuous or binary outcomes by including variant characteristic information and using (weighted) score statistics.

Note:

Installation

# Install MiSTr from CRAN:
install.packages("MiSTr")

# Or the the development version from GitHub:
# install.packages("remotes")
remotes::install_github("mcanouil/MiSTr")

MiSTr in Action

library(MiSTr)
data(mist_data)
attach(mist_data)

Continuous Outcome

With Heterogeneity (τ)

mist(
  y = phenotypes[, "y_tau"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1          -0.467            0.284           -1.645
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.103          -1.031            0.097
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 3.4e-05
#>   + PI (mean effect):  
#>     * Score = 2.659
#>     * P-value = 0.103
#>   + TAU (heterogeneous effect):  
#>     * Score = 1464.924
#>     * P-value = 2.37e-05

With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_tau"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1            0.77            0.627            1.228
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.223          -0.475            2.014
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.531
#>   + PI (mean effect):  
#>     * Score = 1.5
#>     * P-value = 0.221
#>   + TAU (heterogeneous effect):  
#>     * Score = 0.217
#>     * P-value = 0.932

With “Average” Effect (π)

mist(
  y = phenotypes[, "y_pi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1           0.809            0.294             2.75
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.007           0.225            1.392
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.0376
#>   + PI (mean effect):  
#>     * Score = 7.083
#>     * P-value = 0.00778
#>   + TAU (heterogeneous effect):  
#>     * Score = 185.198
#>     * P-value = 0.793

With “Average” Effect (π) and Heterogeneity (τ)

mist(
  y = phenotypes[, "y_taupi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1           0.248            0.321            0.774
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.441          -0.389            0.885
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.0307
#>   + PI (mean effect):  
#>     * Score = 0.601
#>     * P-value = 0.438
#>   + TAU (heterogeneous effect):  
#>     * Score = 1006.125
#>     * P-value = 0.0111

With “Average” Effect (π) and With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_taupi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "continuous"
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1           1.517            0.689            2.201
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1           0.03           0.149            2.884
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.121
#>   + PI (mean effect):  
#>     * Score = 4.661
#>     * P-value = 0.0308
#>   + TAU (heterogeneous effect):  
#>     * Score = 1.427
#>     * P-value = 0.842

Continuous Outcome with Weights

With Heterogeneity (τ)

mist(
  y = phenotypes[, "y_tau"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1          -0.467            0.284           -1.645
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.103          -1.031            0.097
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.000115
#>   + PI (mean effect):  
#>     * Score = 2.659
#>     * P-value = 0.103
#>   + TAU (heterogeneous effect):  
#>     * Score = 171103.8
#>     * P-value = 8.87e-05

With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_tau"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[get_same_effect, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1            0.77            0.627            1.228
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.223          -0.475            2.014
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.531
#>   + PI (mean effect):  
#>     * Score = 1.5
#>     * P-value = 0.221
#>   + TAU (heterogeneous effect):  
#>     * Score = 25.306
#>     * P-value = 0.932

With “Average” Effect (π)

mist(
  y = phenotypes[, "y_pi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1           0.809            0.294             2.75
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.007           0.225            1.392
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.0383
#>   + PI (mean effect):  
#>     * Score = 7.083
#>     * P-value = 0.00778
#>   + TAU (heterogeneous effect):  
#>     * Score = 23859.6
#>     * P-value = 0.81

With “Average” Effect (π) and Heterogeneity (τ)

mist(
  y = phenotypes[, "y_taupi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1           0.248            0.321            0.774
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.441          -0.389            0.885
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.0527
#>   + PI (mean effect):  
#>     * Score = 0.601
#>     * P-value = 0.438
#>   + TAU (heterogeneous effect):  
#>     * Score = 118494.8
#>     * P-value = 0.0212

With “Average” Effect (π) and With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_taupi"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "continuous",
  weight.beta = c(1, 25),
  maf = variants_info[get_same_effect, "maf"]
)
#> [MiSTr] Linear regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1   Mcluster1           1.517            0.689            2.201
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1           0.03           0.149            2.884
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.121
#>   + PI (mean effect):  
#>     * Score = 4.661
#>     * P-value = 0.0308
#>   + TAU (heterogeneous effect):  
#>     * Score = 166.235
#>     * P-value = 0.842

Binary Outcome

With Heterogeneity (τ)

mist(
  y = phenotypes[, "y_binary"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "binary"
)
#> [MiSTr] Logistic regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1          GZ           3.576            0.344              3.7
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1              0           1.935            7.528
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 6.54e-05
#>   + PI (mean effect):  
#>     * Score = 17.527
#>     * P-value = 2.83e-05
#>   + TAU (heterogeneous effect):  
#>     * Score = 5.4
#>     * P-value = 0.175

With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_binary"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "binary"
)
#> [MiSTr] Logistic regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1          GZ           3.279            0.645            1.841
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.066           1.064           14.571
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.154
#>   + PI (mean effect):  
#>     * Score = 3.969
#>     * P-value = 0.0463
#>   + TAU (heterogeneous effect):  
#>     * Score = 0.04
#>     * P-value = 0.766

Binary Outcome with Weights

With Heterogeneity (τ)

mist(
  y = phenotypes[, "y_binary"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes,
  Z = variants_info[, 1, drop = FALSE],
  method = "liu",
  model = "binary",
  weight.beta = c(1, 25),
  maf = variants_info[, "maf"]
)
#> [MiSTr] Logistic regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1          GZ           3.576            0.344              3.7
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1              0           1.935            7.528
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 9.29e-05
#>   + PI (mean effect):  
#>     * Score = 17.527
#>     * P-value = 2.83e-05
#>   + TAU (heterogeneous effect):  
#>     * Score = 616.283
#>     * P-value = 0.256

With Heterogeneity (τ) “Removed”

variants_info[, "effect"] # simulated effect
#>  g_variant1  g_variant2  g_variant3  g_variant4  g_variant5  g_variant6 
#>  -0.1871895  -0.6536863  -0.6635657  -5.7526938  -0.7180167  -0.3196585 
#>  g_variant7  g_variant8  g_variant9 g_variant10 
#>   2.0680578  -0.7141979  -1.8494894   1.2992623
get_same_effect <- names(which(variants_info[, "effect"] > 0))
mist(
  y = phenotypes[, "y_binary"],
  X = phenotypes[, paste0("x_cov", 0:2)],
  G = genotypes[, get_same_effect],
  Z = variants_info[get_same_effect, 1, drop = FALSE],
  method = "liu",
  model = "binary",
  weight.beta = c(1, 25),
  maf = variants_info[get_same_effect, "maf"]
)
#> [MiSTr] Logistic regression is ongoing ...
#> 
#> MiSTr: Mixed effects Score Test
#> -------------------------------
#> 
#> - (Raw) Estimates:
#> 
#>   SubClusters term.pi.hat estimate.pi.hat std.error.pi.hat statistic.pi.hat
#> 1           1          GZ           3.279            0.645            1.841
#>   p.value.pi.hat conf.low.pi.hat conf.high.pi.hat
#> 1          0.066           1.064           14.571
#> 
#> - Statistics:
#> 
#>   + Overall effect: 
#>     * P-value = 0.154
#>   + PI (mean effect):  
#>     * Score = 3.969
#>     * P-value = 0.0463
#>   + TAU (heterogeneous effect):  
#>     * Score = 4.691
#>     * P-value = 0.766