Contents

1 Introduction

miaSim implements tools for simulating microbial community data based on various ecological models. These can be used to simulate species abundance matrices, including time series. A detailed function documentation can be viewed at the function reference page

1.1 Installation

Install the Bioconductor release version with

if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

Load the library

library(miaSim)

1.2 Examples

1.2.1 Generating species interaction matrices

Some of the models rely on interaction matrices that represents interaction heterogeneity between species. The interaction matrix can be generated with different distributional assumptions.

Generate interactions from normal distribution:

A_normal <- powerlawA(n_species = 4, alpha = 3)

Generate interactions from uniform distribution:

A_uniform <- randomA(n_species = 10, diagonal = -0.4, connectance = 0.5, interactions = runif(n = 10^2, min = -0.8, max = 0.8))

1.2.2 Generalized Lotka-Volterra (gLV)

The generalized Lotka-Volterra simulation model generates time-series assuming microbial population dynamics and interaction.

glvmodel <- simulateGLV(n_species = 4, A = A_normal, t_start = 0, 
    t_store = 100, t_end=100, stochastic = FALSE, norm = FALSE)

miaViz::plotSeries(glvmodel, "time")

1.2.3 Ricker model

Ricker model is a discrete version of the gLV:

rickermodel <- simulateRicker(n_species=4, A = A_normal, t_end=100, norm = FALSE)

The number of species specified in the interaction matrix must be the same as the species used in the models.

1.2.4 Hubbell model

Hubbell Neutral simulation model characterizes diversity and relative abundance of species in ecological communities assuming migration, births and deaths but no interactions. Losses become replaced by migration or birth.

hubbellmodel <- simulateHubbell(n_species = 8, M = 10, carrying_capacity = 1000,
                                k_events = 50, migration_p = 0.02, t_end = 100)

One can also simulate parameters for the Hubbell model.

hubbellmodelRates <- simulateHubbellRates(x0 = c(0,5,10),
    migration_p = 0.1, metacommunity_probability = NULL, k_events = 1, 
    growth_rates = NULL, norm = FALSE, t_end=100)

miaViz::plotSeries(hubbellmodelRates, "time")

1.2.5 Self-Organised Instability (SOI)

The Self-Organised Instability (SOI) model generates time series for communities and accelerates stochastic simulation.

soimodel <- simulateSOI(n_species = 4, carrying_capacity = 1000,
              A = A_normal, k_events=5, x0 = NULL, t_end = 150, norm = TRUE)

1.2.6 Stochastic logistic model

Stochastic logistic model is used to determine dead and alive counts in community.

logisticmodel <- simulateStochasticLogistic(n_species = 5)

miaViz::plotSeries(logisticmodel, x = "time")

model_transformed <- mia::transformCounts(logisticmodel, method = "relabundance")

1.2.7 Consumer-resource model

The consumer resource model requires the use of the randomE function, which returns a matrix containing the production rates and consumption rates of each species. The resulting matrix is used as a determination of resource consumption efficiency.

crmodel <- simulateConsumerResource(n_species = 2,
             n_resources = 4,
         E = randomE(n_species = 2, n_resources = 4))

miaViz::plotSeries(crmodel, "time")

# example to get relative abundance and relative proportion of resources
#'norm = TRUE' can be added as a parameter.

# convert to relative abundance
ExampleCR <- mia::transformCounts(crmodel, method = "relabundance")

miaViz::plotSeries(ExampleCR, "time")
#Recommended standard way to generate a set of n simulations (n=2 here) from a given model
simulations <- lapply(seq_len(2), function (i) {do.call(simulateConsumerResource, params)})

# Visualize the model for the first instance
miaViz::plotSeries(simulations[[1]], "time")

# List state for each community (instance) at its last time point;
# this results in instances x species matrix; means and variances per species can be computed col-wise

communities <-  t(sapply(simulations, function (x) {assay(x, "counts")[, which.max(x$time)]}))

# Some more advanced examples for hardcore users:
 
# test leave-one-out in CRM
.replaceByZero <- function(input_list) { # params_iter$x0 as input_list
     if (!all(length(input_list) == unlist(unique(lapply(input_list, length))))) {
         stop("Length of input_list doesn't match length of element in it.")
     }
     for (i in seq_along(input_list)) {
         input_list[[i]][[i]] <- 0
     }
     return(input_list)
 }

.createParamList <- function(input_param, n_repeat, replace_by_zero = FALSE) {
     res_list <- vector(mode = "list", length = n_repeat)
     for (i in seq_len(n_repeat)) {
         res_list[[i]] <- input_param
     }
res_list <- lapply(seq_len(n_repeat), function (i) {input_param})
 }
# example of generateSimulations
# FIXME: reduce computational load by lowering the number of species and timesteps in the demo
params <- list(
    n_species = 10,
    n_resources = 5,
    E = randomE(
        n_species = 10, n_resources = 5,
        mean_consumption = 1, mean_production = 3
    ),
    x0 = rep(0.001, 10),
    resources = rep(1000, 5),
    monod_constant = matrix(rbeta(10 * 5, 10, 10), nrow = 10, ncol = 5),
    inflow_rate = .5,
    outflow_rate = .5,
    migration_p = 0,
    stochastic = TRUE,
    t_start = 0,
    t_end = 20,
    t_store = 100,
    growth_rates = runif(10),
    norm = FALSE
)
# Test overwrite params
.createParamList <- function(input_param, n_repeat, replace_by_zero = FALSE) {
  res_list <- unname(as.list(data.frame(t(matrix(rep(input_param, n_repeat), nrow = n_repeat)))))
}

paramx0 <- .createParamList(input_param = rep(0.001, 10), n_repeat = 10, 
                            replace_by_zero = TRUE)
paramresources <- .createParamList(input_param = rep(1000, 5), n_repeat = 10)
params_iter <- list(x0 = paramx0, resources = paramresources)
simulations <- lapply(seq_len(2), function (i) {do.call(simulateConsumerResource, params)})
simulations_2 <- .generateSimulations(
    model = "simulateConsumerResource",
    params_list = params, param_iter = params_iter, n_instances = 1, t_end = 20
)

estimatedA <- .estimateAFromSimulations(simulations, simulations_2, n_instances = 1,
    scale_off_diagonal = 1, diagonal = -0.5, connectance = 0.2
) / 1000
# Using these parameters with a specified simulator
m <- simulateGLV(n_species = 10, x0 = params$x0,
        A = estimatedA, growth_rates = params$growth_rates, t_end = 20, t_store = 100)

miaViz::plotSeries(m, "time") # Plotting

1.3 Data containers

The simulation functions gives TreeSummarizedExperiment [@TreeSE] object.

This provides access to a broad range of tools for microbiome analysis that support this format (see microbiome.github.io). More examples on can be found at OMA Online Manual. Other fields, such as rowData containing information about the samples, and colData, consisting of sample metadata describing the samples, or phylogenetic trees, can be added as necessary.

For instance, we can use the miaViz R/Bioconductor package to visualize the microbial community time series.

2 Session info

sessionInfo()
## R version 4.3.0 RC (2023-04-13 r84269)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.17-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] miaSim_1.6.0                   TreeSummarizedExperiment_2.8.0
##  [3] Biostrings_2.68.0              XVector_0.40.0                
##  [5] SingleCellExperiment_1.22.0    SummarizedExperiment_1.30.0   
##  [7] Biobase_2.60.0                 GenomicRanges_1.52.0          
##  [9] GenomeInfoDb_1.36.0            IRanges_2.34.0                
## [11] S4Vectors_0.38.0               BiocGenerics_0.46.0           
## [13] MatrixGenerics_1.12.0          matrixStats_0.63.0            
## [15] cluster_2.1.4                  philentropy_0.7.0             
## [17] dplyr_1.1.2                    ape_5.7-1                     
## [19] sna_2.7-1                      statnet.common_4.8.0          
## [21] network_1.18.1                 GGally_2.1.2                  
## [23] colourvalues_0.3.9             ggplot2_3.4.2                 
## [25] BiocStyle_2.28.0              
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3          jsonlite_1.8.4             
##   [3] MultiAssayExperiment_1.26.0 magrittr_2.0.3             
##   [5] magick_2.7.4                ggbeeswarm_0.7.1           
##   [7] farver_2.1.1                rmarkdown_2.21             
##   [9] zlibbioc_1.46.0             vctrs_0.6.2                
##  [11] memoise_2.0.1               DelayedMatrixStats_1.22.0  
##  [13] RCurl_1.98-1.12             ggtree_3.8.0               
##  [15] htmltools_0.5.5             BiocNeighbors_1.18.0       
##  [17] deSolve_1.35                gridGraphics_0.5-1         
##  [19] sass_0.4.5                  pracma_2.4.2               
##  [21] bslib_0.4.2                 plyr_1.8.8                 
##  [23] DECIPHER_2.28.0             cachem_1.0.7               
##  [25] igraph_1.4.2                lifecycle_1.0.3            
##  [27] pkgconfig_2.0.3             rsvd_1.0.5                 
##  [29] Matrix_1.5-4                R6_2.5.1                   
##  [31] fastmap_1.1.1               GenomeInfoDbData_1.2.10    
##  [33] aplot_0.1.10                digest_0.6.31              
##  [35] ggnewscale_0.4.8            colorspace_2.1-0           
##  [37] reshape_0.8.9               patchwork_1.1.2            
##  [39] scater_1.28.0               irlba_2.3.5.1              
##  [41] RSQLite_2.3.1               vegan_2.6-4                
##  [43] beachmat_2.16.0             labeling_0.4.2             
##  [45] fansi_1.0.4                 mgcv_1.8-42                
##  [47] polyclip_1.10-4             compiler_4.3.0             
##  [49] bit64_4.0.5                 withr_2.5.0                
##  [51] BiocParallel_1.34.0         viridis_0.6.2              
##  [53] DBI_1.1.3                   highr_0.10                 
##  [55] ggforce_0.4.1               MASS_7.3-59                
##  [57] poweRlaw_0.70.6             DelayedArray_0.26.0        
##  [59] permute_0.9-7               tools_4.3.0                
##  [61] vipor_0.4.5                 beeswarm_0.4.0             
##  [63] glue_1.6.2                  nlme_3.1-162               
##  [65] grid_4.3.0                  mia_1.8.0                  
##  [67] reshape2_1.4.4              generics_0.1.3             
##  [69] gtable_0.3.3                tidyr_1.3.0                
##  [71] BiocSingular_1.16.0         tidygraph_1.2.3            
##  [73] ScaledMatrix_1.8.0          utf8_1.2.3                 
##  [75] ggrepel_0.9.3               pillar_1.9.0               
##  [77] stringr_1.5.0               yulab.utils_0.0.6          
##  [79] splines_4.3.0               tweenr_2.0.2               
##  [81] treeio_1.24.0               lattice_0.21-8             
##  [83] bit_4.0.5                   tidyselect_1.2.0           
##  [85] DirichletMultinomial_1.42.0 scuttle_1.10.0             
##  [87] knitr_1.42                  gridExtra_2.3              
##  [89] bookdown_0.33               xfun_0.39                  
##  [91] graphlayouts_0.8.4          miaViz_1.8.0               
##  [93] stringi_1.7.12              ggfun_0.0.9                
##  [95] lazyeval_0.2.2              yaml_2.3.7                 
##  [97] evaluate_0.20               codetools_0.2-19           
##  [99] ggraph_2.1.0                tibble_3.2.1               
## [101] BiocManager_1.30.20         ggplotify_0.1.0            
## [103] cli_3.6.1                   munsell_0.5.0              
## [105] jquerylib_0.1.4             Rcpp_1.0.10                
## [107] coda_0.19-4                 parallel_4.3.0             
## [109] blob_1.2.4                  sparseMatrixStats_1.12.0   
## [111] bitops_1.0-7                decontam_1.20.0            
## [113] viridisLite_0.4.1           tidytree_0.4.2             
## [115] scales_1.2.1                purrr_1.0.1                
## [117] crayon_1.5.2                rlang_1.1.0