
Simulate a single tree sequence from the given inference setup
Source:R/simulate_model.R
simulate_model.Rd
Simulates a tree sequence object from a model with parameters as specified either by sampling from priors or by a given list of fixed parameter values
Usage
simulate_model(
model,
parameters,
sequence_length,
recombination_rate,
mutation_rate = 0,
data = NULL,
format = c("ts", "files"),
engine = NULL,
model_args = NULL,
engine_args = NULL,
attempts = 1000
)
Arguments
- model
Either a slendr model generating function (in which case
engine
must be either "msprime" or "slim", i.e. one of the two of slendr's simulation back ends), or a path to a custom user-defined SLiM or msprime script (in which caseengine
must be "files").- parameters
A list of prior distributions to use for sampling of model parameters
- sequence_length
Amount of sequence to simulate using slendr (in base pairs). Ignored when custom simulations scripts are provided.
- recombination_rate
Recombination rate to use for the simulation Ignored when custom simulations scripts are provided.
- mutation_rate
Mutation rate to use for the simulation Ignored when custom simulations scripts are provided.
- data
A named list of data-generating functions. The names then represent all possible arguments of simulated summary statistic functions.
- format
In which format will the model generate results to be used for computing simulated summary statistics?
- engine
Which simulation engine to use? Values "msprime" and "slim" will use one of the built-in slendr simulation back ends. Which engine will be used is determined by the nature of the
model
. Ifengine = NULL
, then spatial slendr models will by default use the "slim" back end, non-spatial models will use the "msprime" back end, and custom user-defined model scripts will use the "custom" engine. Setting this argument explicitly will change the back ends (where appropriate). Setting this argument for custom simulation script has no effect.- model_args
Optional (non-prior) arguments for the slendr model generating function. Setting this argument for custom simulation script has no effect.
- engine_args
Optional arguments for the slendr simulation back end. Setting this argument for custom simulation script has no effect.
- attempts
Maximum number of attempts to generate prior values for a valid demographic model (i.e. model which generates an output without error, default is 1000)
Value
Either a tree-sequence object of the class slendr_ts
when a slendr model
was simulated, or a path to an output file when a custom simulation script was used.
Details
This function is useful to generate a small tree sequence to be used when developing summary statistic functions inference using demografr.
Examples
slendr::check_dependencies(python = TRUE, quit = TRUE)
library(demografr)
library(slendr)
init_env(quiet = TRUE)
##################################################
# define a model
model <- function(Ne_A, Ne_B, Ne_C, Ne_D, T_AB, T_BC, T_CD, gf_BC) {
A <- population("A", time = 1, N = Ne_A)
B <- population("B", time = T_AB, N = Ne_B, parent = A)
C <- population("C", time = T_BC, N = Ne_C, parent = B)
D <- population("D", time = T_CD, N = Ne_D, parent = C)
gf <- gene_flow(from = B, to = C, start = 9000, end = 9301, rate = gf_BC)
model <- compile_model(
populations = list(A, B, C, D), gene_flow = gf,
generation_time = 1, simulation_length = 10000,
direction = "forward", serialize = FALSE
)
samples <- schedule_sampling(
model, times = 10000,
list(A, 25), list(B, 25), list(C, 25), list(D, 25),
strict = TRUE
)
# when a specific sampling schedule is to be used, both model and samples
# must be returned by the function
return(list(model, samples))
}
##################################################
# set up priors
priors <- list(
Ne_A ~ runif(1000, 3000),
Ne_B ~ runif(100, 1500),
Ne_C ~ runif(5000, 10000),
Ne_D ~ runif(2000, 7000),
T_AB ~ runif(1, 4000),
T_BC ~ runif(3000, 9000),
T_CD ~ runif(5000, 10000),
gf_BC ~ runif(0, 0.3)
)
##################################################
# prepare a list of empirical summary statistics
observed_diversity <- read.table(
system.file("examples/basics_diversity.tsv", package = "demografr"),
header = TRUE
)
observed_divergence <- read.table(
system.file("examples/basics_divergence.tsv", package = "demografr"),
header = TRUE
)
observed_f4 <- read.table(
system.file("examples/basics_f4.tsv", package = "demografr"),
header = TRUE
)
observed <- list(
diversity = observed_diversity,
divergence = observed_divergence,
f4 = observed_f4
)
##################################################
# prepare a list of simulated summary statistics
compute_diversity <- function(ts) {
samples <- ts_names(ts, split = "pop")
ts_diversity(ts, sample_sets = samples)
}
compute_divergence <- function(ts) {
samples <- ts_names(ts, split = "pop")
ts_divergence(ts, sample_sets = samples)
}
compute_f4 <- function(ts) {
samples <- ts_names(ts, split = "pop")
A <- samples["A"]; B <- samples["B"]
C <- samples["C"]; D <- samples["D"]
ts_f4(ts, A, B, C, D)
}
functions <- list(
diversity = compute_diversity,
divergence = compute_divergence,
f4 = compute_f4
)
##################################################
# simulate data from a single model run
# (step #1 of a single ABC replicate simulation)
ts <- simulate_model(model, priors, sequence_length = 1e6, recombination_rate = 1e-8)
##################################################
# simulate data from a single model run
# (step #2 of a single ABC replicate simulation)
summarise_data(ts, functions)
#> $diversity
#> # A tibble: 4 × 2
#> set diversity
#> <chr> <dbl>
#> 1 A 0
#> 2 B 0
#> 3 C 0
#> 4 D 0
#>
#> $divergence
#> # A tibble: 6 × 3
#> x y divergence
#> <chr> <chr> <dbl>
#> 1 A B 0
#> 2 A C 0
#> 3 A D 0
#> 4 B C 0
#> 5 B D 0
#> 6 C D 0
#>
#> $f4
#> # A tibble: 1 × 5
#> W X Y Z f4
#> <chr> <chr> <chr> <chr> <dbl>
#> 1 A B C D 0
#>
##################################################
# validate all components of the ABC inference pipeline
validate_abc(model, priors, functions, observed,
sequence_length = 1e6, recombination_rate = 1e-8)
#> ======================================================================
#> Testing sampling of each prior parameter:
#> - Ne_A ✅
#> - Ne_B ✅
#> - Ne_C ✅
#> - Ne_D ✅
#> - T_AB ✅
#> - T_BC ✅
#> - T_CD ✅
#> - gf_BC ✅
#> ---------------------------------------------------------------------
#> The model is a slendr function
#> ---------------------------------------------------------------------
#> Checking the return statement of the model function... ✅
#> ---------------------------------------------------------------------
#> Checking the presence of required model function arguments...---------------------------------------------------------------------
#> Simulating tree sequence from the given model... ✅
#> ---------------------------------------------------------------------
#> Computing user-defined summary functions:
#> - diversity ✅
#> - divergence ✅
#> - f4 ✅
#> ---------------------------------------------------------------------
#> Checking the format of simulated summary statistics:
#> - diversity [data frame] ✅
#> - divergence [data frame] ✅
#> - f4 [data frame] ✅
#> ======================================================================
#> No issues have been found in the ABC setup!
#
# we're skipping the remaining steps because they are extremely
# computationally intensive for the scope of this example
#
##################################################
# set up paralelization
# library(future)
# plan(multisession, workers = availableCores())
##################################################
# simulate data (summary statistics)
# data <- simulate_abc(
# model, priors, functions, observed, iterations = 10000,
# sequence_length = 10e6, recombination_rate = 1e-8, mutation_rate = 1e-8
# )
##################################################
# perform ABC inference
# abc <- run_abc(data, engine = "abc", tol = 0.01, method = "neuralnet")