Skip to contents

Validates the ABC setup by checking that all priors can be correctly sampled from, that a slendr model resulting from those priors can simulate a tree sequence, and that the user-defined summary functions produce output compatible with the provided empirical summary statistics.

Usage

validate_abc(
  model,
  priors,
  functions,
  observed,
  sequence_length,
  recombination_rate,
  mutation_rate = 0,
  format = c("ts", "files"),
  data = NULL,
  engine = NULL,
  model_args = NULL,
  engine_args = NULL,
  quiet = FALSE,
  attempts = 1000
)

Arguments

model

Either a slendr model generating function (in which case engine must be either "msprime" or "slim", i.e. one of the two of slendr's simulation back ends), or a path to a custom user-defined SLiM or msprime script (in which case engine must be "custom").

priors

A list of prior distributions to use for sampling of model parameters

functions

A named list of summary statistic functions to apply on different simulated results (tree sequences or custom files)

observed

A named list of observed summary statistics

sequence_length

Amount of sequence to simulate using slendr (in base pairs). Ignored when custom simulations scripts are provided.

recombination_rate

Recombination rate to use for the simulation

mutation_rate

Mutation rate to use for the simulation

format

In which format will the model generate results to be used for computing simulated summary statistics?

data

A named list of data-generating functions. The names then represent all possible arguments of simulated summary statistic functions.

engine

Which simulation engine to use? Values "msprime" and "slim" will use one of the built-in slendr simulation back ends. Which engine will be used is determined by the nature of the model. If engine = NULL, then spatial slendr models will by default use the "slim" back end, non-spatial models will use the "msprime" back end, and custom user-defined model scripts will use the "custom" engine. Setting this argument explicitly will change the back ends (where appropriate). Setting this argument for custom simulation script has no effect.

model_args

Optional (non-prior) arguments for the slendr model generating function. Setting this argument for custom simulation script has no effect.

engine_args

Optional arguments for the slendr simulation back end. Setting this argument for custom simulation script has no effect.

quiet

Should the log output of the validation be printed to the console? (Default is TRUE.)

attempts

Maximum number of attempts to generate prior values for a valid demographic model (default is 1000)

Value

No return value. The function is ran for its terminal output.

Examples

slendr::check_dependencies(python = TRUE, quit = TRUE)

library(demografr)

library(slendr)
init_env(quiet = TRUE)

##################################################
# define a model

model <- function(Ne_A, Ne_B, Ne_C, Ne_D, T_AB, T_BC, T_CD, gf_BC) {
  A <- population("A", time = 1,    N = Ne_A)
  B <- population("B", time = T_AB, N = Ne_B, parent = A)
  C <- population("C", time = T_BC, N = Ne_C, parent = B)
  D <- population("D", time = T_CD, N = Ne_D, parent = C)

  gf <- gene_flow(from = B, to = C, start = 9000, end = 9301, rate = gf_BC)

  model <- compile_model(
    populations = list(A, B, C, D), gene_flow = gf,
    generation_time = 1, simulation_length = 10000,
    direction = "forward", serialize = FALSE
  )

  samples <- schedule_sampling(
    model, times = 10000,
    list(A, 25), list(B, 25), list(C, 25), list(D, 25),
    strict = TRUE
  )

  # when a specific sampling schedule is to be used, both model and samples
  # must be returned by the function
  return(list(model, samples))
}

##################################################
# set up priors

priors <- list(
  Ne_A  ~ runif(1000, 3000),
  Ne_B  ~ runif(100,  1500),
  Ne_C  ~ runif(5000, 10000),
  Ne_D  ~ runif(2000, 7000),

  T_AB  ~ runif(1,    4000),
  T_BC  ~ runif(3000, 9000),
  T_CD  ~ runif(5000, 10000),

  gf_BC ~ runif(0, 0.3)
)

##################################################
# prepare a list of empirical summary statistics

observed_diversity <- read.table(
  system.file("examples/basics_diversity.tsv", package = "demografr"),
  header = TRUE
)
observed_divergence <- read.table(
  system.file("examples/basics_divergence.tsv", package = "demografr"),
  header = TRUE
)
observed_f4  <- read.table(
  system.file("examples/basics_f4.tsv", package = "demografr"),
  header = TRUE
)
observed <- list(
  diversity  = observed_diversity,
  divergence = observed_divergence,
  f4         = observed_f4

)

##################################################
# prepare a list of simulated summary statistics

compute_diversity <- function(ts) {
  samples <- ts_names(ts, split = "pop")
  ts_diversity(ts, sample_sets = samples)
}
compute_divergence <- function(ts) {
  samples <- ts_names(ts, split = "pop")
  ts_divergence(ts, sample_sets = samples)
}
compute_f4 <- function(ts) {
  samples <- ts_names(ts, split = "pop")
  A <- samples["A"]; B <- samples["B"]
  C <- samples["C"]; D <- samples["D"]
  ts_f4(ts, A, B, C, D)
}

functions <- list(
  diversity  = compute_diversity,
  divergence = compute_divergence,
  f4         = compute_f4
)

##################################################
# simulate data from a single model run
# (step #1 of a single ABC replicate simulation)

ts <- simulate_model(model, priors, sequence_length = 1e6, recombination_rate = 1e-8)

##################################################
# simulate data from a single model run
# (step #2 of a single ABC replicate simulation)

summarise_data(ts, functions)
#> $diversity
#> # A tibble: 4 × 2
#>   set   diversity
#>   <chr>     <dbl>
#> 1 A             0
#> 2 B             0
#> 3 C             0
#> 4 D             0
#> 
#> $divergence
#> # A tibble: 6 × 3
#>   x     y     divergence
#>   <chr> <chr>      <dbl>
#> 1 A     B              0
#> 2 A     C              0
#> 3 A     D              0
#> 4 B     C              0
#> 5 B     D              0
#> 6 C     D              0
#> 
#> $f4
#> # A tibble: 1 × 5
#>   W     X     Y     Z        f4
#>   <chr> <chr> <chr> <chr> <dbl>
#> 1 A     B     C     D         0
#> 

##################################################
# validate all components of the ABC inference pipeline

validate_abc(model, priors, functions, observed,
             sequence_length = 1e6, recombination_rate = 1e-8)
#> ======================================================================
#> Testing sampling of each prior parameter:
#>   - Ne_A ✅
#>   - Ne_B ✅
#>   - Ne_C ✅
#>   - Ne_D ✅
#>   - T_AB ✅
#>   - T_BC ✅
#>   - T_CD ✅
#>   - gf_BC ✅
#> ---------------------------------------------------------------------
#> The model is a slendr function
#> ---------------------------------------------------------------------
#> Checking the return statement of the model function... ✅
#> ---------------------------------------------------------------------
#> Checking the presence of required model function arguments...---------------------------------------------------------------------
#> Simulating tree sequence from the given model... ✅
#> ---------------------------------------------------------------------
#> Computing user-defined summary functions:
#>   - diversity ✅
#>   - divergence ✅
#>   - f4 ✅
#> ---------------------------------------------------------------------
#> Checking the format of simulated summary statistics:
#>   - diversity [data frame] ✅
#>   - divergence [data frame] ✅
#>   - f4 [data frame] ✅
#> ======================================================================
#> No issues have been found in the ABC setup!

#
# we're skipping the remaining steps because they are extremely
# computationally intensive for the scope of this example
#

##################################################
# set up paralelization

# library(future)
# plan(multisession, workers = availableCores())

##################################################
# simulate data (summary statistics)

# data <- simulate_abc(
#   model, priors, functions, observed, iterations = 10000,
#   sequence_length = 10e6, recombination_rate = 1e-8, mutation_rate = 1e-8
# )

##################################################
# perform ABC inference

# abc <- run_abc(data, engine = "abc", tol = 0.01, method = "neuralnet")