
Apply summary statistic functions to the simulated data
Source:R/summarise_data.R
summarise_data.Rd
Apply summary statistic functions to the simulated data
Arguments
- data
Data generated by
simulate_model
. Either a named list (when custom data-generating functions were used on user-defined simulation output files) or an object of the classslendr_ts
(when a standard tree-sequence-producing simulation was ran by slendr itself).- functions
A named list of data-generating functions
Examples
slendr::check_dependencies(python = TRUE, quit = TRUE)
library(demografr)
library(slendr)
init_env(quiet = TRUE)
##################################################
# define a model
model <- function(Ne_A, Ne_B, Ne_C, Ne_D, T_AB, T_BC, T_CD, gf_BC) {
A <- population("A", time = 1, N = Ne_A)
B <- population("B", time = T_AB, N = Ne_B, parent = A)
C <- population("C", time = T_BC, N = Ne_C, parent = B)
D <- population("D", time = T_CD, N = Ne_D, parent = C)
gf <- gene_flow(from = B, to = C, start = 9000, end = 9301, rate = gf_BC)
model <- compile_model(
populations = list(A, B, C, D), gene_flow = gf,
generation_time = 1, simulation_length = 10000,
direction = "forward", serialize = FALSE
)
samples <- schedule_sampling(
model, times = 10000,
list(A, 25), list(B, 25), list(C, 25), list(D, 25),
strict = TRUE
)
# when a specific sampling schedule is to be used, both model and samples
# must be returned by the function
return(list(model, samples))
}
##################################################
# set up priors
priors <- list(
Ne_A ~ runif(1000, 3000),
Ne_B ~ runif(100, 1500),
Ne_C ~ runif(5000, 10000),
Ne_D ~ runif(2000, 7000),
T_AB ~ runif(1, 4000),
T_BC ~ runif(3000, 9000),
T_CD ~ runif(5000, 10000),
gf_BC ~ runif(0, 0.3)
)
##################################################
# prepare a list of empirical summary statistics
observed_diversity <- read.table(
system.file("examples/basics_diversity.tsv", package = "demografr"),
header = TRUE
)
observed_divergence <- read.table(
system.file("examples/basics_divergence.tsv", package = "demografr"),
header = TRUE
)
observed_f4 <- read.table(
system.file("examples/basics_f4.tsv", package = "demografr"),
header = TRUE
)
observed <- list(
diversity = observed_diversity,
divergence = observed_divergence,
f4 = observed_f4
)
##################################################
# prepare a list of simulated summary statistics
compute_diversity <- function(ts) {
samples <- ts_names(ts, split = "pop")
ts_diversity(ts, sample_sets = samples)
}
compute_divergence <- function(ts) {
samples <- ts_names(ts, split = "pop")
ts_divergence(ts, sample_sets = samples)
}
compute_f4 <- function(ts) {
samples <- ts_names(ts, split = "pop")
A <- samples["A"]; B <- samples["B"]
C <- samples["C"]; D <- samples["D"]
ts_f4(ts, A, B, C, D)
}
functions <- list(
diversity = compute_diversity,
divergence = compute_divergence,
f4 = compute_f4
)
##################################################
# simulate data from a single model run
# (step #1 of a single ABC replicate simulation)
ts <- simulate_model(model, priors, sequence_length = 1e6, recombination_rate = 1e-8)
##################################################
# simulate data from a single model run
# (step #2 of a single ABC replicate simulation)
summarise_data(ts, functions)
#> $diversity
#> # A tibble: 4 × 2
#> set diversity
#> <chr> <dbl>
#> 1 A 0
#> 2 B 0
#> 3 C 0
#> 4 D 0
#>
#> $divergence
#> # A tibble: 6 × 3
#> x y divergence
#> <chr> <chr> <dbl>
#> 1 A B 0
#> 2 A C 0
#> 3 A D 0
#> 4 B C 0
#> 5 B D 0
#> 6 C D 0
#>
#> $f4
#> # A tibble: 1 × 5
#> W X Y Z f4
#> <chr> <chr> <chr> <chr> <dbl>
#> 1 A B C D 0
#>
##################################################
# validate all components of the ABC inference pipeline
validate_abc(model, priors, functions, observed,
sequence_length = 1e6, recombination_rate = 1e-8)
#> ======================================================================
#> Testing sampling of each prior parameter:
#> - Ne_A ✅
#> - Ne_B ✅
#> - Ne_C ✅
#> - Ne_D ✅
#> - T_AB ✅
#> - T_BC ✅
#> - T_CD ✅
#> - gf_BC ✅
#> ---------------------------------------------------------------------
#> The model is a slendr function
#> ---------------------------------------------------------------------
#> Checking the return statement of the model function... ✅
#> ---------------------------------------------------------------------
#> Checking the presence of required model function arguments...---------------------------------------------------------------------
#> Simulating tree sequence from the given model... ✅
#> ---------------------------------------------------------------------
#> Computing user-defined summary functions:
#> - diversity ✅
#> - divergence ✅
#> - f4 ✅
#> ---------------------------------------------------------------------
#> Checking the format of simulated summary statistics:
#> - diversity [data frame] ✅
#> - divergence [data frame] ✅
#> - f4 [data frame] ✅
#> ======================================================================
#> No issues have been found in the ABC setup!
#
# we're skipping the remaining steps because they are extremely
# computationally intensive for the scope of this example
#
##################################################
# set up paralelization
# library(future)
# plan(multisession, workers = availableCores())
##################################################
# simulate data (summary statistics)
# data <- simulate_abc(
# model, priors, functions, observed, iterations = 10000,
# sequence_length = 10e6, recombination_rate = 1e-8, mutation_rate = 1e-8
# )
##################################################
# perform ABC inference
# abc <- run_abc(data, engine = "abc", tol = 0.01, method = "neuralnet")