From a3ff78fbc234431023dfd97510303ad422251afb Mon Sep 17 00:00:00 2001 From: Torkel Date: Thu, 21 Dec 2023 20:27:17 +0100 Subject: [PATCH 01/24] init --- docs/src/assets/Project.toml | 50 ++++++ .../jump_simulation_performance.md | 161 ++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 docs/src/assets/Project.toml create mode 100644 docs/src/catalyst_applications/jump_simulation_performance.md diff --git a/docs/src/assets/Project.toml b/docs/src/assets/Project.toml new file mode 100644 index 0000000000..2ea3b3fda9 --- /dev/null +++ b/docs/src/assets/Project.toml @@ -0,0 +1,50 @@ +[deps] +BifurcationKit = "0f109fa4-8a5d-4b75-95aa-f515264e7665" +Catalyst = "479239e8-5488-4da2-87a7-35f2df7eef83" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +HomotopyContinuation = "f213a82b-91d6-5c5d-acf7-10f1c761b327" +Latexify = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" +ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" +NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" +Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" +OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" +OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" +PEtab = "48d54b35-e43e-4a66-a5a1-dde6b987cf69" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" +SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1" +Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" +SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" +SteadyStateDiffEq = "9672c7b4-1e72-59bd-8a11-6ac3964bc41f" +StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0" +Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" + +[compat] +BifurcationKit = "0.3" +Catalyst = "13" +DataFrames = "1" +DifferentialEquations = "7.7" +Distributions = "0.25" +Documenter = "0.27" +HomotopyContinuation = "2.6" +Latexify = "0.15, 0.16" +ModelingToolkit = "8.47" +NonlinearSolve = "1.6.0, 2" +Optim = "1" +Optimization = "3.19" +OptimizationOptimisers = "0.1.1" +OrdinaryDiffEq = "6" +PEtab = "2" +Plots = "1.36" +SciMLBase = "~2.5" +SciMLSensitivity = "7.19" +Setfield = "1.1" +SpecialFunctions = "2.1" +SteadyStateDiffEq = "1" +StochasticDiffEq = "6" +Symbolics = "5.0.3" diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md new file mode 100644 index 0000000000..8450a70958 --- /dev/null +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -0,0 +1,161 @@ +# [Advice for performant jump simulations](@id jump_simulation_performance) +We have previously described how to perform *jump simulations* of *chemical reaction network* (CRN) models using e.g. Gillespie's algorithm. These simulations can, however, be highly computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing runtime). Here, we describe various considerations for performant jump simulations. Furthermore, all jump simulations of Catalyst models are performed using the JumpProcesses.jl package, [which documentation](https://github.com/SciML/JumpProcesses.jl) provides a more extensive description of these. + +#### Brief (and optional) introduction to jump simulations +Jump simulations are continuous time, discrete space, simulations (where the system's state, throughout the simulation, are the discrete copy numbers of each species). The system's state changes at discrete time points (in so-called jumps). For CRNs, these jumps correspond to the occurrence of individual reactions. Here, *stochastic chemical kinetics* describes how the reactions are distributed across time. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). These depend on the state of the system, and must thus be re-computed whenever the system's state changes. Hence, jump simulations' runtimes are heavily dependent on how frequently these propensities must be recomputed. + +Typically, propensities are recomputed only whenever a jump occurs. This means that jump simulations' runtimes, very roughly, scale with the number of jumps. Runtimes typically become prohibitively expensive for: +- Simulations of large models (either with many different species or where some species occur in large copy numbers). +- Simulations over long time spans. +- Simulations that are performed a large number of times. + +## Managing of solution saving +By default, `solve` saves the value of the solution at the time of every jump. For simulations where a large number of jumps occur, this can cause memory to quickly fill up. Typically, for simulations with a large number of jumps, we want to [disable this feature](https://docs.sciml.ai/JumpProcesses/dev/tutorials/discrete_stochastic_example/#save_positions_docs) and instead set the save frequency manually. To exemplify this, let us consider a simple production/degradation model: +```@example jump_simulation_performance_1 +using Catalyst, JumpProcesses, Plots + +rn = @reaction_network begin + (p,d), 0 <--> X +end +u0 = [:X => 10] +tspan = (0.0, 1000.0) +ps = [:p => 1000.0, :d => 100.0] + +dprob = DiscreteProblem(rn, u0, tspan, ps) +nothing # hide +``` +Let us simulate this model using the default options, and plot the results. Furthermore, we use [BenchmarkTools.jl's](https://github.com/JuliaCI/BenchmarkTools.jl) `@btime` macro to measure the time it takes to plot the output solution: +```@example jump_simulation_performance_1 +using BenchmarkTools +jprob = JumpProblem(rn, dprob, Direct()) +sol = solve(jprob, SSAStepper()) +@btime plot(sol) +``` +This simulation generates a very large number of jumps, with the solution saved and plotted at each time such a jump occurs. If the number of jumps is high enough, the memory limits may be exceeded (causing Julia to crash). Here, we do not reach that limit, however, the performance of the `plot(sol)` is affected by the number of points it must render (rendering too much in a single plot is another potential cause of crashed due to memory strain). + +Next, we provide the `save_positions = (false, false)` option to `JumpProblem`. This turns off the saving of the solution both before and after each jump. +```@example jump_simulation_performance_1 +jprob = JumpProblem(rn, dprob, Direct(); save_positions = (false, false)) +nothing # hide +``` +However, if we were to simulate our model now, the solution would only actually be saved at its initial and final times. To remedy this, we provide the `saveat = 1.0` argument to the `solve` command, causing the solution to be saved at every `1.0`th time unit (at `t` = `0.0`, `1.0`, `2.0`, ... `999.0`, `1000.0`). +```@example jump_simulation_performance_1 +sol = solve(jprob, SSAStepper(),saveat=1.0) +nothing # hide +``` +we can now plot the new solution: +```@example jump_simulation_performance_1 +@btime plot(sol) +``` +Here, we note that the time to plot the simulation was reduced (for this example, admittedly from an already low level). Furthermore, the plots look different since the trajectory is sampled at much sparser time points. + +## Types of jumps +Each reaction in a chemical reaction network model corresponds to a possible jump of the jump simulation. These jumps can be divided into 3 categories: +- `MassActionJump`s: These correspond to reactions which rates are constant throughout the simulation. They are typically generated when the rate contains parameters only. +- `ConstantRateJump`s: These correspond to reactions which rates remain constant between individual jumps, but may change in response to a jump occurring. They are typically generated when the rate contains variables and parameters only. +- `VariableRateJump`s: These correspond to reactions which rates may change at any time during the simulation. They are typically generated when the rate depends on the time variable ($t$). + +Here are some example reactions for the different types of jumps: +```@example jump_simulation_performance_1 +# `MassActionJump`s +@reaction 1.0, X --> Y +@reaction k, 2X + Y --> X2Y +@reaction k1*k2+k3, 0 --> X + +# `ConstantRateJump`s +@reaction k*log(X), Y --> X +@reaction mm(X,v,K), 0 --> X + +# `VariableRateJump`s +@reaction k*(1+sin(t)), 0 --> X +@reaction X/t, X + Y --> XY +nothing # hide +``` +Throughout a simulation, `VariableRateJump`s' rates require updating more frequently than `ConstantRateJump`s' rates, which in turn requires updating more frequently than `MassActionJump`s' rates. Since the performance of jump simulations is heavily affected by how frequently jump rates are computed, keeping the number of `ConstantRateJump`s and `VariableRateJump`s as small as possible is advantageous. Primarily, there exist two common cases where models are written in a way so that sub-optimal jump types are generated, both of which we describe below. + +#### Unnecessarily putting species in rates rather than in reactions +Sometimes, a rate has been made dependent on a species, where that species instead could have been made part of the actual reaction. Consider a reaction where an enzyme ($E$) catalyses the phosphorylation of a protein ($X$) to phosphorylated form ($Xᵖ$). It can be written in two different forms: +```@example jump_simulation_performance_1 +r1 = @reaction k*E, X --> Xᵖ +r2 = @reaction k, X + E --> Xᵖ + E +nothing # hide +``` +These two reactions will generate identical simulations (this holds for ODE, SDE, and Jump simulations). However, while `r1` will generate a `ConstantRateJump`, `r2` will generate a `MassActionJump`. Hence, if the `r2` form is used, jump simulation performance is, at no cost, improved. Since the two forms are otherwise identical, it is always preferable to, whenever possible, put species in the reactions, rather than the rates. + +#### Using piecewise constant, time-dependant, function instead of callbacks. +Let us consider a reaction with some input that depends on time. We assume that the input is initially $0$, but after some critical time $t=5.0$ it is increased to $1$. This can be implemented through a step function: +```@example jump_simulation_performance_1 +input(t) = t > 5.0 +r = @reaction input(t), 0 --> X +nothing # hide +``` +Here, the production of species $X$ is switched on at the time $t=5.0$. This reaction (which rate depends on `t`) will generate a `VariableRateJump`, which is bad for jump simulation performance. However, since the rate is piecewise constant, it can instead be implemented by setting it to a constant parameter $i$, and then use a [*callback*](@ref advanced_simulations_callbacks) to update it at the critical times. This will again generate an equivalent model, but with the reaction encoded as a `MassActionJump` (rather than a `VariableRateJump`). + +## Jump solver selection +When creating a `JumpProblem`, a specific solver is designated using its third argument. +```@example jump_simulation_performance_2 +using Catalyst, JumpProcesses, Plots + +rn = @reaction_network begin + (p,d), 0 <--> X +end +u0 = [:X => 10] +tspan = (0.0, 1000.0) +ps = [:p => 1000.0, :d => 100.0] + +dprob = DiscreteProblem(rn, u0, tspan, ps) +jprob = JumpProblem(rn, dprob, Direct()) +nothing # hide +``` +Here (as throughout most of Catalyst's documentation) we have used the `Direct()` solver (which corresponds to Gillespie's original direct method [^1][^2], also called the *stochastic simulation algorithm*). This method was originally published in 1976, and since then, many additional methods for performing jump simulations of CRN models have been developed. + +Gillespie's direct method will, after a jump has been performed, recompute the rates of *all* possible jumps in the system. This is typically not required. E.g. consider the following system: +```@example jump_simulation_performance_2 +@reaction_network begin + k1, X1 --> X2 + k2, X2 --> X3 + k3, X3 --> 0 +end +``` +Here, the rate of the `k1, X1 --> X2` and `k2, X2 --> X3` reactions does not depend on the amount of $X3$ in the system. Hence, their rates are unaffected by the occurrence of the `k3, X3 --> 0` reaction. Performant jump simulation methods have clever ways to determine which rates require recomputing after the occurrence of each reaction, which improves their performance. Many of these depend on so-called dependency graphs (which track which reactions' rates are affected by the occurrence of which reactions). Catalyst automatically builds such dependency graphs, which means that most jump simulators can be used without any additional input. + +A full list of jump simulation algorithms implemented by JumpProcesses can be found [here](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-for-Exact-Simulation). Generally, `RSSA()` (the rejection SSA method [^3][^4]) is recommended for small models, with `RSSACR()` (the rejection SSA with composition-rejection method [^5]) typically being more performant for larger models. + + +## Hybrid simulations +For some models, copy numbers may vary greatly between different species. E.g. consider a genetic promoter which can either be in an inactive form ($Pᵢ$) or an active form ($Pₐ$). The active promoter produces a molecule ($M$): +```@example jump_simulation_performance_3 +using Catalyst # hide +rn = @reaction_network begin + (kA,kI), Pᵢ <--> Pₐ + p, Pₐ --> Pₐ + M + d, M --> ∅ +end +``` +Let us simulate this model and consider the copy numbers of each individual component: +```@example jump_simulation_performance_2 +using JumpProcesses, Plots # hide +u0 = [:Pᵢ => 1, :Pₐ => 0, :M => 10000] +tspan = (0.0, 5000.0) +p = [:kA => 0.05, :kI => .01, :p => 500.0, :d => 0.0005] + +dprob = DiscreteProblem(rn, u0, tspan, p) +jprob = JumpProblem(rn, dprob, RSSA()) +sol = solve(jprob, SSAStepper()) + +plt_1 = plot(sol; idxs=2) +plt_2 = plot(sol; idxs=3) +plot(plt_1, plt_2, size=(1200,400)) +``` +We note that the copy numbers $Pₐ$ are highly stochastic. Meanwhile, $M$ exists in so large copy numbers that its trajectory can be considered approximately deterministic. For models like this one, jump simulations are required to capture the stochastic behaviour of the promoter. However, the large number of jumps generated by $M$ makes such simulations excessively expensive. Here, *hybrid simulations* can be used. Hybrid simulations employ different simulation strategies for their different components. In our example, the state of the promoter could be simulated using a jump simulation, while $M$ could be simulated using an ODE. + +Hybrid simulations for Catalyst models are currently not supported. However, it is a work in progress. If you require such simulations, please [raise an issue](https://github.com/SciML/Catalyst.jl/issues) and we can notify you of the current state of our implementation. Alternatively, other CRN modelling tools that support hybrid simulations exist. + + +--- +## References +[^1]: [D. T. Gillespie, *A general method for numerically simulating the stochastic time evolution of coupled chemical reactions*, Journal of Computational Physics (1976).](https://www.sciencedirect.com/science/article/abs/pii/0021999176900413) +[^2]: [D. T. Gillespie, *Exact Stochastic Simulation of Coupled Chemical Reactions*, The Journal of Physical Chemistry (1977).](https://pubs.acs.org/doi/10.1021/j100540a008) +[^3]: [V. H. Thanh, C. Priami and R. Zunino, *Efficient rejection-based simulation of biochemical reactions with stochastic noise and delays*, Journal of Chemical Physics (2014).](https://pubmed.ncbi.nlm.nih.gov/25296793/) +[^4]: [V. H. Thanh, R. Zunino and C. Priami, *On the rejection-based algorithm for simulation and analysis of large-scale reaction networks*, Journal of Chemical Physics (2015).](https://pubmed.ncbi.nlm.nih.gov/26133409/) +[^5]: [V. H. Thanh, R. Zunino, and C. Priami, *Efficient constant-time complexity algorithm for stochastic simulation of large reaction networks*, IEEE/ACM Transactions on Computational Biology and Bioinformatics (2017).](https://pubmed.ncbi.nlm.nih.gov/26890923/) From 456aa20bd822650456fc29bfc2b0f585b7e315ef Mon Sep 17 00:00:00 2001 From: Torkel Date: Thu, 21 Dec 2023 20:35:54 +0100 Subject: [PATCH 02/24] up --- docs/src/catalyst_applications/jump_simulation_performance.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 8450a70958..6c31b65627 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -119,8 +119,7 @@ end ``` Here, the rate of the `k1, X1 --> X2` and `k2, X2 --> X3` reactions does not depend on the amount of $X3$ in the system. Hence, their rates are unaffected by the occurrence of the `k3, X3 --> 0` reaction. Performant jump simulation methods have clever ways to determine which rates require recomputing after the occurrence of each reaction, which improves their performance. Many of these depend on so-called dependency graphs (which track which reactions' rates are affected by the occurrence of which reactions). Catalyst automatically builds such dependency graphs, which means that most jump simulators can be used without any additional input. -A full list of jump simulation algorithms implemented by JumpProcesses can be found [here](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-for-Exact-Simulation). Generally, `RSSA()` (the rejection SSA method [^3][^4]) is recommended for small models, with `RSSACR()` (the rejection SSA with composition-rejection method [^5]) typically being more performant for larger models. - +A full list of jump simulation method implemented by JumpProcesses can be found [here](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-for-Exact-Simulation). Generally, `RSSA()` (the rejection SSA method [^3][^4]) is recommended for small models, with `RSSACR()` (the rejection SSA with composition-rejection method [^5]) typically being more performant for larger models. For models that are simulated a large number of times, it can be worthwhile to try a few different jump simulation methods to determine which one is most performant in each given case. ## Hybrid simulations For some models, copy numbers may vary greatly between different species. E.g. consider a genetic promoter which can either be in an inactive form ($Pᵢ$) or an active form ($Pₐ$). The active promoter produces a molecule ($M$): From 0f78bc282edd1c5305f100590a9bebc6b3fedc25 Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Thu, 21 Dec 2023 15:45:16 -0500 Subject: [PATCH 03/24] delte .toml yielded by local doc build --- docs/src/assets/Project.toml | 50 ------------------------------------ 1 file changed, 50 deletions(-) delete mode 100644 docs/src/assets/Project.toml diff --git a/docs/src/assets/Project.toml b/docs/src/assets/Project.toml deleted file mode 100644 index 2ea3b3fda9..0000000000 --- a/docs/src/assets/Project.toml +++ /dev/null @@ -1,50 +0,0 @@ -[deps] -BifurcationKit = "0f109fa4-8a5d-4b75-95aa-f515264e7665" -Catalyst = "479239e8-5488-4da2-87a7-35f2df7eef83" -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -HomotopyContinuation = "f213a82b-91d6-5c5d-acf7-10f1c761b327" -Latexify = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" -ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" -NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" -Optim = "429524aa-4258-5aef-a3af-852621145aeb" -Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" -OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" -OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -PEtab = "48d54b35-e43e-4a66-a5a1-dde6b987cf69" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" -SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1" -Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" -SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" -SteadyStateDiffEq = "9672c7b4-1e72-59bd-8a11-6ac3964bc41f" -StochasticDiffEq = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0" -Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" - -[compat] -BifurcationKit = "0.3" -Catalyst = "13" -DataFrames = "1" -DifferentialEquations = "7.7" -Distributions = "0.25" -Documenter = "0.27" -HomotopyContinuation = "2.6" -Latexify = "0.15, 0.16" -ModelingToolkit = "8.47" -NonlinearSolve = "1.6.0, 2" -Optim = "1" -Optimization = "3.19" -OptimizationOptimisers = "0.1.1" -OrdinaryDiffEq = "6" -PEtab = "2" -Plots = "1.36" -SciMLBase = "~2.5" -SciMLSensitivity = "7.19" -Setfield = "1.1" -SpecialFunctions = "2.1" -SteadyStateDiffEq = "1" -StochasticDiffEq = "6" -Symbolics = "5.0.3" From fdb8445b59d7e5bf8837b297a0f8aad4603f61e0 Mon Sep 17 00:00:00 2001 From: Torkel Date: Mon, 25 Dec 2023 14:44:21 +0100 Subject: [PATCH 04/24] add book reference --- .../jump_simulation_performance.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 6c31b65627..399c2d1ab2 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -9,6 +9,8 @@ Typically, propensities are recomputed only whenever a jump occurs. This means t - Simulations over long time spans. - Simulations that are performed a large number of times. +More throughout description of these types of simulations can be found in the literature [^1]. + ## Managing of solution saving By default, `solve` saves the value of the solution at the time of every jump. For simulations where a large number of jumps occur, this can cause memory to quickly fill up. Typically, for simulations with a large number of jumps, we want to [disable this feature](https://docs.sciml.ai/JumpProcesses/dev/tutorials/discrete_stochastic_example/#save_positions_docs) and instead set the save frequency manually. To exemplify this, let us consider a simple production/degradation model: ```@example jump_simulation_performance_1 @@ -107,7 +109,7 @@ dprob = DiscreteProblem(rn, u0, tspan, ps) jprob = JumpProblem(rn, dprob, Direct()) nothing # hide ``` -Here (as throughout most of Catalyst's documentation) we have used the `Direct()` solver (which corresponds to Gillespie's original direct method [^1][^2], also called the *stochastic simulation algorithm*). This method was originally published in 1976, and since then, many additional methods for performing jump simulations of CRN models have been developed. +Here (as throughout most of Catalyst's documentation) we have used the `Direct()` solver (which corresponds to Gillespie's original direct method [^2][^3], also called the *stochastic simulation algorithm*). This method was originally published in 1976, and since then, many additional methods for performing jump simulations of CRN models have been developed. Gillespie's direct method will, after a jump has been performed, recompute the rates of *all* possible jumps in the system. This is typically not required. E.g. consider the following system: ```@example jump_simulation_performance_2 @@ -119,7 +121,7 @@ end ``` Here, the rate of the `k1, X1 --> X2` and `k2, X2 --> X3` reactions does not depend on the amount of $X3$ in the system. Hence, their rates are unaffected by the occurrence of the `k3, X3 --> 0` reaction. Performant jump simulation methods have clever ways to determine which rates require recomputing after the occurrence of each reaction, which improves their performance. Many of these depend on so-called dependency graphs (which track which reactions' rates are affected by the occurrence of which reactions). Catalyst automatically builds such dependency graphs, which means that most jump simulators can be used without any additional input. -A full list of jump simulation method implemented by JumpProcesses can be found [here](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-for-Exact-Simulation). Generally, `RSSA()` (the rejection SSA method [^3][^4]) is recommended for small models, with `RSSACR()` (the rejection SSA with composition-rejection method [^5]) typically being more performant for larger models. For models that are simulated a large number of times, it can be worthwhile to try a few different jump simulation methods to determine which one is most performant in each given case. +A full list of jump simulation method implemented by JumpProcesses can be found [here](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-for-Exact-Simulation). Generally, `RSSA()` (the rejection SSA method [^4][^5]) is recommended for small models, with `RSSACR()` (the rejection SSA with composition-rejection method [^6]) typically being more performant for larger models. For models that are simulated a large number of times, it can be worthwhile to try a few different jump simulation methods to determine which one is most performant in each given case. ## Hybrid simulations For some models, copy numbers may vary greatly between different species. E.g. consider a genetic promoter which can either be in an inactive form ($Pᵢ$) or an active form ($Pₐ$). The active promoter produces a molecule ($M$): @@ -153,8 +155,9 @@ Hybrid simulations for Catalyst models are currently not supported. However, it --- ## References -[^1]: [D. T. Gillespie, *A general method for numerically simulating the stochastic time evolution of coupled chemical reactions*, Journal of Computational Physics (1976).](https://www.sciencedirect.com/science/article/abs/pii/0021999176900413) -[^2]: [D. T. Gillespie, *Exact Stochastic Simulation of Coupled Chemical Reactions*, The Journal of Physical Chemistry (1977).](https://pubs.acs.org/doi/10.1021/j100540a008) -[^3]: [V. H. Thanh, C. Priami and R. Zunino, *Efficient rejection-based simulation of biochemical reactions with stochastic noise and delays*, Journal of Chemical Physics (2014).](https://pubmed.ncbi.nlm.nih.gov/25296793/) -[^4]: [V. H. Thanh, R. Zunino and C. Priami, *On the rejection-based algorithm for simulation and analysis of large-scale reaction networks*, Journal of Chemical Physics (2015).](https://pubmed.ncbi.nlm.nih.gov/26133409/) -[^5]: [V. H. Thanh, R. Zunino, and C. Priami, *Efficient constant-time complexity algorithm for stochastic simulation of large reaction networks*, IEEE/ACM Transactions on Computational Biology and Bioinformatics (2017).](https://pubmed.ncbi.nlm.nih.gov/26890923/) +[^1]: [L. Marchetti, C. Priami, V. H. Thanh, *Simulation Algorithms for Computational Systems Biology*, Springer (2017).](https://link.springer.com/book/10.1007/978-3-319-63113-4) +[^2]: [D. T. Gillespie, *A general method for numerically simulating the stochastic time evolution of coupled chemical reactions*, Journal of Computational Physics (1976).](https://www.sciencedirect.com/science/article/abs/pii/0021999176900413) +[^3]: [D. T. Gillespie, *Exact Stochastic Simulation of Coupled Chemical Reactions*, The Journal of Physical Chemistry (1977).](https://pubs.acs.org/doi/10.1021/j100540a008) +[^4]: [V. H. Thanh, C. Priami and R. Zunino, *Efficient rejection-based simulation of biochemical reactions with stochastic noise and delays*, Journal of Chemical Physics (2014).](https://pubmed.ncbi.nlm.nih.gov/25296793/) +[^5]: [V. H. Thanh, R. Zunino and C. Priami, *On the rejection-based algorithm for simulation and analysis of large-scale reaction networks*, Journal of Chemical Physics (2015).](https://pubmed.ncbi.nlm.nih.gov/26133409/) +[^6]: [V. H. Thanh, R. Zunino, and C. Priami, *Efficient constant-time complexity algorithm for stochastic simulation of large reaction networks*, IEEE/ACM Transactions on Computational Biology and Bioinformatics (2017).](https://pubmed.ncbi.nlm.nih.gov/26890923/) From c70ab1d77a3345ef8aedac48484cb4218f532108 Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 30 Dec 2023 07:04:47 -0500 Subject: [PATCH 05/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 399c2d1ab2..6447842d8d 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -1,5 +1,5 @@ # [Advice for performant jump simulations](@id jump_simulation_performance) -We have previously described how to perform *jump simulations* of *chemical reaction network* (CRN) models using e.g. Gillespie's algorithm. These simulations can, however, be highly computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing runtime). Here, we describe various considerations for performant jump simulations. Furthermore, all jump simulations of Catalyst models are performed using the JumpProcesses.jl package, [which documentation](https://github.com/SciML/JumpProcesses.jl) provides a more extensive description of these. +We have previously described how to perform simulations of stochastic chemical kinetics *chemical reaction network* (CRN) jump process models using e.g. Gillespie's algorithm. These simulations can, however, be highly computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing runtime). Here, we describe various considerations for performant stochastic chemical kinetics simulations, which we will subsequently refer to as jump process simulations. All jump process simulations arising from stochastic chemical kinetics representations of Catalyst models are performed using stochastic simulation algorithms (SSAs) from JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and the available solvers. #### Brief (and optional) introduction to jump simulations Jump simulations are continuous time, discrete space, simulations (where the system's state, throughout the simulation, are the discrete copy numbers of each species). The system's state changes at discrete time points (in so-called jumps). For CRNs, these jumps correspond to the occurrence of individual reactions. Here, *stochastic chemical kinetics* describes how the reactions are distributed across time. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). These depend on the state of the system, and must thus be re-computed whenever the system's state changes. Hence, jump simulations' runtimes are heavily dependent on how frequently these propensities must be recomputed. From 9759bbc49e2cfe5f482888862ea21723690492cc Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 30 Dec 2023 07:06:30 -0500 Subject: [PATCH 06/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 6447842d8d..7afec56dc3 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -2,7 +2,9 @@ We have previously described how to perform simulations of stochastic chemical kinetics *chemical reaction network* (CRN) jump process models using e.g. Gillespie's algorithm. These simulations can, however, be highly computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing runtime). Here, we describe various considerations for performant stochastic chemical kinetics simulations, which we will subsequently refer to as jump process simulations. All jump process simulations arising from stochastic chemical kinetics representations of Catalyst models are performed using stochastic simulation algorithms (SSAs) from JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and the available solvers. #### Brief (and optional) introduction to jump simulations -Jump simulations are continuous time, discrete space, simulations (where the system's state, throughout the simulation, are the discrete copy numbers of each species). The system's state changes at discrete time points (in so-called jumps). For CRNs, these jumps correspond to the occurrence of individual reactions. Here, *stochastic chemical kinetics* describes how the reactions are distributed across time. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). These depend on the state of the system, and must thus be re-computed whenever the system's state changes. Hence, jump simulations' runtimes are heavily dependent on how frequently these propensities must be recomputed. +Jump processes are continuous-time, discrete-space stochastic processes. Exact realizations of these processes can be generated using Stochastic Simulation Algorithms (SSAs), of which Gillespie's Direct method is one popular choice. In the chemical reaction modeling context the discrete-state variables typically correspond to the integer-valued number of each chemical species at each time. A system's state changes at discrete time points, the jump times, when the amount of one or more species are increased by integer amounts (for example, the creation of a new protein due to translation, or the removal of one protein due to degradation). For CRNs, these jumps correspond to the occurrence of individual reactions. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). The propensity of a reaction represents its rate law, i.e. probability per time that it occurs (also known as the associated jump process' intensity function). For example, the reaction `k, A + B --> C + D` has a propensity of $k*A(t)*B(t)$ at time $t$. See [Reaction rate laws used in simulations](@ref) for more details of what propensity function Catalyst generates for a given stochastic chemical kinetics reaction. + +The time of the next occurrence of some reaction, and the type of reaction that occurs, are sampled from distributions that depend on the current values of the propensity. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (for example due to a reaction occurring). Hence, jump simulations' run-times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. Typically, propensities are recomputed only whenever a jump occurs. This means that jump simulations' runtimes, very roughly, scale with the number of jumps. Runtimes typically become prohibitively expensive for: - Simulations of large models (either with many different species or where some species occur in large copy numbers). From 933ead0a9e29d080c76c43dd9e92b4cd64d5155b Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 30 Dec 2023 07:06:55 -0500 Subject: [PATCH 07/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 7afec56dc3..bdb27af697 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -7,7 +7,7 @@ Jump processes are continuous-time, discrete-space stochastic processes. Exact r The time of the next occurrence of some reaction, and the type of reaction that occurs, are sampled from distributions that depend on the current values of the propensity. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (for example due to a reaction occurring). Hence, jump simulations' run-times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. Typically, propensities are recomputed only whenever a jump occurs. This means that jump simulations' runtimes, very roughly, scale with the number of jumps. Runtimes typically become prohibitively expensive for: -- Simulations of large models (either with many different species or where some species occur in large copy numbers). +- Simulations of large models (i.e. with many different species, where some species occur in large copy numbers, or where many reactions are present in a system). - Simulations over long time spans. - Simulations that are performed a large number of times. From 44e95de989b299fac3579e4b287ca6d1c49a3e10 Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 30 Dec 2023 07:07:07 -0500 Subject: [PATCH 08/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index bdb27af697..4c5f2efce3 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -11,7 +11,7 @@ Typically, propensities are recomputed only whenever a jump occurs. This means t - Simulations over long time spans. - Simulations that are performed a large number of times. -More throughout description of these types of simulations can be found in the literature [^1]. +A more thorough overview of simulation methods for Stochastic Chemical Kinetics jump process models and their computational efficiency is given in [^1]. ## Managing of solution saving By default, `solve` saves the value of the solution at the time of every jump. For simulations where a large number of jumps occur, this can cause memory to quickly fill up. Typically, for simulations with a large number of jumps, we want to [disable this feature](https://docs.sciml.ai/JumpProcesses/dev/tutorials/discrete_stochastic_example/#save_positions_docs) and instead set the save frequency manually. To exemplify this, let us consider a simple production/degradation model: From b2f0dea7dc64e5d35a0dc033e095f22592ebe66c Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 30 Dec 2023 07:07:21 -0500 Subject: [PATCH 09/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 4c5f2efce3..5e7159822d 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -44,7 +44,7 @@ nothing # hide ``` However, if we were to simulate our model now, the solution would only actually be saved at its initial and final times. To remedy this, we provide the `saveat = 1.0` argument to the `solve` command, causing the solution to be saved at every `1.0`th time unit (at `t` = `0.0`, `1.0`, `2.0`, ... `999.0`, `1000.0`). ```@example jump_simulation_performance_1 -sol = solve(jprob, SSAStepper(),saveat=1.0) +sol = solve(jprob, SSAStepper(), saveat = 1.0) nothing # hide ``` we can now plot the new solution: From 515febd3ac1933cf4cd343d2f35b3cf38ef578a7 Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 30 Dec 2023 07:08:14 -0500 Subject: [PATCH 10/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 5e7159822d..a7fc6f0a4b 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -152,7 +152,7 @@ plot(plt_1, plt_2, size=(1200,400)) ``` We note that the copy numbers $Pₐ$ are highly stochastic. Meanwhile, $M$ exists in so large copy numbers that its trajectory can be considered approximately deterministic. For models like this one, jump simulations are required to capture the stochastic behaviour of the promoter. However, the large number of jumps generated by $M$ makes such simulations excessively expensive. Here, *hybrid simulations* can be used. Hybrid simulations employ different simulation strategies for their different components. In our example, the state of the promoter could be simulated using a jump simulation, while $M$ could be simulated using an ODE. -Hybrid simulations for Catalyst models are currently not supported. However, it is a work in progress. If you require such simulations, please [raise an issue](https://github.com/SciML/Catalyst.jl/issues) and we can notify you of the current state of our implementation. Alternatively, other CRN modelling tools that support hybrid simulations exist. +Hybrid simulations for Catalyst models are currently not supported, though they are supported by the lower-level solvers in JumpProcesses and DifferentialEquations. It is work in progress to interface Catalyst to such lower-level solvers, and to provide better optimized hybrid methods. If you require such simulations, please [raise an issue](https://github.com/SciML/Catalyst.jl/issues) and we can notify you of the current state of our implementation, or give suggestions on how one can directly interface with the lower-level hybrid solver interface. --- From fd0300f1e0aae39ddc15a0c853d9491e85365a26 Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 30 Dec 2023 08:53:50 -0500 Subject: [PATCH 11/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index a7fc6f0a4b..4264c3d641 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -6,7 +6,7 @@ Jump processes are continuous-time, discrete-space stochastic processes. Exact r The time of the next occurrence of some reaction, and the type of reaction that occurs, are sampled from distributions that depend on the current values of the propensity. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (for example due to a reaction occurring). Hence, jump simulations' run-times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. -Typically, propensities are recomputed only whenever a jump occurs. This means that jump simulations' runtimes, very roughly, scale with the number of jumps. Runtimes typically become prohibitively expensive for: +Typically, propensities are recomputed only whenever a jump occurs. This means that jump simulations' run-times, generally scale with the number of jumps that occur. Run-times typically become prohibitively expensive for: - Simulations of large models (i.e. with many different species, where some species occur in large copy numbers, or where many reactions are present in a system). - Simulations over long time spans. - Simulations that are performed a large number of times. From 929bc2a55041d6d89982730be2fd130a15a4acf5 Mon Sep 17 00:00:00 2001 From: Torkel Date: Sat, 30 Dec 2023 14:54:44 +0100 Subject: [PATCH 12/24] up --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 4264c3d641..379389e964 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -6,7 +6,7 @@ Jump processes are continuous-time, discrete-space stochastic processes. Exact r The time of the next occurrence of some reaction, and the type of reaction that occurs, are sampled from distributions that depend on the current values of the propensity. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (for example due to a reaction occurring). Hence, jump simulations' run-times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. -Typically, propensities are recomputed only whenever a jump occurs. This means that jump simulations' run-times, generally scale with the number of jumps that occur. Run-times typically become prohibitively expensive for: +Typically, propensities are recomputed only whenever a jump occurs. This means that jump simulations' run-times generally scale with the number of jumps that occur. Run-times typically become prohibitively expensive for: - Simulations of large models (i.e. with many different species, where some species occur in large copy numbers, or where many reactions are present in a system). - Simulations over long time spans. - Simulations that are performed a large number of times. From 135fe0ea4851da2c0aaf24fa6270dd04a3e3910a Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 4 May 2024 20:00:28 -0400 Subject: [PATCH 13/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 379389e964..9ad76cbe0a 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -93,7 +93,7 @@ input(t) = t > 5.0 r = @reaction input(t), 0 --> X nothing # hide ``` -Here, the production of species $X$ is switched on at the time $t=5.0$. This reaction (which rate depends on `t`) will generate a `VariableRateJump`, which is bad for jump simulation performance. However, since the rate is piecewise constant, it can instead be implemented by setting it to a constant parameter $i$, and then use a [*callback*](@ref advanced_simulations_callbacks) to update it at the critical times. This will again generate an equivalent model, but with the reaction encoded as a `MassActionJump` (rather than a `VariableRateJump`). +Here, the production of species $X$ is switched on at the time $t=5.0$. This reaction (for which the rate depends on `t`) will generate a `VariableRateJump`, which is the least performant jump type in simulations, and can not be used with the standard SSAs of JumpProcesses. However, since the rate is piecewise constant, it can alternatively be implemented by setting it to a constant parameter $i$, and then using a [*discrete callback*](@ref advanced_simulations_callbacks) to update it at the switching times. This will again generate an equivalent model, but with the reaction encoded as a `MassActionJump` (rather than a `VariableRateJump`). Generally such explicit time-discontinuities should be encoded via discrete callbacks instead of as `VariableRateJump`s if possible as simulation methods for the latter typically assume the system's propensities evolve continuously in-between jumps. ## Jump solver selection When creating a `JumpProblem`, a specific solver is designated using its third argument. From 845bf8ab00ad75ee40a2ad8e95b5ea3466f05f5f Mon Sep 17 00:00:00 2001 From: Torkel Date: Sat, 4 May 2024 20:12:42 -0400 Subject: [PATCH 14/24] up --- .../jump_simulation_performance.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 9ad76cbe0a..b384f2f045 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -1,12 +1,12 @@ # [Advice for performant jump simulations](@id jump_simulation_performance) -We have previously described how to perform simulations of stochastic chemical kinetics *chemical reaction network* (CRN) jump process models using e.g. Gillespie's algorithm. These simulations can, however, be highly computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing runtime). Here, we describe various considerations for performant stochastic chemical kinetics simulations, which we will subsequently refer to as jump process simulations. All jump process simulations arising from stochastic chemical kinetics representations of Catalyst models are performed using stochastic simulation algorithms (SSAs) from JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and the available solvers. +We have previously introduced how to use *stochastic chemical kinetics* to perform jump simulations of *chemical reaction network* (CRRN) models (using e.g. Gillespie's algorithm). These simulations can, however, be very computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing runtime). Here, we describe various considerations for performant stochastic chemical kinetics simulations, which we will here refer to as jump process simulations. All jump process simulations arising from stochastic chemical kinetics representations of Catalyst models are performed using stochastic simulation algorithms (SSAs) from JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and the available solvers. #### Brief (and optional) introduction to jump simulations -Jump processes are continuous-time, discrete-space stochastic processes. Exact realizations of these processes can be generated using Stochastic Simulation Algorithms (SSAs), of which Gillespie's Direct method is one popular choice. In the chemical reaction modeling context the discrete-state variables typically correspond to the integer-valued number of each chemical species at each time. A system's state changes at discrete time points, the jump times, when the amount of one or more species are increased by integer amounts (for example, the creation of a new protein due to translation, or the removal of one protein due to degradation). For CRNs, these jumps correspond to the occurrence of individual reactions. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). The propensity of a reaction represents its rate law, i.e. probability per time that it occurs (also known as the associated jump process' intensity function). For example, the reaction `k, A + B --> C + D` has a propensity of $k*A(t)*B(t)$ at time $t$. See [Reaction rate laws used in simulations](@ref) for more details of what propensity function Catalyst generates for a given stochastic chemical kinetics reaction. +Jump processes are continuous-time, discrete-space stochastic processes. Exact realizations of these processes can be generated using Stochastic Simulation Algorithms (SSAs), of which Gillespie's Direct method is the most well-known choice. In the chemical reaction modelling contexts, the discrete-state variables typically correspond to the integer-valued number of each chemical species at each time. A system's state changes at discrete time points (the jump times) when the amount of one (or more) species are increased by integer amount(s) (for example, the creation of a new protein due to translation, or the removal of one protein due to degradation). For CRNs, these jumps correspond to the occurrence of individual reactions. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). The propensity of a reaction (which is computed through as its *rate law*) represents the probability per time that it occurs (also known as the associated jump process' intensity function). For example, the reaction `k, A + B --> C + D` has a rate $K$ and a propensity $k*A*B$. See [Reaction rate laws used in simulations](@ref) for more details of what propensity function Catalyst generates for a given stochastic chemical kinetics reaction. -The time of the next occurrence of some reaction, and the type of reaction that occurs, are sampled from distributions that depend on the current values of the propensity. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (for example due to a reaction occurring). Hence, jump simulations' run-times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. +During a simulation, after the occurrence of a reaction, the simulation algorithm computes both the time to the next reaction and which reaction will occur at this time point. Both these (reaction time and type) are computed by making random draws from probability distributions that depends on the system's reactions' propensities. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (typically due to the occurrence of a reaction). Hence, jump simulations' run times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. -Typically, propensities are recomputed only whenever a jump occurs. This means that jump simulations' run-times generally scale with the number of jumps that occur. Run-times typically become prohibitively expensive for: +Typically, propensities are recomputed only at jump events. This means that jump simulations' run times generally scale with the number of jumps that occur. Run times typically become prohibitively expensive for: - Simulations of large models (i.e. with many different species, where some species occur in large copy numbers, or where many reactions are present in a system). - Simulations over long time spans. - Simulations that are performed a large number of times. @@ -53,11 +53,14 @@ we can now plot the new solution: ``` Here, we note that the time to plot the simulation was reduced (for this example, admittedly from an already low level). Furthermore, the plots look different since the trajectory is sampled at much sparser time points. +!!! note + With the default saving behavior, evaluating `sol(t)` at any time `t` within the `tspan` gives the *exact* value of the solution at that time (i.e. the vector of the exact number of each species). When using `saveat` and `save_positions = (false,false)` the solution is only saved at the selected time points, and as such `sol(t)` should not be evaluated except at times at which the solution was saved. While evaluating the solution at other times will return values they will not be the exact value of the jump process! + ## Types of jumps Each reaction in a chemical reaction network model corresponds to a possible jump of the jump simulation. These jumps can be divided into 3 categories: - `MassActionJump`s: These correspond to reactions which rates are constant throughout the simulation. They are typically generated when the rate contains parameters only. - `ConstantRateJump`s: These correspond to reactions which rates remain constant between individual jumps, but may change in response to a jump occurring. They are typically generated when the rate contains variables and parameters only. -- `VariableRateJump`s: These correspond to reactions which rates may change at any time during the simulation. They are typically generated when the rate depends on the time variable ($t$). +- `VariableRateJump`s: These correspond to reactions which rates may change at any time during the simulation. They are typically generated when a reaction's rate depends on the time variable ($t$). Here are some example reactions for the different types of jumps: ```@example jump_simulation_performance_1 From b3386271e04bc93a36550070cc173bcdf105c72d Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 4 May 2024 20:13:34 -0400 Subject: [PATCH 15/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index b384f2f045..1c8e87efaa 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -114,7 +114,7 @@ dprob = DiscreteProblem(rn, u0, tspan, ps) jprob = JumpProblem(rn, dprob, Direct()) nothing # hide ``` -Here (as throughout most of Catalyst's documentation) we have used the `Direct()` solver (which corresponds to Gillespie's original direct method [^2][^3], also called the *stochastic simulation algorithm*). This method was originally published in 1976, and since then, many additional methods for performing jump simulations of CRN models have been developed. +Here (as throughout most of Catalyst's documentation) we have used the `Direct()` SSA solver (which corresponds to Gillespie's original direct method [^2][^3]). This method was originally published in 1976, and since then, many additional methods for simulating stochastic chemical kinetics models have been developed. Gillespie's direct method will, after a jump has been performed, recompute the rates of *all* possible jumps in the system. This is typically not required. E.g. consider the following system: ```@example jump_simulation_performance_2 From 5b632c232557e2b70721e793558f98bbfe27cf0c Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 4 May 2024 20:14:18 -0400 Subject: [PATCH 16/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 1c8e87efaa..71c00f50ee 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -116,7 +116,7 @@ nothing # hide ``` Here (as throughout most of Catalyst's documentation) we have used the `Direct()` SSA solver (which corresponds to Gillespie's original direct method [^2][^3]). This method was originally published in 1976, and since then, many additional methods for simulating stochastic chemical kinetics models have been developed. -Gillespie's direct method will, after a jump has been performed, recompute the rates of *all* possible jumps in the system. This is typically not required. E.g. consider the following system: +Gillespie's direct method will, after a jump has been performed, recompute the propensities of *all* possible jumps in the system (i.e. of all reactions). This is typically not required. E.g. consider the following system: ```@example jump_simulation_performance_2 @reaction_network begin k1, X1 --> X2 From b5e990de911d072846640c7eb04c24de2fd7361f Mon Sep 17 00:00:00 2001 From: Torkel Loman Date: Sat, 4 May 2024 20:14:45 -0400 Subject: [PATCH 17/24] Update docs/src/catalyst_applications/jump_simulation_performance.md Co-authored-by: Sam Isaacson --- docs/src/catalyst_applications/jump_simulation_performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 71c00f50ee..966112dcd1 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -124,7 +124,7 @@ Gillespie's direct method will, after a jump has been performed, recompute the p k3, X3 --> 0 end ``` -Here, the rate of the `k1, X1 --> X2` and `k2, X2 --> X3` reactions does not depend on the amount of $X3$ in the system. Hence, their rates are unaffected by the occurrence of the `k3, X3 --> 0` reaction. Performant jump simulation methods have clever ways to determine which rates require recomputing after the occurrence of each reaction, which improves their performance. Many of these depend on so-called dependency graphs (which track which reactions' rates are affected by the occurrence of which reactions). Catalyst automatically builds such dependency graphs, which means that most jump simulators can be used without any additional input. +Here, the propensities of the `k1, X1 --> X2` and `k2, X2 --> X3` reactions, `k1*X1*X2` and `k2*X2` respectively, do not depend on the amount of $X3$ in the system. Hence, the value of their propensities are unchanged by the occurrence of the `k3, X3 --> 0` reaction. Performant jump simulation methods have several ways to determine which rates require recomputing after the occurrence of a reaction, which improves their computational performance. Many of these depend on dependency graphs, which track which other reactions' propensities must be recomputed after the occurrence of a given reaction. Catalyst automatically builds such dependency graphs, which means that all JumpProcesses SSAs can be used without any additional inputs. A full list of jump simulation method implemented by JumpProcesses can be found [here](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-for-Exact-Simulation). Generally, `RSSA()` (the rejection SSA method [^4][^5]) is recommended for small models, with `RSSACR()` (the rejection SSA with composition-rejection method [^6]) typically being more performant for larger models. For models that are simulated a large number of times, it can be worthwhile to try a few different jump simulation methods to determine which one is most performant in each given case. From ccceea2ca58614e1c27538064742f89bcb89f73d Mon Sep 17 00:00:00 2001 From: Torkel Date: Sat, 4 May 2024 20:46:24 -0400 Subject: [PATCH 18/24] save progress --- .../jump_simulation_performance.md | 62 ++++++++++--------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/catalyst_applications/jump_simulation_performance.md index 966112dcd1..889a69508a 100644 --- a/docs/src/catalyst_applications/jump_simulation_performance.md +++ b/docs/src/catalyst_applications/jump_simulation_performance.md @@ -1,7 +1,7 @@ # [Advice for performant jump simulations](@id jump_simulation_performance) We have previously introduced how to use *stochastic chemical kinetics* to perform jump simulations of *chemical reaction network* (CRRN) models (using e.g. Gillespie's algorithm). These simulations can, however, be very computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing runtime). Here, we describe various considerations for performant stochastic chemical kinetics simulations, which we will here refer to as jump process simulations. All jump process simulations arising from stochastic chemical kinetics representations of Catalyst models are performed using stochastic simulation algorithms (SSAs) from JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and the available solvers. -#### Brief (and optional) introduction to jump simulations +### [Brief (and optional) introduction to jump simulations](@id jump_simulation_performance_intro) Jump processes are continuous-time, discrete-space stochastic processes. Exact realizations of these processes can be generated using Stochastic Simulation Algorithms (SSAs), of which Gillespie's Direct method is the most well-known choice. In the chemical reaction modelling contexts, the discrete-state variables typically correspond to the integer-valued number of each chemical species at each time. A system's state changes at discrete time points (the jump times) when the amount of one (or more) species are increased by integer amount(s) (for example, the creation of a new protein due to translation, or the removal of one protein due to degradation). For CRNs, these jumps correspond to the occurrence of individual reactions. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). The propensity of a reaction (which is computed through as its *rate law*) represents the probability per time that it occurs (also known as the associated jump process' intensity function). For example, the reaction `k, A + B --> C + D` has a rate $K$ and a propensity $k*A*B$. See [Reaction rate laws used in simulations](@ref) for more details of what propensity function Catalyst generates for a given stochastic chemical kinetics reaction. During a simulation, after the occurrence of a reaction, the simulation algorithm computes both the time to the next reaction and which reaction will occur at this time point. Both these (reaction time and type) are computed by making random draws from probability distributions that depends on the system's reactions' propensities. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (typically due to the occurrence of a reaction). Hence, jump simulations' run times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. @@ -13,36 +13,36 @@ Typically, propensities are recomputed only at jump events. This means that jump A more thorough overview of simulation methods for Stochastic Chemical Kinetics jump process models and their computational efficiency is given in [^1]. -## Managing of solution saving -By default, `solve` saves the value of the solution at the time of every jump. For simulations where a large number of jumps occur, this can cause memory to quickly fill up. Typically, for simulations with a large number of jumps, we want to [disable this feature](https://docs.sciml.ai/JumpProcesses/dev/tutorials/discrete_stochastic_example/#save_positions_docs) and instead set the save frequency manually. To exemplify this, let us consider a simple production/degradation model: +## [Managing of solution saving](@id jump_simulation_performance_solution_saving) +By default, `solve` saves the value of the solution at the time of every jump. For simulations with a large number of jump events, this can cause memory to quickly fill up. Typically, for simulations with a large number of jumps, we want to [disable this feature](https://docs.sciml.ai/JumpProcesses/dev/tutorials/discrete_stochastic_example/#save_positions_docs) and instead set the save frequency manually. Let us consider a simple [birth-death model](@ref ref): ```@example jump_simulation_performance_1 using Catalyst, JumpProcesses, Plots -rn = @reaction_network begin +bd_model = @reaction_network begin (p,d), 0 <--> X end u0 = [:X => 10] tspan = (0.0, 1000.0) ps = [:p => 1000.0, :d => 100.0] -dprob = DiscreteProblem(rn, u0, tspan, ps) +dprob = DiscreteProblem(bd_model, u0, tspan, ps) nothing # hide ``` -Let us simulate this model using the default options, and plot the results. Furthermore, we use [BenchmarkTools.jl's](https://github.com/JuliaCI/BenchmarkTools.jl) `@btime` macro to measure the time it takes to plot the output solution: +Let us simulate it using the default options, and plot the results. Furthermore, we use [BenchmarkTools.jl's](https://github.com/JuliaCI/BenchmarkTools.jl) `@btime` macro to measure the time it takes to plot the output solution: ```@example jump_simulation_performance_1 using BenchmarkTools jprob = JumpProblem(rn, dprob, Direct()) sol = solve(jprob, SSAStepper()) @btime plot(sol) ``` -This simulation generates a very large number of jumps, with the solution saved and plotted at each time such a jump occurs. If the number of jumps is high enough, the memory limits may be exceeded (causing Julia to crash). Here, we do not reach that limit, however, the performance of the `plot(sol)` is affected by the number of points it must render (rendering too much in a single plot is another potential cause of crashed due to memory strain). +This simulation generates a very large number of jumps, with the solution saved and plotted at each time such a jump occurs. If the number of jumps is high enough, the memory limits may be exceeded (causing Julia to crash). Here, we do not reach this limit, however, the run time of the `plot(sol)` is affected by the number of points it must render (rendering too much in a single plot is another potential cause of crashed due to memory strain). Next, we provide the `save_positions = (false, false)` option to `JumpProblem`. This turns off the saving of the solution both before and after each jump. ```@example jump_simulation_performance_1 jprob = JumpProblem(rn, dprob, Direct(); save_positions = (false, false)) nothing # hide ``` -However, if we were to simulate our model now, the solution would only actually be saved at its initial and final times. To remedy this, we provide the `saveat = 1.0` argument to the `solve` command, causing the solution to be saved at every `1.0`th time unit (at `t` = `0.0`, `1.0`, `2.0`, ... `999.0`, `1000.0`). +However, if we were to simulate our model now, the solution would only actually be saved at its initial and final times. To remedy this, we provide the `saveat = 1.0` argument to the `solve` command, ensuring that the solution to be saved at every `1.0`th time unit (that is, at `t` = `0.0`, `1.0`, `2.0`, ... `999.0`, `1000.0`). ```@example jump_simulation_performance_1 sol = solve(jprob, SSAStepper(), saveat = 1.0) nothing # hide @@ -54,13 +54,13 @@ we can now plot the new solution: Here, we note that the time to plot the simulation was reduced (for this example, admittedly from an already low level). Furthermore, the plots look different since the trajectory is sampled at much sparser time points. !!! note - With the default saving behavior, evaluating `sol(t)` at any time `t` within the `tspan` gives the *exact* value of the solution at that time (i.e. the vector of the exact number of each species). When using `saveat` and `save_positions = (false,false)` the solution is only saved at the selected time points, and as such `sol(t)` should not be evaluated except at times at which the solution was saved. While evaluating the solution at other times will return values they will not be the exact value of the jump process! + With the default saving behaviour, [evaluating `sol(t)`](@ref ref) at any time `t` within the `tspan` gives the *exact* value of the solution at that time (i.e. the vector of the exact number of each species). When using `saveat` and `save_positions = (false,false)` the solution is only saved at the selected time points, and as such `sol(t)` should not be evaluated except at times at which the solution was saved. While evaluating the solution at other times will return values they will not be the exact value of the jump process! -## Types of jumps +## [Types of jumps](@id jump_simulation_performance_jump_types) Each reaction in a chemical reaction network model corresponds to a possible jump of the jump simulation. These jumps can be divided into 3 categories: -- `MassActionJump`s: These correspond to reactions which rates are constant throughout the simulation. They are typically generated when the rate contains parameters only. -- `ConstantRateJump`s: These correspond to reactions which rates remain constant between individual jumps, but may change in response to a jump occurring. They are typically generated when the rate contains variables and parameters only. -- `VariableRateJump`s: These correspond to reactions which rates may change at any time during the simulation. They are typically generated when a reaction's rate depends on the time variable ($t$). +- `MassActionJump`s: These correspond to reactions which rates remain constant throughout the simulation.They are typically generated by reactions' which rates contain time-independent parameters only. +- `ConstantRateJump`s: These correspond to reactions which rates remain constant between individual jumps, but may change in response to a jump occurring. They are typically generated by reactions' which rates contain species and time-independent parameters only. +- `VariableRateJump`s: These correspond to reactions which rates may change at any time during the simulation. They are typically generated by reactions' which rates contain the time variable ($t$). Here are some example reactions for the different types of jumps: ```@example jump_simulation_performance_1 @@ -78,40 +78,40 @@ Here are some example reactions for the different types of jumps: @reaction X/t, X + Y --> XY nothing # hide ``` -Throughout a simulation, `VariableRateJump`s' rates require updating more frequently than `ConstantRateJump`s' rates, which in turn requires updating more frequently than `MassActionJump`s' rates. Since the performance of jump simulations is heavily affected by how frequently jump rates are computed, keeping the number of `ConstantRateJump`s and `VariableRateJump`s as small as possible is advantageous. Primarily, there exist two common cases where models are written in a way so that sub-optimal jump types are generated, both of which we describe below. +Updating `MassActionJump`s' propensities is more computationally efficient (due to their constrained form) than updating them for `ConstantRateJump`s. Thus simulations are more performant if a larger fraction of reactions are of the `MassActionJump` form (rather than the `ConstantRateJump` form). Furthermore, simulations containing `VariableRateJump` requires additional routines to find the times between jump events (as unlike non-`VariableRateJump` simulations, jump propensities change *between* jump events, making this computation more difficult). Hence, the existence of `VariableRateJump` can significantly slow down a simulation Primarily, there exist two common situation where models are written in a way so that sub-optimal jump types are generated, both of which we describe below. -#### Unnecessarily putting species in rates rather than in reactions +### [Unnecessarily putting species in rates rather than in reactions](@id jump_simulation_performance_jump_types_unnecessary_constantratejumps) Sometimes, a rate has been made dependent on a species, where that species instead could have been made part of the actual reaction. Consider a reaction where an enzyme ($E$) catalyses the phosphorylation of a protein ($X$) to phosphorylated form ($Xᵖ$). It can be written in two different forms: ```@example jump_simulation_performance_1 r1 = @reaction k*E, X --> Xᵖ r2 = @reaction k, X + E --> Xᵖ + E nothing # hide ``` -These two reactions will generate identical simulations (this holds for ODE, SDE, and Jump simulations). However, while `r1` will generate a `ConstantRateJump`, `r2` will generate a `MassActionJump`. Hence, if the `r2` form is used, jump simulation performance is, at no cost, improved. Since the two forms are otherwise identical, it is always preferable to, whenever possible, put species in the reactions, rather than the rates. +These two reactions will generate identical simulations (this holds for ODE, SDE, and Jump simulations). However, while `r1` will generate a `ConstantRateJump`, `r2` will generate a `MassActionJump`. Hence, if the `r2` form is used, jump simulation performance is (at no cost) improved. Since the two forms are otherwise identical, it is always preferable to, whenever possible, put species in the reactions, rather than the rates. -#### Using piecewise constant, time-dependant, function instead of callbacks. -Let us consider a reaction with some input that depends on time. We assume that the input is initially $0$, but after some critical time $t=5.0$ it is increased to $1$. This can be implemented through a step function: +### [Using piecewise constant, time-dependant, function instead of events](@id jump_simulation_performance_jump_types_unnecessary_variableratejumps) +Let us consider a reaction with some input that depends on time. We assume that the input is initially $0$, but after some critical time $t = 5.0$ it is increased to $1$. This can be implemented through a step function: ```@example jump_simulation_performance_1 input(t) = t > 5.0 r = @reaction input(t), 0 --> X nothing # hide ``` -Here, the production of species $X$ is switched on at the time $t=5.0$. This reaction (for which the rate depends on `t`) will generate a `VariableRateJump`, which is the least performant jump type in simulations, and can not be used with the standard SSAs of JumpProcesses. However, since the rate is piecewise constant, it can alternatively be implemented by setting it to a constant parameter $i$, and then using a [*discrete callback*](@ref advanced_simulations_callbacks) to update it at the switching times. This will again generate an equivalent model, but with the reaction encoded as a `MassActionJump` (rather than a `VariableRateJump`). Generally such explicit time-discontinuities should be encoded via discrete callbacks instead of as `VariableRateJump`s if possible as simulation methods for the latter typically assume the system's propensities evolve continuously in-between jumps. +Here, the production of species $X$ is switched on at the time $t = 5.0$. This reaction (for which the rate depends on `t`) will generate a `VariableRateJump`, which is the least performant jump type in simulations. However, since the rate is piecewise constant, it can alternatively be implemented by setting it to a constant parameter $i$, and then using a [*discrete event*](@ref ref) to update it at the switching times. This will again generate an equivalent model, but with the reaction encoded as a `MassActionJump` (rather than a `VariableRateJump`). Generally such explicit time-discontinuities should be encoded via discrete callbacks instead of as `VariableRateJump`s if possible (as simulation methods for the latter typically assume the system's propensities evolve continuously in-between jumps). -## Jump solver selection +## [Jump solver selection](@id jump_simulation_performance_solver_selection) When creating a `JumpProblem`, a specific solver is designated using its third argument. ```@example jump_simulation_performance_2 using Catalyst, JumpProcesses, Plots -rn = @reaction_network begin +bd_model = @reaction_network begin (p,d), 0 <--> X end u0 = [:X => 10] tspan = (0.0, 1000.0) ps = [:p => 1000.0, :d => 100.0] -dprob = DiscreteProblem(rn, u0, tspan, ps) -jprob = JumpProblem(rn, dprob, Direct()) +dprob = DiscreteProblem(bd_model, u0, tspan, ps) +jprob = JumpProblem(bd_model, dprob, Direct()) nothing # hide ``` Here (as throughout most of Catalyst's documentation) we have used the `Direct()` SSA solver (which corresponds to Gillespie's original direct method [^2][^3]). This method was originally published in 1976, and since then, many additional methods for simulating stochastic chemical kinetics models have been developed. @@ -124,15 +124,15 @@ Gillespie's direct method will, after a jump has been performed, recompute the p k3, X3 --> 0 end ``` -Here, the propensities of the `k1, X1 --> X2` and `k2, X2 --> X3` reactions, `k1*X1*X2` and `k2*X2` respectively, do not depend on the amount of $X3$ in the system. Hence, the value of their propensities are unchanged by the occurrence of the `k3, X3 --> 0` reaction. Performant jump simulation methods have several ways to determine which rates require recomputing after the occurrence of a reaction, which improves their computational performance. Many of these depend on dependency graphs, which track which other reactions' propensities must be recomputed after the occurrence of a given reaction. Catalyst automatically builds such dependency graphs, which means that all JumpProcesses SSAs can be used without any additional inputs. +Here, the propensities of the `k1, X1 --> X2` and `k2, X2 --> X3` reactions (`k1*X1*X2` and `k2*X2` respectively) do not depend on the amount of $X3$ in the system. Hence, the value of their propensities are unchanged by the occurrence of the `k3, X3 --> 0` reaction. Performant jump simulation methods have several ways to determine which rates require recomputing after the occurrence of a reaction, which improves their computational performance. Many of these requires a dependency graphs, which track which other reactions' propensities must be recomputed after the occurrence of a given reaction. Catalyst automatically builds such dependency graphs, which means that all JumpProcesses SSAs can be used without any additional inputs. A full list of jump simulation method implemented by JumpProcesses can be found [here](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-for-Exact-Simulation). Generally, `RSSA()` (the rejection SSA method [^4][^5]) is recommended for small models, with `RSSACR()` (the rejection SSA with composition-rejection method [^6]) typically being more performant for larger models. For models that are simulated a large number of times, it can be worthwhile to try a few different jump simulation methods to determine which one is most performant in each given case. -## Hybrid simulations +## [Hybrid simulations](@id jump_simulation_performance_hybrid_simulations) For some models, copy numbers may vary greatly between different species. E.g. consider a genetic promoter which can either be in an inactive form ($Pᵢ$) or an active form ($Pₐ$). The active promoter produces a molecule ($M$): ```@example jump_simulation_performance_3 using Catalyst # hide -rn = @reaction_network begin +promoter = @reaction_network begin (kA,kI), Pᵢ <--> Pₐ p, Pₐ --> Pₐ + M d, M --> ∅ @@ -145,18 +145,20 @@ u0 = [:Pᵢ => 1, :Pₐ => 0, :M => 10000] tspan = (0.0, 5000.0) p = [:kA => 0.05, :kI => .01, :p => 500.0, :d => 0.0005] -dprob = DiscreteProblem(rn, u0, tspan, p) -jprob = JumpProblem(rn, dprob, RSSA()) +dprob = DiscreteProblem(promoter, u0, tspan, p) +jprob = JumpProblem(promoter, dprob, RSSA()) sol = solve(jprob, SSAStepper()) plt_1 = plot(sol; idxs=2) plt_2 = plot(sol; idxs=3) -plot(plt_1, plt_2, size=(1200,400)) +plot(plt_1, plt_2, size=(1200, 400)) ``` We note that the copy numbers $Pₐ$ are highly stochastic. Meanwhile, $M$ exists in so large copy numbers that its trajectory can be considered approximately deterministic. For models like this one, jump simulations are required to capture the stochastic behaviour of the promoter. However, the large number of jumps generated by $M$ makes such simulations excessively expensive. Here, *hybrid simulations* can be used. Hybrid simulations employ different simulation strategies for their different components. In our example, the state of the promoter could be simulated using a jump simulation, while $M$ could be simulated using an ODE. -Hybrid simulations for Catalyst models are currently not supported, though they are supported by the lower-level solvers in JumpProcesses and DifferentialEquations. It is work in progress to interface Catalyst to such lower-level solvers, and to provide better optimized hybrid methods. If you require such simulations, please [raise an issue](https://github.com/SciML/Catalyst.jl/issues) and we can notify you of the current state of our implementation, or give suggestions on how one can directly interface with the lower-level hybrid solver interface. +Hybrid simulations for Catalyst models are currently not supported, though they are supported by the lower-level solvers in JumpProcesses and OrdinaryDiffEq. It is work in progress to interface Catalyst to such lower-level solvers, and to provide better optimised hybrid methods. If you require such simulations, please [raise an issue](https://github.com/SciML/Catalyst.jl/issues) and we can notify you of the current state of our implementation, or give suggestions on how one can directly interface with the lower-level hybrid solver interface. +## [Simulation parallelisation](@id jump_simulation_performance_parallelisation) +When multiple simulations are carried out, simulations can be improved by running these in parallel. We have [previously described](@ref ref) how to do so for ODE simulation. Jump simulations can be parallelised in the same manner, with the exception that GPU parallelisation is currently not supported. --- ## References From 9f109f275b8b32f31a3016342a10a2bd770410c5 Mon Sep 17 00:00:00 2001 From: Torkel Date: Thu, 9 May 2024 13:09:10 -0400 Subject: [PATCH 19/24] rebase fix --- docs/pages.jl | 2 +- .../08_jump_simulation_performance.md} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename docs/src/{catalyst_applications/jump_simulation_performance.md => 04_model_simulation/08_jump_simulation_performance.md} (100%) diff --git a/docs/pages.jl b/docs/pages.jl index 04f255eede..6515106ace 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -31,7 +31,7 @@ pages = Any[ # Stochastic simulation statistical analysis. # ODE Performance considerations/advice. # SDE Performance considerations/advice. - # Jump Performance considerations/advice. + "04_model_simulation/08_jump_simulation_performance.md", # Finite state projection ], "Steady state analysis" => Any[ diff --git a/docs/src/catalyst_applications/jump_simulation_performance.md b/docs/src/04_model_simulation/08_jump_simulation_performance.md similarity index 100% rename from docs/src/catalyst_applications/jump_simulation_performance.md rename to docs/src/04_model_simulation/08_jump_simulation_performance.md From bdf964a24457834c8e411f850a5b8ecd2c24fe0d Mon Sep 17 00:00:00 2001 From: Torkel Date: Thu, 16 May 2024 13:27:06 -0400 Subject: [PATCH 20/24] re rebase --- docs/pages.jl | 2 +- .../jump_simulation_performance.md} | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) rename docs/src/{04_model_simulation/08_jump_simulation_performance.md => model_simulation/jump_simulation_performance.md} (86%) diff --git a/docs/pages.jl b/docs/pages.jl index 6515106ace..9d38253db2 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -31,7 +31,7 @@ pages = Any[ # Stochastic simulation statistical analysis. # ODE Performance considerations/advice. # SDE Performance considerations/advice. - "04_model_simulation/08_jump_simulation_performance.md", + "model_simulation/jump_simulation_performance.md", # Finite state projection ], "Steady state analysis" => Any[ diff --git a/docs/src/04_model_simulation/08_jump_simulation_performance.md b/docs/src/model_simulation/jump_simulation_performance.md similarity index 86% rename from docs/src/04_model_simulation/08_jump_simulation_performance.md rename to docs/src/model_simulation/jump_simulation_performance.md index 889a69508a..1fa8bfa031 100644 --- a/docs/src/04_model_simulation/08_jump_simulation_performance.md +++ b/docs/src/model_simulation/jump_simulation_performance.md @@ -1,8 +1,8 @@ # [Advice for performant jump simulations](@id jump_simulation_performance) -We have previously introduced how to use *stochastic chemical kinetics* to perform jump simulations of *chemical reaction network* (CRRN) models (using e.g. Gillespie's algorithm). These simulations can, however, be very computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing runtime). Here, we describe various considerations for performant stochastic chemical kinetics simulations, which we will here refer to as jump process simulations. All jump process simulations arising from stochastic chemical kinetics representations of Catalyst models are performed using stochastic simulation algorithms (SSAs) from JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and the available solvers. +We have previously introduced how to use *stochastic chemical kinetics* to perform jump simulations of *chemical reaction network* (CRN) models (using e.g. Gillespie's algorithm). These simulations can, however, be very computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing run time). This tutorial describes various considerations for performant stochastic chemical kinetics simulations (which we will here refer to as jump simulations). All jump simulations arising from stochastic chemical kinetics representations of Catalyst models are performed using stochastic simulation algorithms (SSAs) from JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and its available solvers. ### [Brief (and optional) introduction to jump simulations](@id jump_simulation_performance_intro) -Jump processes are continuous-time, discrete-space stochastic processes. Exact realizations of these processes can be generated using Stochastic Simulation Algorithms (SSAs), of which Gillespie's Direct method is the most well-known choice. In the chemical reaction modelling contexts, the discrete-state variables typically correspond to the integer-valued number of each chemical species at each time. A system's state changes at discrete time points (the jump times) when the amount of one (or more) species are increased by integer amount(s) (for example, the creation of a new protein due to translation, or the removal of one protein due to degradation). For CRNs, these jumps correspond to the occurrence of individual reactions. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). The propensity of a reaction (which is computed through as its *rate law*) represents the probability per time that it occurs (also known as the associated jump process' intensity function). For example, the reaction `k, A + B --> C + D` has a rate $K$ and a propensity $k*A*B$. See [Reaction rate laws used in simulations](@ref) for more details of what propensity function Catalyst generates for a given stochastic chemical kinetics reaction. +Jump processes are continuous-time, discrete-space, stochastic processes. Exact realizations of these processes can be generated using SSAs (of which Gillespie's Direct method is the most well-known choice). In the chemical reaction modelling contexts, the discrete-state variables typically correspond to the integer-valued number of each chemical species at each time. A system's state changes at discrete time points (the jump times) when the amount of one (or more) species are changed by integer amount(s) (for example, the creation of a new protein due to translation, or the removal of one protein due to degradation). For CRNs, these jumps correspond to the occurrence of individual reactions. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). The propensity of a reaction (which is computed through as its *rate law*) represents the probability per time unit that it occurs (also known as the associated jump process' intensity function). For example, the reaction `k, A + B --> C + D` has a rate $k$ and a propensity $k*A*B$. See [Reaction rate laws used in simulations](@ref) for more details of what propensity function Catalyst generates for a given stochastic chemical kinetics reaction. During a simulation, after the occurrence of a reaction, the simulation algorithm computes both the time to the next reaction and which reaction will occur at this time point. Both these (reaction time and type) are computed by making random draws from probability distributions that depends on the system's reactions' propensities. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (typically due to the occurrence of a reaction). Hence, jump simulations' run times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. @@ -31,20 +31,22 @@ nothing # hide Let us simulate it using the default options, and plot the results. Furthermore, we use [BenchmarkTools.jl's](https://github.com/JuliaCI/BenchmarkTools.jl) `@btime` macro to measure the time it takes to plot the output solution: ```@example jump_simulation_performance_1 using BenchmarkTools -jprob = JumpProblem(rn, dprob, Direct()) +jprob = JumpProblem(bd_model, dprob, Direct()) sol = solve(jprob, SSAStepper()) +sol = solve(jprob, SSAStepper(); seed = 1234) # hide @btime plot(sol) ``` This simulation generates a very large number of jumps, with the solution saved and plotted at each time such a jump occurs. If the number of jumps is high enough, the memory limits may be exceeded (causing Julia to crash). Here, we do not reach this limit, however, the run time of the `plot(sol)` is affected by the number of points it must render (rendering too much in a single plot is another potential cause of crashed due to memory strain). Next, we provide the `save_positions = (false, false)` option to `JumpProblem`. This turns off the saving of the solution both before and after each jump. ```@example jump_simulation_performance_1 -jprob = JumpProblem(rn, dprob, Direct(); save_positions = (false, false)) +jprob = JumpProblem(bd_model, dprob, Direct(); save_positions = (false, false)) nothing # hide ``` However, if we were to simulate our model now, the solution would only actually be saved at its initial and final times. To remedy this, we provide the `saveat = 1.0` argument to the `solve` command, ensuring that the solution to be saved at every `1.0`th time unit (that is, at `t` = `0.0`, `1.0`, `2.0`, ... `999.0`, `1000.0`). ```@example jump_simulation_performance_1 -sol = solve(jprob, SSAStepper(), saveat = 1.0) +sol = solve(jprob, SSAStepper(); saveat = 1.0) +sol = solve(jprob, SSAStepper(); saveat = 1.0, seed = 1234) # hide nothing # hide ``` we can now plot the new solution: @@ -148,6 +150,7 @@ p = [:kA => 0.05, :kI => .01, :p => 500.0, :d => 0.0005] dprob = DiscreteProblem(promoter, u0, tspan, p) jprob = JumpProblem(promoter, dprob, RSSA()) sol = solve(jprob, SSAStepper()) +sol = solve(jprob, SSAStepper(); seed = 1234) # hide plt_1 = plot(sol; idxs=2) plt_2 = plot(sol; idxs=3) From 99ed8929e1ebbd88bf75f36fca96d2be65cc05f0 Mon Sep 17 00:00:00 2001 From: Torkel Date: Sat, 13 Jul 2024 18:15:16 -0400 Subject: [PATCH 21/24] update internal references --- README.md | 2 +- docs/src/index.md | 2 +- docs/src/model_simulation/jump_simulation_performance.md | 2 +- docs/src/model_simulation/simulation_introduction.md | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 761b2a26b3..10b4ae33e4 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ be found in its corresponding research paper, [Catalyst: Fast and flexible model - [Conservation laws can be detected and utilized](https://docs.sciml.ai/Catalyst/stable/model_creation/network_analysis/#network_analysis_deficiency) to reduce system sizes, and to generate non-singular Jacobians (e.g. during conversion to ODEs, SDEs, and steady state equations). - Catalyst reaction network models can be [coupled with differential and algebraic equations](https://docs.sciml.ai/Catalyst/stable/model_creation/constraint_equations/) (which are then incorporated during conversion to ODEs, SDEs, and steady state equations). - Models can be [coupled with events](https://docs.sciml.ai/Catalyst/stable/model_creation/constraint_equations/#constraint_equations_events) that affect the system and its state during simulations. -- By leveraging ModelingToolkit, users have a variety of options for generating optimized system representations to use in solvers. These include construction of [dense or sparse Jacobians](https://docs.sciml.ai/Catalyst/stable/model_simulation/ode_simulation_performance/#ode_simulation_performance_sparse_jacobian), [multithreading or parallelization of generated derivative functions](https://docs.sciml.ai/Catalyst/stable/model_simulation/ode_simulation_performance/#ode_simulation_performance_parallelisation), [automatic classification of reactions into optimized jump types for Gillespie type simulations](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#jump_types), [automatic construction of dependency graphs for jump systems](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-Requiring-Dependency-Graphs), and more. +- By leveraging ModelingToolkit, users have a variety of options for generating optimized system representations to use in solvers. These include construction of [dense or sparse Jacobians](https://docs.sciml.ai/Catalyst/stable/model_simulation/ode_simulation_performance/#ode_simulation_performance_sparse_jacobian), [multithreading or parallelization of generated derivative functions](https://docs.sciml.ai/Catalyst/stable/model_simulation/ode_simulation_performance/#ode_simulation_performance_parallelisation), [automatic classification of reactions into optimized jump types for Gillespie type simulations](https://docs.sciml.ai/Catalyst/stable/model_simulation/jump_simulation_performance/#types_of_jumps), [automatic construction of dependency graphs for jump systems](https://docs.sciml.ai/Catalyst/stable/model_simulation/jump_simulation_performance/#jump_solver_selection), and more. - [Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl) symbolic expressions and Julia `Expr`s can be obtained for all rate laws and functions determining the deterministic and stochastic terms within resulting ODE, SDE, or jump models. - [Steady states](https://docs.sciml.ai/Catalyst/stable/steady_state_functionality/homotopy_continuation/) (and their [stabilities](https://docs.sciml.ai/Catalyst/stable/steady_state_functionality/steady_state_stability_computation/)) can be computed for model ODE representations. diff --git a/docs/src/index.md b/docs/src/index.md index 7ba02ee07f..3a6f5a31bc 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -27,7 +27,7 @@ etc). - [Conservation laws can be detected and utilized](@ref network_analysis_deficiency) to reduce system sizes, and to generate non-singular Jacobians (e.g. during conversion to ODEs, SDEs, and steady state equations). - Catalyst reaction network models can be [coupled with differential and algebraic equations](@ref constraint_equations_coupling_constraints) (which are then incorporated during conversion to ODEs, SDEs, and steady state equations). - Models can be [coupled with events](@ref constraint_equations_events) that affect the system and its state during simulations. -- By leveraging ModelingToolkit, users have a variety of options for generating optimized system representations to use in solvers. These include construction of [dense or sparse Jacobians](@ref ode_simulation_performance_sparse_jacobian), [multithreading or parallelization of generated derivative functions](@ref ode_simulation_performance_parallelisation), [automatic classification of reactions into optimized jump types for Gillespie type simulations](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#jump_types), [automatic construction of dependency graphs for jump systems](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-Requiring-Dependency-Graphs), and more. +- By leveraging ModelingToolkit, users have a variety of options for generating optimized system representations to use in solvers. These include construction of [dense or sparse Jacobians](@ref ode_simulation_performance_sparse_jacobian), [multithreading or parallelization of generated derivative functions](@ref ode_simulation_performance_parallelisation), [automatic classification of reactions into optimized jump types for Gillespie type simulations](@ref jump_simulation_performance_jump_types), [automatic construction of dependency graphs for jump systems](@ref jump_simulation_performance_solver_selection), and more. - [Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl) symbolic expressions and Julia `Expr`s can be obtained for all rate laws and functions determining the deterministic and stochastic terms within resulting ODE, SDE, or jump models. - [Steady states](@ref homotopy_continuation) (and their [stabilities](@ref steady_state_stability)) can be computed for model ODE representations. diff --git a/docs/src/model_simulation/jump_simulation_performance.md b/docs/src/model_simulation/jump_simulation_performance.md index 1fa8bfa031..ba8ca9245d 100644 --- a/docs/src/model_simulation/jump_simulation_performance.md +++ b/docs/src/model_simulation/jump_simulation_performance.md @@ -141,7 +141,7 @@ promoter = @reaction_network begin end ``` Let us simulate this model and consider the copy numbers of each individual component: -```@example jump_simulation_performance_2 +```@example jump_simulation_performance_3 using JumpProcesses, Plots # hide u0 = [:Pᵢ => 1, :Pₐ => 0, :M => 10000] tspan = (0.0, 5000.0) diff --git a/docs/src/model_simulation/simulation_introduction.md b/docs/src/model_simulation/simulation_introduction.md index f2aafd8ec7..180eef3aac 100644 --- a/docs/src/model_simulation/simulation_introduction.md +++ b/docs/src/model_simulation/simulation_introduction.md @@ -1,7 +1,7 @@ # [Model Simulation Introduction](@id simulation_intro) Catalyst's core functionality is the creation of *chemical reaction network* (CRN) models that can be simulated using ODE, SDE, and jump simulations. How such simulations are carried out has already been described in [Catalyst's introduction](@ref introduction_to_catalyst). This page provides a deeper introduction, giving some additional background and introducing various simulation-related options. -Here we will focus on the basics, with other sections of the simulation documentation describing various specialised features, or giving advice on performance. Anyone who plans on using Catalyst's simulation functionality extensively is recommended to also read the documentation on [solution plotting](@ref simulation_plotting), and on how to [interact with simulation problems, integrators, and solutions](@ref simulation_structure_interfacing). Anyone with an application for which performance is critical should consider reading the corresponding page on performance advice for [ODEs](@ref ode_simulation_performance) or [SDEs](@ref sde_simulation_performance). +Here we will focus on the basics, with other sections of the simulation documentation describing various specialised features, or giving advice on performance. Anyone who plans on using Catalyst's simulation functionality extensively is recommended to also read the documentation on [solution plotting](@ref simulation_plotting), and on how to [interact with simulation problems, integrators, and solutions](@ref simulation_structure_interfacing). Anyone with an application for which performance is critical should consider reading the corresponding page on performance advice for [ODE](@ref ode_simulation_performance), [SDE](@ref sde_simulation_performance), or [jump](@ref jump_simulation_performance) simulations. ### [Background to CRN simulations](@id simulation_intro_theory) This section provides some brief theory on CRN simulations. For details on how to carry out these simulations in actual code, please skip to the following sections. @@ -290,7 +290,7 @@ While the `@default_noise_scaling` option is unavailable for [programmatically c ## [Performing jump simulations using stochastic chemical kinetics](@id simulation_intro_jumps) -Catalyst uses the [JumpProcesses.jl](https://github.com/SciML/JumpProcesses.jl) package to perform jump simulations. This section provides a brief introduction, with [JumpProcesses's documentation](https://docs.sciml.ai/JumpProcesses/stable/) providing a more extensive description. +Catalyst uses the [JumpProcesses.jl](https://github.com/SciML/JumpProcesses.jl) package to perform jump simulations. This section provides a brief introduction, with [JumpProcesses's documentation](https://docs.sciml.ai/JumpProcesses/stable/) providing a more extensive description. A dedicated section giving advice on how to optimise jump simulation performance can be found [here](@ref jump_simulation_performance). Jump simulations are performed using so-called `JumpProblem`s. Unlike ODEs and SDEs (for which the corresponding problem types can be created directly), jump simulations require first creating an intermediary `DiscreteProblem`. In this example, we first declare our two-state model and its initial conditions, time span, and parameter values. ```@example simulation_intro_jumps From a78fe8806993539a3d8f7f250a52acabc5052bdc Mon Sep 17 00:00:00 2001 From: Torkel Date: Sat, 13 Jul 2024 21:49:55 -0400 Subject: [PATCH 22/24] add more cross references --- .../model_simulation/jump_simulation_performance.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/model_simulation/jump_simulation_performance.md b/docs/src/model_simulation/jump_simulation_performance.md index ba8ca9245d..b3b083529d 100644 --- a/docs/src/model_simulation/jump_simulation_performance.md +++ b/docs/src/model_simulation/jump_simulation_performance.md @@ -2,7 +2,7 @@ We have previously introduced how to use *stochastic chemical kinetics* to perform jump simulations of *chemical reaction network* (CRN) models (using e.g. Gillespie's algorithm). These simulations can, however, be very computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing run time). This tutorial describes various considerations for performant stochastic chemical kinetics simulations (which we will here refer to as jump simulations). All jump simulations arising from stochastic chemical kinetics representations of Catalyst models are performed using stochastic simulation algorithms (SSAs) from JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and its available solvers. ### [Brief (and optional) introduction to jump simulations](@id jump_simulation_performance_intro) -Jump processes are continuous-time, discrete-space, stochastic processes. Exact realizations of these processes can be generated using SSAs (of which Gillespie's Direct method is the most well-known choice). In the chemical reaction modelling contexts, the discrete-state variables typically correspond to the integer-valued number of each chemical species at each time. A system's state changes at discrete time points (the jump times) when the amount of one (or more) species are changed by integer amount(s) (for example, the creation of a new protein due to translation, or the removal of one protein due to degradation). For CRNs, these jumps correspond to the occurrence of individual reactions. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). The propensity of a reaction (which is computed through as its *rate law*) represents the probability per time unit that it occurs (also known as the associated jump process' intensity function). For example, the reaction `k, A + B --> C + D` has a rate $k$ and a propensity $k*A*B$. See [Reaction rate laws used in simulations](@ref) for more details of what propensity function Catalyst generates for a given stochastic chemical kinetics reaction. +Jump processes are continuous-time, discrete-space, stochastic processes. Exact realizations of these processes can be generated using SSAs (of which Gillespie's Direct method is the most well-known choice). In the chemical reaction modelling contexts, the discrete-state variables typically correspond to the integer-valued number of each chemical species at each time. A system's state changes at discrete time points (the jump times) when the amount of one (or more) species are changed by integer amount(s) (for example, the creation of a new protein due to translation, or the removal of one protein due to degradation). For CRNs, these jumps correspond to the occurrence of individual reactions. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). The propensity of a reaction (which is computed through as its *rate law*) represents the probability per time unit that it occurs (also known as the associated jump process' intensity function). For example, the reaction `k, A + B --> C + D` has a rate $k$ and a propensity $k*A*B$. See [Reaction rate laws used in simulations](@ref introduction_to_catalyst_ratelaws) for more details of what propensity function Catalyst generates for a given stochastic chemical kinetics reaction. During a simulation, after the occurrence of a reaction, the simulation algorithm computes both the time to the next reaction and which reaction will occur at this time point. Both these (reaction time and type) are computed by making random draws from probability distributions that depends on the system's reactions' propensities. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (typically due to the occurrence of a reaction). Hence, jump simulations' run times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. @@ -14,7 +14,7 @@ Typically, propensities are recomputed only at jump events. This means that jump A more thorough overview of simulation methods for Stochastic Chemical Kinetics jump process models and their computational efficiency is given in [^1]. ## [Managing of solution saving](@id jump_simulation_performance_solution_saving) -By default, `solve` saves the value of the solution at the time of every jump. For simulations with a large number of jump events, this can cause memory to quickly fill up. Typically, for simulations with a large number of jumps, we want to [disable this feature](https://docs.sciml.ai/JumpProcesses/dev/tutorials/discrete_stochastic_example/#save_positions_docs) and instead set the save frequency manually. Let us consider a simple [birth-death model](@ref ref): +By default, `solve` saves the value of the solution at the time of every jump. For simulations with a large number of jump events, this can cause memory to quickly fill up. Typically, for simulations with a large number of jumps, we want to [disable this feature](https://docs.sciml.ai/JumpProcesses/dev/tutorials/discrete_stochastic_example/#save_positions_docs) and instead set the save frequency manually. Let us consider a simple [birth-death model](@ref basic_CRN_library_bd): ```@example jump_simulation_performance_1 using Catalyst, JumpProcesses, Plots @@ -56,7 +56,7 @@ we can now plot the new solution: Here, we note that the time to plot the simulation was reduced (for this example, admittedly from an already low level). Furthermore, the plots look different since the trajectory is sampled at much sparser time points. !!! note - With the default saving behaviour, [evaluating `sol(t)`](@ref ref) at any time `t` within the `tspan` gives the *exact* value of the solution at that time (i.e. the vector of the exact number of each species). When using `saveat` and `save_positions = (false,false)` the solution is only saved at the selected time points, and as such `sol(t)` should not be evaluated except at times at which the solution was saved. While evaluating the solution at other times will return values they will not be the exact value of the jump process! + With the default saving behaviour, [evaluating `sol(t)`](@ref simulation_structure_interfacing_solutions) at any time `t` within the `tspan` gives the *exact* value of the solution at that time (i.e. the vector of the exact number of each species). When using `saveat` and `save_positions = (false,false)` the solution is only saved at the selected time points, and as such `sol(t)` should not be evaluated except at times at which the solution was saved. While evaluating the solution at other times will return values they will not be the exact value of the jump process! ## [Types of jumps](@id jump_simulation_performance_jump_types) Each reaction in a chemical reaction network model corresponds to a possible jump of the jump simulation. These jumps can be divided into 3 categories: @@ -98,7 +98,7 @@ input(t) = t > 5.0 r = @reaction input(t), 0 --> X nothing # hide ``` -Here, the production of species $X$ is switched on at the time $t = 5.0$. This reaction (for which the rate depends on `t`) will generate a `VariableRateJump`, which is the least performant jump type in simulations. However, since the rate is piecewise constant, it can alternatively be implemented by setting it to a constant parameter $i$, and then using a [*discrete event*](@ref ref) to update it at the switching times. This will again generate an equivalent model, but with the reaction encoded as a `MassActionJump` (rather than a `VariableRateJump`). Generally such explicit time-discontinuities should be encoded via discrete callbacks instead of as `VariableRateJump`s if possible (as simulation methods for the latter typically assume the system's propensities evolve continuously in-between jumps). +Here, the production of species $X$ is switched on at the time $t = 5.0$. This reaction (for which the rate depends on `t`) will generate a `VariableRateJump`, which is the least performant jump type in simulations. However, since the rate is piecewise constant, it can alternatively be implemented by setting it to a constant parameter $i$, and then using a [*discrete event*](@ref constraint_equations_events) to update it at the switching times. This will again generate an equivalent model, but with the reaction encoded as a `MassActionJump` (rather than a `VariableRateJump`). Generally such explicit time-discontinuities should be encoded via discrete callbacks instead of as `VariableRateJump`s if possible (as simulation methods for the latter typically assume the system's propensities evolve continuously in-between jumps). ## [Jump solver selection](@id jump_simulation_performance_solver_selection) When creating a `JumpProblem`, a specific solver is designated using its third argument. @@ -161,7 +161,7 @@ We note that the copy numbers $Pₐ$ are highly stochastic. Meanwhile, $M$ exist Hybrid simulations for Catalyst models are currently not supported, though they are supported by the lower-level solvers in JumpProcesses and OrdinaryDiffEq. It is work in progress to interface Catalyst to such lower-level solvers, and to provide better optimised hybrid methods. If you require such simulations, please [raise an issue](https://github.com/SciML/Catalyst.jl/issues) and we can notify you of the current state of our implementation, or give suggestions on how one can directly interface with the lower-level hybrid solver interface. ## [Simulation parallelisation](@id jump_simulation_performance_parallelisation) -When multiple simulations are carried out, simulations can be improved by running these in parallel. We have [previously described](@ref ref) how to do so for ODE simulation. Jump simulations can be parallelised in the same manner, with the exception that GPU parallelisation is currently not supported. +When multiple simulations are carried out, simulations can be improved by running these in parallel. We have [previously described](@ref ode_simulation_performance_parallelisation) how to do so for ODE simulation. Jump simulations can be parallelised in the same manner, with the exception that GPU parallelisation is currently not supported. --- ## References From 723153735476b80891dadbd341552ae2684528ee Mon Sep 17 00:00:00 2001 From: Sam Isaacson Date: Thu, 8 Aug 2024 16:34:23 -0400 Subject: [PATCH 23/24] start making some revisions --- .../jump_simulation_performance.md | 75 +++++++++++++++---- .../simulation_introduction.md | 17 ++--- 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/docs/src/model_simulation/jump_simulation_performance.md b/docs/src/model_simulation/jump_simulation_performance.md index b3b083529d..7f525edd9a 100644 --- a/docs/src/model_simulation/jump_simulation_performance.md +++ b/docs/src/model_simulation/jump_simulation_performance.md @@ -1,17 +1,64 @@ # [Advice for performant jump simulations](@id jump_simulation_performance) -We have previously introduced how to use *stochastic chemical kinetics* to perform jump simulations of *chemical reaction network* (CRN) models (using e.g. Gillespie's algorithm). These simulations can, however, be very computationally intensive. Fortunately, there are several ways to increase their performance (thus reducing run time). This tutorial describes various considerations for performant stochastic chemical kinetics simulations (which we will here refer to as jump simulations). All jump simulations arising from stochastic chemical kinetics representations of Catalyst models are performed using stochastic simulation algorithms (SSAs) from JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and its available solvers. +We have previously introduced how Catalyst *chemical reaction network* (CRN) +models can be converted to *stochastic chemical kinetics* jump process models, +which can then be (exactly) simulated (i.e. sampled) using Stochastic Simulation +Algorithms (SSAs) such as Gillespie's Direct method. These simulations can, +however, be very computationally intensive. Fortunately, there are several ways +to increase their performance (thus reducing run time). This tutorial describes +various considerations for performant stochastic chemical kinetics simulations +(which we will here refer to as jump simulations). All jump simulations arising +from stochastic chemical kinetics representations of Catalyst models are +performed using SSAs from JumpProcesses.jl. Please see the [JumpProcesses +documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive +introduction to the package and its available solvers. ### [Brief (and optional) introduction to jump simulations](@id jump_simulation_performance_intro) -Jump processes are continuous-time, discrete-space, stochastic processes. Exact realizations of these processes can be generated using SSAs (of which Gillespie's Direct method is the most well-known choice). In the chemical reaction modelling contexts, the discrete-state variables typically correspond to the integer-valued number of each chemical species at each time. A system's state changes at discrete time points (the jump times) when the amount of one (or more) species are changed by integer amount(s) (for example, the creation of a new protein due to translation, or the removal of one protein due to degradation). For CRNs, these jumps correspond to the occurrence of individual reactions. Typically, the frequency of each reaction depends on its *propensity* (which in turn depends on its *rate* and *substrates*). The propensity of a reaction (which is computed through as its *rate law*) represents the probability per time unit that it occurs (also known as the associated jump process' intensity function). For example, the reaction `k, A + B --> C + D` has a rate $k$ and a propensity $k*A*B$. See [Reaction rate laws used in simulations](@ref introduction_to_catalyst_ratelaws) for more details of what propensity function Catalyst generates for a given stochastic chemical kinetics reaction. - -During a simulation, after the occurrence of a reaction, the simulation algorithm computes both the time to the next reaction and which reaction will occur at this time point. Both these (reaction time and type) are computed by making random draws from probability distributions that depends on the system's reactions' propensities. As the latter depend on the state of the system, they must be recomputed whenever the system's state changes (typically due to the occurrence of a reaction). Hence, jump simulations' run times are heavily dependent on how frequently these propensities must be recomputed, and how many must be recomputed when a reaction occurs. - -Typically, propensities are recomputed only at jump events. This means that jump simulations' run times generally scale with the number of jumps that occur. Run times typically become prohibitively expensive for: +Jump processes are continuous-time, discrete-space, stochastic processes. Exact +realizations of these processes can be generated using SSAs (of which +Gillespie's Direct method is the most well-known choice). In the chemical +reaction modelling context, the discrete-state variables typically correspond to +the integer-valued number of each chemical species at each time. A system's +state changes at discrete time points (the jump times) when the amount of one +(or more) species are changed by integer amount(s) (for example, the creation of +a new protein due to translation, or the removal of one protein due to +degradation). + +For CRNs, these jumps correspond to the occurrence of individual reactions. +Typically, the frequency of each reaction depends on its *propensity* function +(which in turn depends on its *rate constant* and *substrate* amounts). The +propensity of a reaction is analogous to the reaction's *rate law* in the ODE +context, and represents the probability per unit time the reaction can occur +given the current state of the system. In probability and statistics +propensities are also often called intensity functions or transition rate +functions. For example, the reaction $A + B \overset{k}{\to} C + D$ has a rate +$k$ and a propensity $k A B$. See [Reaction rate laws used in simulations](@ref +introduction_to_catalyst_ratelaws) for more details of what propensity function +Catalyst generates for a given stochastic chemical kinetics reaction, and +[Mathematical Models Catalyst can Generate](@ref math_models_in_catalyst) for +details on the mathematical jump process models Catalyst can generate from a +CRN. + +During a typical simulation of a jump process model, after the occurrence of a +reaction, the simulation algorithm computes both the time to the next reaction +and which reaction will occur at this time point. Both these (reaction time and +type) are computed by making random draws from probability distributions that +depend on the system's reactions' propensities. As the latter depend on the +state of the system, they must be recomputed whenever the system's state changes +(typically due to the occurrence of a reaction). Hence, jump simulations' run +times are heavily dependent on how frequently these propensities must be +recomputed, and how many must be recomputed when a reaction occurs. + +A variety of factors can impact the run time of different SSAs. Such simulations +can become increasingly expensive for: - Simulations of large models (i.e. with many different species, where some species occur in large copy numbers, or where many reactions are present in a system). -- Simulations over long time spans. -- Simulations that are performed a large number of times. +- Simulations over long time spans or with large numbers of reactions occurring + over the desired time span. +- Simulations that are performed a large number of times for statistical sampling. +- Simulations that are saving large amounts of data (such as saving the values + of all species each time a reaction occurs). -A more thorough overview of simulation methods for Stochastic Chemical Kinetics jump process models and their computational efficiency is given in [^1]. +A more thorough overview of simulation methods for Stochastic Chemical Kinetics +jump process models and their computational efficiency is given in [^1]. ## [Managing of solution saving](@id jump_simulation_performance_solution_saving) By default, `solve` saves the value of the solution at the time of every jump. For simulations with a large number of jump events, this can cause memory to quickly fill up. Typically, for simulations with a large number of jumps, we want to [disable this feature](https://docs.sciml.ai/JumpProcesses/dev/tutorials/discrete_stochastic_example/#save_positions_docs) and instead set the save frequency manually. Let us consider a simple [birth-death model](@ref basic_CRN_library_bd): @@ -38,7 +85,7 @@ sol = solve(jprob, SSAStepper(); seed = 1234) # hide ``` This simulation generates a very large number of jumps, with the solution saved and plotted at each time such a jump occurs. If the number of jumps is high enough, the memory limits may be exceeded (causing Julia to crash). Here, we do not reach this limit, however, the run time of the `plot(sol)` is affected by the number of points it must render (rendering too much in a single plot is another potential cause of crashed due to memory strain). -Next, we provide the `save_positions = (false, false)` option to `JumpProblem`. This turns off the saving of the solution both before and after each jump. +Next, we provide the `save_positions = (false, false)` option to `JumpProblem`. This turns off the saving of the solution both before and after each jump. ```@example jump_simulation_performance_1 jprob = JumpProblem(bd_model, dprob, Direct(); save_positions = (false, false)) nothing # hide @@ -60,8 +107,8 @@ Here, we note that the time to plot the simulation was reduced (for this example ## [Types of jumps](@id jump_simulation_performance_jump_types) Each reaction in a chemical reaction network model corresponds to a possible jump of the jump simulation. These jumps can be divided into 3 categories: -- `MassActionJump`s: These correspond to reactions which rates remain constant throughout the simulation.They are typically generated by reactions' which rates contain time-independent parameters only. -- `ConstantRateJump`s: These correspond to reactions which rates remain constant between individual jumps, but may change in response to a jump occurring. They are typically generated by reactions' which rates contain species and time-independent parameters only. +- `MassActionJump`s: These correspond to reactions which rates remain constant throughout the simulation.They are typically generated by reactions' which rates contain time-independent parameters only. +- `ConstantRateJump`s: These correspond to reactions which rates remain constant between individual jumps, but may change in response to a jump occurring. They are typically generated by reactions' which rates contain species and time-independent parameters only. - `VariableRateJump`s: These correspond to reactions which rates may change at any time during the simulation. They are typically generated by reactions' which rates contain the time variable ($t$). Here are some example reactions for the different types of jumps: @@ -85,7 +132,7 @@ Updating `MassActionJump`s' propensities is more computationally efficient (due ### [Unnecessarily putting species in rates rather than in reactions](@id jump_simulation_performance_jump_types_unnecessary_constantratejumps) Sometimes, a rate has been made dependent on a species, where that species instead could have been made part of the actual reaction. Consider a reaction where an enzyme ($E$) catalyses the phosphorylation of a protein ($X$) to phosphorylated form ($Xᵖ$). It can be written in two different forms: ```@example jump_simulation_performance_1 -r1 = @reaction k*E, X --> Xᵖ +r1 = @reaction k*E, X --> Xᵖ r2 = @reaction k, X + E --> Xᵖ + E nothing # hide ``` @@ -116,7 +163,7 @@ dprob = DiscreteProblem(bd_model, u0, tspan, ps) jprob = JumpProblem(bd_model, dprob, Direct()) nothing # hide ``` -Here (as throughout most of Catalyst's documentation) we have used the `Direct()` SSA solver (which corresponds to Gillespie's original direct method [^2][^3]). This method was originally published in 1976, and since then, many additional methods for simulating stochastic chemical kinetics models have been developed. +Here (as throughout most of Catalyst's documentation) we have used the `Direct()` SSA solver (which corresponds to Gillespie's original direct method [^2][^3]). This method was originally published in 1976, and since then, many additional methods for simulating stochastic chemical kinetics models have been developed. Gillespie's direct method will, after a jump has been performed, recompute the propensities of *all* possible jumps in the system (i.e. of all reactions). This is typically not required. E.g. consider the following system: ```@example jump_simulation_performance_2 diff --git a/docs/src/model_simulation/simulation_introduction.md b/docs/src/model_simulation/simulation_introduction.md index 180eef3aac..dfeed94e39 100644 --- a/docs/src/model_simulation/simulation_introduction.md +++ b/docs/src/model_simulation/simulation_introduction.md @@ -1,5 +1,5 @@ # [Model Simulation Introduction](@id simulation_intro) -Catalyst's core functionality is the creation of *chemical reaction network* (CRN) models that can be simulated using ODE, SDE, and jump simulations. How such simulations are carried out has already been described in [Catalyst's introduction](@ref introduction_to_catalyst). This page provides a deeper introduction, giving some additional background and introducing various simulation-related options. +Catalyst's core functionality is the creation of *chemical reaction network* (CRN) models that can be simulated using ODE, SDE, and jump simulations. How such simulations are carried out has already been described in [Catalyst's introduction](@ref introduction_to_catalyst). This page provides a deeper introduction, giving some additional background and introducing various simulation-related options. Here we will focus on the basics, with other sections of the simulation documentation describing various specialised features, or giving advice on performance. Anyone who plans on using Catalyst's simulation functionality extensively is recommended to also read the documentation on [solution plotting](@ref simulation_plotting), and on how to [interact with simulation problems, integrators, and solutions](@ref simulation_structure_interfacing). Anyone with an application for which performance is critical should consider reading the corresponding page on performance advice for [ODE](@ref ode_simulation_performance), [SDE](@ref sde_simulation_performance), or [jump](@ref jump_simulation_performance) simulations. @@ -133,7 +133,7 @@ Here follows a list of solver options which might be of interest to the user. - `adaptive`: Toggles adaptive time stepping for valid methods. Default to `true`. - `dt`: For non-adaptive simulations, sets the step size (also sets the initial step size for adaptive methods). - `saveat`: Determines the time points at which the simulation is saved. E.g. for `saveat = 2.0` the simulation is saved every second time unit. If not given, the solution is saved after each time step. -- `save_idxs`: Provides a vector of species whose values should be saved during the simulation. E.g. for `save_idxs = [:X1]`, only the value of species $X1$ is saved. +- `save_idxs`: Provides a vector of species whose values should be saved during the simulation. E.g. for `save_idxs = [:X1]`, only the value of species $X1$ is saved. - `maxiters`: The maximum number of time steps of the simulation. If this number is reached, the simulation is terminated. - `seed`: Sets a seed for stochastic simulations. Stochastic simulations with the same seed generate identical results. @@ -156,7 +156,7 @@ ps = Dict([:k1 => 2.0, :k2 => 5.0]) oprob = ODEProblem(two_state_model, u0, tspan, ps) nothing # hide ``` -The forms used for `u0` and `ps` does not need to be the same (but can e.g. be a vector and a tuple). +The forms used for `u0` and `ps` does not need to be the same (but can e.g. be a vector and a tuple). !!! note It is possible to [designate specific types for parameters](@ref dsl_advanced_options_parameter_types). When this is done, the tuple form for providing parameter values should be preferred. @@ -186,7 +186,7 @@ sol = solve(sprob, STrapezoid()) sol = solve(sprob, STrapezoid(); seed = 123) # hide plot(sol) ``` -we can see that while this simulation (unlike the ODE ones) exhibits some fluctuations. +we can see that while this simulation (unlike the ODE ones) exhibits some fluctuations. !!! note Unlike for ODE and jump simulations, there are no good heuristics for automatically selecting suitable SDE solvers. Hence, for SDE simulations a solver must be provided. `STrapezoid` will work for a large number of cases. When this is not the case, however, please check the list of [available SDE solvers](https://docs.sciml.ai/DiffEqDocs/stable/solvers/sde_solve/) for a suitable alternative (making sure to select one compatible with non-diagonal noise and the [Ito interpretation]https://en.wikipedia.org/wiki/It%C3%B4_calculus). @@ -318,7 +318,7 @@ nothing # hide ``` The `JumpProblem` can now be simulated using `solve` (just like any other problem type). ```@example simulation_intro_jumps -sol = solve(jprob, SSAStepper()) +sol = solve(jprob) nothing # hide ``` If we plot the solution we can see how the system's state does not change continuously, but instead in discrete jumps (due to the occurrence of the individual reactions of the system). @@ -335,9 +335,8 @@ Several different aggregators are available (a full list is provided [here](http jprob = JumpProblem(two_state_model, dprob, SortingDirect()) nothing # hide ``` -Especially for large systems, the choice of aggregator is relevant to simulation performance. - -Next, a simulation method can be provided (like for ODEs and SDEs) as the second argument to `solve`. Currently, the only relevant solver is `SSAStepper()` (which is the one used throughout Catalyst's documentation). Other choices are primarily relevant to combined ODE/SDE + jump simulations, or inexact simulations. These situations are described in more detail [here](https://docs.sciml.ai/JumpProcesses/stable/jump_solve/). +Especially for large systems, the choice of aggregator can significantly impact +simulation performance. ### [Jump simulations where some rate depends on time](@id simulation_intro_jumps_variableratejumps) For some models, the rate of some reactions depend on time. E.g. consider the following [circadian model](https://en.wikipedia.org/wiki/Circadian_rhythm), where the production rate of some protein ($P$) depends on a sinusoid function: @@ -347,7 +346,7 @@ circadian_model = @reaction_network begin d, P --> 0 end ``` -This type of model will generate so called *variable rate jumps*. Simulation of such model is non-trivial (and Catalyst currently lacks a good interface for this). A detailed description of how to carry out jump simulations for models with time-dependant rates can be found [here](https://docs.sciml.ai/JumpProcesses/stable/tutorials/simple_poisson_process/#VariableRateJumps-for-processes-that-are-not-constant-between-jumps). +This type of model will generate so called *variable rate jumps*, `VariableRateJump`s in JumpProcesses. Simulation of such models is generally more computationally expensive than those without an explicit time dependence (and Catalyst currently lacks a good interface for this). A detailed description of how to carry out jump simulations for models with rates having an explicit time-dependence can be found [here](https://docs.sciml.ai/JumpProcesses/stable/tutorials/simple_poisson_process/#VariableRateJumps-for-processes-that-are-not-constant-between-jumps). --- From 2b70ea0f1743ee9b9b3f19ee2f0b8ef750e83494 Mon Sep 17 00:00:00 2001 From: Sam Isaacson Date: Fri, 9 Aug 2024 15:48:55 -0400 Subject: [PATCH 24/24] more updates --- README.md | 2 +- docs/pages.jl | 2 +- docs/src/index.md | 2 +- .../model_simulation/jump_simulation_guide.md | 245 ++++++++++++++++++ .../simulation_introduction.md | 4 +- .../jump_simulation_performance.md | 16 +- 6 files changed, 258 insertions(+), 13 deletions(-) create mode 100644 docs/src/model_simulation/jump_simulation_guide.md rename docs/{src/model_simulation => unpublished}/jump_simulation_performance.md (96%) diff --git a/README.md b/README.md index 0106bd271b..80c4ebc6af 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ be found in its corresponding research paper, [Catalyst: Fast and flexible model - [Conservation laws can be detected and utilized](https://docs.sciml.ai/Catalyst/stable/model_creation/conservation_laws/) to reduce system sizes, and to generate non-singular Jacobians (e.g. during conversion to ODEs, SDEs, and steady state equations). - Catalyst reaction network models can be [coupled with differential and algebraic equations](https://docs.sciml.ai/Catalyst/stable/model_creation/constraint_equations/) (which are then incorporated during conversion to ODEs, SDEs, and steady state equations). - Models can be [coupled with events](https://docs.sciml.ai/Catalyst/stable/model_creation/constraint_equations/#constraint_equations_events) that affect the system and its state during simulations. -- By leveraging ModelingToolkit, users have a variety of options for generating optimized system representations to use in solvers. These include construction of [dense or sparse Jacobians](https://docs.sciml.ai/Catalyst/stable/model_simulation/ode_simulation_performance/#ode_simulation_performance_sparse_jacobian), [multithreading or parallelization of generated derivative functions](https://docs.sciml.ai/Catalyst/stable/model_simulation/ode_simulation_performance/#ode_simulation_performance_parallelisation), [automatic classification of reactions into optimized jump types for Gillespie type simulations](https://docs.sciml.ai/Catalyst/stable/model_simulation/jump_simulation_performance/#types_of_jumps), [automatic construction of dependency graphs for jump systems](https://docs.sciml.ai/Catalyst/stable/model_simulation/jump_simulation_performance/#jump_solver_selection), and more. +- By leveraging ModelingToolkit, users have a variety of options for generating optimized system representations to use in solvers. These include construction of [dense or sparse Jacobians](https://docs.sciml.ai/Catalyst/stable/model_simulation/ode_simulation_performance/#ode_simulation_performance_sparse_jacobian), [multithreading or parallelization of generated derivative functions](https://docs.sciml.ai/Catalyst/stable/model_simulation/ode_simulation_performance/#ode_simulation_performance_parallelisation), [automatic classification of reactions into optimized jump types for Gillespie type simulations](https://docs.sciml.ai/Catalyst/stable/model_simulation/jump_simulation_guide/#types_of_jumps), [automatic construction of dependency graphs for jump systems](https://docs.sciml.ai/Catalyst/stable/model_simulation/jump_simulation_guide/#jump_solver_selection), and more. - [Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl) symbolic expressions and Julia `Expr`s can be obtained for all rate laws and functions determining the deterministic and stochastic terms within resulting ODE, SDE, or jump models. - [Steady states](https://docs.sciml.ai/Catalyst/stable/steady_state_functionality/homotopy_continuation/) (and their [stabilities](https://docs.sciml.ai/Catalyst/stable/steady_state_functionality/steady_state_stability_computation/)) can be computed for model ODE representations. diff --git a/docs/pages.jl b/docs/pages.jl index 9a218f05ed..647224da60 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -32,7 +32,7 @@ pages = Any[ "model_simulation/ensemble_simulations.md", "model_simulation/ode_simulation_performance.md", "model_simulation/sde_simulation_performance.md", - "model_simulation/jump_simulation_performance.md", + "model_simulation/jump_simulation_guide.md", "Model simulation examples" => Any[ "model_simulation/examples/periodic_events_simulation.md" ] diff --git a/docs/src/index.md b/docs/src/index.md index c9791f2fc5..d296cf9932 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -27,7 +27,7 @@ etc). - [Conservation laws can be detected and utilized](@ref conservation_laws) to reduce system sizes, and to generate non-singular Jacobians (e.g. during conversion to ODEs, SDEs, and steady state equations). - Catalyst reaction network models can be [coupled with differential and algebraic equations](@ref constraint_equations_coupling_constraints) (which are then incorporated during conversion to ODEs, SDEs, and steady state equations). - Models can be [coupled with events](@ref constraint_equations_events) that affect the system and its state during simulations. -- By leveraging ModelingToolkit, users have a variety of options for generating optimized system representations to use in solvers. These include construction of [dense or sparse Jacobians](@ref ode_simulation_performance_sparse_jacobian), [multithreading or parallelization of generated derivative functions](@ref ode_simulation_performance_parallelisation), [automatic classification of reactions into optimized jump types for Gillespie type simulations](@ref jump_simulation_performance_jump_types), [automatic construction of dependency graphs for jump systems](@ref jump_simulation_performance_solver_selection), and more. +- By leveraging ModelingToolkit, users have a variety of options for generating optimized system representations to use in solvers. These include construction of [dense or sparse Jacobians](@ref ode_simulation_performance_sparse_jacobian), [multithreading or parallelization of generated derivative functions](@ref ode_simulation_performance_parallelisation), [automatic classification of reactions into optimized jump types for Gillespie type simulations](@ref jump_simulation_guide_jump_types), [automatic construction of dependency graphs for jump systems](@ref jump_simulation_guide_solver_selection), and more. - [Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl) symbolic expressions and Julia `Expr`s can be obtained for all rate laws and functions determining the deterministic and stochastic terms within resulting ODE, SDE, or jump models. - [Steady states](@ref homotopy_continuation) (and their [stabilities](@ref steady_state_stability)) can be computed for model ODE representations. diff --git a/docs/src/model_simulation/jump_simulation_guide.md b/docs/src/model_simulation/jump_simulation_guide.md new file mode 100644 index 0000000000..54f18a18b8 --- /dev/null +++ b/docs/src/model_simulation/jump_simulation_guide.md @@ -0,0 +1,245 @@ +# [Advice for Stochastic Chemical Kinetics Jump Simulations](@id jump_simulation_guide) +We have previously introduced how Catalyst *chemical reaction network* (CRN) +models can be converted to *stochastic chemical kinetics* jump process models, +which can then be (exactly) simulated (i.e. sampled) using Stochastic Simulation +Algorithms (SSAs) such as Gillespie's Direct method. + +In this tutorial we will review basic information and configuration options for +such simulations, and discuss several ways to increase their performance (thus +reducing run time). All jump simulations arising from stochastic chemical +kinetics representations of Catalyst models are performed using SSAs from +JumpProcesses.jl. Please see the [JumpProcesses +documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive +introduction to the package and its available solvers. + +### [Brief (and optional) introduction to stochastic chemical kinetics jump simulations](@id jump_simulation_guide_intro) +Jump processes are continuous-time, discrete-space, stochastic processes. Exact +realizations of these processes can be generated using SSAs (of which +Gillespie's Direct method is the most well-known choice). In the chemical +reaction modelling context, the discrete-state variables typically correspond to +the integer-valued number of each chemical species at each time. A system's +state changes at discrete time points corresponding to when reactions occur (the +jump times). A these these times the amount of one (or more) species are changed +by integer amount(s) (for example, the creation of a new protein due to +translation, or the removal of one protein due to degradation). + +The frequency of each reaction's occurrence depends on its *propensity* function +(which in turn depends on its *rate constant* and *substrate* amounts). The +propensity of a reaction is analogous to the reaction's *rate law* in the ODE +context, and represents the probability per unit time the reaction can occur +given the current state of the system. In probability and statistics +propensities are also often called intensity functions or transition rate +functions. For example, the reaction $A + A \overset{k}{\to} B$ has a rate +constant of $k$ and a propensity of $k A (A-1) / 2$, while the reaction $A + B +\overset{\gamma}{\to} C + D$ has a rate constant of $\gamma$ and a propensity of +$k A B$. See [Reaction rate laws used in simulations](@ref +introduction_to_catalyst_ratelaws) for more details of what propensity functions +Catalyst generates for a given reaction, and [Mathematical Models Catalyst can +Generate](@ref math_models_in_catalyst) for details on both propensity functions +and the mathematical jump process models Catalyst can generate from a CRN (called the time-change representation in the stochastic chemical kinetics literature). + +During a typical simulation of a jump process model, the simulation algorithm +samples both the time to the next reaction and which reaction will occur at this +time point. The sampled time determines the timestep within a simulation, while +the sampled reaction determines how the state is updated after stepping to the +new time. Both are computed by sampling probability distributions that depend on +the current values for the reaction propensities. More precisely, in the +notation of the [Mathematical Models Catalyst can Generate](@ref +math_models_in_catalyst) guide, when the current time is $t$, the current system +state is $X(t)$ (i.e. the vector of the number of each chemical species), and +the current propensity values are $\{a_k(X(t))\}_{k=1}^K$, a typical timestep of +an SSA +1. Samples both a *random* time $\Delta t$ until the next reaction occurs and + which reaction, $k \in \{1,\dots,K\}$, occurs at this time. This sampling + process uses probability distributions that are built from the current + propensity values, i.e. $\{a_k(X(t))\}_{k=1}^K$. +2. Executes the $k$'th reaction calculating $X(t + \Delta t)$ from $X(t)$. +3. Recalculates one or more propensity functions, $a_k(X(t + \Delta + t))$, based on the new state. +4. Updates the time $t \to t + \Delta t$. + +Hence, the average work per one time step of a jump simulation is heavily +dependent on how many propensities must typically be recomputed when a reaction +occurs. Similarly, the total work across a full jump simulation will also depend +on typical values for the timestep. Propensity function values generally +increase as populations increase (i.e. for $A + B \overset{k}{\to} C$ the +propensity is $k A B$ which increases in the amount of $A$ or $B$). Larger +propensities mean that reactions occur more frequently, and hence typical values +for $\Delta t$ will decrease and more timesteps will be taken when simulating to +a fixed (physical) time, $T$. + +A variety of factors can therefore impact the run time of different SSAs. Such +simulations can become increasingly expensive for: +- Simulations of large models (i.e. with many different species, where some + species occur in large copy numbers, or where many reactions are present in a + system). +- Simulations over long time spans or with large numbers of reactions occurring + over the desired time span. +- Simulations that are performed a large number of times for statistical sampling. +- Simulations that are saving large amounts of data (such as saving the values + of all species each time a reaction occurs). + +A more thorough overview of simulation methods for Stochastic Chemical Kinetics +jump process models and their computational efficiency is given in [^1]. + +## [Basic simulation behavior] + + +## [Managing of solution saving](@id jump_simulation_guide_solution_saving) +By default, `solve` saves the value of the solution at the time of every jump. For simulations with a large number of jump events, this can cause memory to quickly fill up. Typically, for simulations with a large number of jumps, we want to [disable this feature](https://docs.sciml.ai/JumpProcesses/dev/tutorials/discrete_stochastic_example/#save_positions_docs) and instead set the save frequency manually. Let us consider a simple [birth-death model](@ref basic_CRN_library_bd): +```@example jump_simulation_guide_1 +using Catalyst, JumpProcesses, Plots + +bd_model = @reaction_network begin + (p,d), 0 <--> X +end +u0 = [:X => 10] +tspan = (0.0, 1000.0) +ps = [:p => 1000.0, :d => 100.0] + +dprob = DiscreteProblem(bd_model, u0, tspan, ps) +nothing # hide +``` +Let us simulate it using the default options, and plot the results. Furthermore, we use [BenchmarkTools.jl's](https://github.com/JuliaCI/BenchmarkTools.jl) `@btime` macro to measure the time it takes to plot the output solution: +```@example jump_simulation_guide_1 +using BenchmarkTools +jprob = JumpProblem(bd_model, dprob, Direct()) +sol = solve(jprob, SSAStepper()) +sol = solve(jprob, SSAStepper(); seed = 1234) # hide +@btime plot(sol) +``` +This simulation generates a very large number of jumps, with the solution saved and plotted at each time such a jump occurs. If the number of jumps is high enough, the memory limits may be exceeded (causing Julia to crash). Here, we do not reach this limit, however, the run time of the `plot(sol)` is affected by the number of points it must render (rendering too much in a single plot is another potential cause of crashed due to memory strain). + +Next, we provide the `save_positions = (false, false)` option to `JumpProblem`. This turns off the saving of the solution both before and after each jump. +```@example jump_simulation_guide_1 +jprob = JumpProblem(bd_model, dprob, Direct(); save_positions = (false, false)) +nothing # hide +``` +However, if we were to simulate our model now, the solution would only actually be saved at its initial and final times. To remedy this, we provide the `saveat = 1.0` argument to the `solve` command, ensuring that the solution to be saved at every `1.0`th time unit (that is, at `t` = `0.0`, `1.0`, `2.0`, ... `999.0`, `1000.0`). +```@example jump_simulation_guide_1 +sol = solve(jprob, SSAStepper(); saveat = 1.0) +sol = solve(jprob, SSAStepper(); saveat = 1.0, seed = 1234) # hide +nothing # hide +``` +we can now plot the new solution: +```@example jump_simulation_guide_1 +@btime plot(sol) +``` +Here, we note that the time to plot the simulation was reduced (for this example, admittedly from an already low level). Furthermore, the plots look different since the trajectory is sampled at much sparser time points. + +!!! note + With the default saving behaviour, [evaluating `sol(t)`](@ref simulation_structure_interfacing_solutions) at any time `t` within the `tspan` gives the *exact* value of the solution at that time (i.e. the vector of the exact number of each species). When using `saveat` and `save_positions = (false,false)` the solution is only saved at the selected time points, and as such `sol(t)` should not be evaluated except at times at which the solution was saved. While evaluating the solution at other times will return values they will not be the exact value of the jump process! + +## [Types of jumps](@id jump_simulation_guide_jump_types) +Each reaction in a chemical reaction network model corresponds to a possible jump of the jump simulation. These jumps can be divided into 3 categories: +- `MassActionJump`s: These correspond to reactions which rates remain constant throughout the simulation.They are typically generated by reactions' which rates contain time-independent parameters only. +- `ConstantRateJump`s: These correspond to reactions which rates remain constant between individual jumps, but may change in response to a jump occurring. They are typically generated by reactions' which rates contain species and time-independent parameters only. +- `VariableRateJump`s: These correspond to reactions which rates may change at any time during the simulation. They are typically generated by reactions' which rates contain the time variable ($t$). + +Here are some example reactions for the different types of jumps: +```@example jump_simulation_guide_1 +# `MassActionJump`s +@reaction 1.0, X --> Y +@reaction k, 2X + Y --> X2Y +@reaction k1*k2+k3, 0 --> X + +# `ConstantRateJump`s +@reaction k*log(X), Y --> X +@reaction mm(X,v,K), 0 --> X + +# `VariableRateJump`s +@reaction k*(1+sin(t)), 0 --> X +@reaction X/t, X + Y --> XY +nothing # hide +``` +Updating `MassActionJump`s' propensities is more computationally efficient (due to their constrained form) than updating them for `ConstantRateJump`s. Thus simulations are more performant if a larger fraction of reactions are of the `MassActionJump` form (rather than the `ConstantRateJump` form). Furthermore, simulations containing `VariableRateJump` requires additional routines to find the times between jump events (as unlike non-`VariableRateJump` simulations, jump propensities change *between* jump events, making this computation more difficult). Hence, the existence of `VariableRateJump` can significantly slow down a simulation Primarily, there exist two common situation where models are written in a way so that sub-optimal jump types are generated, both of which we describe below. + +### [Unnecessarily putting species in rates rather than in reactions](@id jump_simulation_guide_jump_types_unnecessary_constantratejumps) +Sometimes, a rate has been made dependent on a species, where that species instead could have been made part of the actual reaction. Consider a reaction where an enzyme ($E$) catalyses the phosphorylation of a protein ($X$) to phosphorylated form ($Xᵖ$). It can be written in two different forms: +```@example jump_simulation_guide_1 +r1 = @reaction k*E, X --> Xᵖ +r2 = @reaction k, X + E --> Xᵖ + E +nothing # hide +``` +These two reactions will generate identical simulations (this holds for ODE, SDE, and Jump simulations). However, while `r1` will generate a `ConstantRateJump`, `r2` will generate a `MassActionJump`. Hence, if the `r2` form is used, jump simulation performance is (at no cost) improved. Since the two forms are otherwise identical, it is always preferable to, whenever possible, put species in the reactions, rather than the rates. + +### [Using piecewise constant, time-dependant, function instead of events](@id jump_simulation_guide_jump_types_unnecessary_variableratejumps) +Let us consider a reaction with some input that depends on time. We assume that the input is initially $0$, but after some critical time $t = 5.0$ it is increased to $1$. This can be implemented through a step function: +```@example jump_simulation_guide_1 +input(t) = t > 5.0 +r = @reaction input(t), 0 --> X +nothing # hide +``` +Here, the production of species $X$ is switched on at the time $t = 5.0$. This reaction (for which the rate depends on `t`) will generate a `VariableRateJump`, which is the least performant jump type in simulations. However, since the rate is piecewise constant, it can alternatively be implemented by setting it to a constant parameter $i$, and then using a [*discrete event*](@ref constraint_equations_events) to update it at the switching times. This will again generate an equivalent model, but with the reaction encoded as a `MassActionJump` (rather than a `VariableRateJump`). Generally such explicit time-discontinuities should be encoded via discrete callbacks instead of as `VariableRateJump`s if possible (as simulation methods for the latter typically assume the system's propensities evolve continuously in-between jumps). + +## [Jump solver selection](@id jump_simulation_guide_solver_selection) +When creating a `JumpProblem`, a specific solver is designated using its third argument. +```@example jump_simulation_guide_2 +using Catalyst, JumpProcesses, Plots + +bd_model = @reaction_network begin + (p,d), 0 <--> X +end +u0 = [:X => 10] +tspan = (0.0, 1000.0) +ps = [:p => 1000.0, :d => 100.0] + +dprob = DiscreteProblem(bd_model, u0, tspan, ps) +jprob = JumpProblem(bd_model, dprob, Direct()) +nothing # hide +``` +Here (as throughout most of Catalyst's documentation) we have used the `Direct()` SSA solver (which corresponds to Gillespie's original direct method [^2][^3]). This method was originally published in 1976, and since then, many additional methods for simulating stochastic chemical kinetics models have been developed. + +Gillespie's direct method will, after a jump has been performed, recompute the propensities of *all* possible jumps in the system (i.e. of all reactions). This is typically not required. E.g. consider the following system: +```@example jump_simulation_guide_2 +@reaction_network begin + k1, X1 --> X2 + k2, X2 --> X3 + k3, X3 --> 0 +end +``` +Here, the propensities of the `k1, X1 --> X2` and `k2, X2 --> X3` reactions (`k1*X1*X2` and `k2*X2` respectively) do not depend on the amount of $X3$ in the system. Hence, the value of their propensities are unchanged by the occurrence of the `k3, X3 --> 0` reaction. Performant jump simulation methods have several ways to determine which rates require recomputing after the occurrence of a reaction, which improves their computational performance. Many of these requires a dependency graphs, which track which other reactions' propensities must be recomputed after the occurrence of a given reaction. Catalyst automatically builds such dependency graphs, which means that all JumpProcesses SSAs can be used without any additional inputs. + +A full list of jump simulation method implemented by JumpProcesses can be found [here](https://docs.sciml.ai/JumpProcesses/stable/jump_types/#Jump-Aggregators-for-Exact-Simulation). Generally, `RSSA()` (the rejection SSA method [^4][^5]) is recommended for small models, with `RSSACR()` (the rejection SSA with composition-rejection method [^6]) typically being more performant for larger models. For models that are simulated a large number of times, it can be worthwhile to try a few different jump simulation methods to determine which one is most performant in each given case. + +## [Hybrid simulations](@id jump_simulation_guide_hybrid_simulations) +For some models, copy numbers may vary greatly between different species. E.g. consider a genetic promoter which can either be in an inactive form ($Pᵢ$) or an active form ($Pₐ$). The active promoter produces a molecule ($M$): +```@example jump_simulation_guide_3 +using Catalyst # hide +promoter = @reaction_network begin + (kA,kI), Pᵢ <--> Pₐ + p, Pₐ --> Pₐ + M + d, M --> ∅ +end +``` +Let us simulate this model and consider the copy numbers of each individual component: +```@example jump_simulation_guide_3 +using JumpProcesses, Plots # hide +u0 = [:Pᵢ => 1, :Pₐ => 0, :M => 10000] +tspan = (0.0, 5000.0) +p = [:kA => 0.05, :kI => .01, :p => 500.0, :d => 0.0005] + +dprob = DiscreteProblem(promoter, u0, tspan, p) +jprob = JumpProblem(promoter, dprob, RSSA()) +sol = solve(jprob, SSAStepper()) +sol = solve(jprob, SSAStepper(); seed = 1234) # hide + +plt_1 = plot(sol; idxs=2) +plt_2 = plot(sol; idxs=3) +plot(plt_1, plt_2, size=(1200, 400)) +``` +We note that the copy numbers $Pₐ$ are highly stochastic. Meanwhile, $M$ exists in so large copy numbers that its trajectory can be considered approximately deterministic. For models like this one, jump simulations are required to capture the stochastic behaviour of the promoter. However, the large number of jumps generated by $M$ makes such simulations excessively expensive. Here, *hybrid simulations* can be used. Hybrid simulations employ different simulation strategies for their different components. In our example, the state of the promoter could be simulated using a jump simulation, while $M$ could be simulated using an ODE. + +Hybrid simulations for Catalyst models are currently not supported, though they are supported by the lower-level solvers in JumpProcesses and OrdinaryDiffEq. It is work in progress to interface Catalyst to such lower-level solvers, and to provide better optimised hybrid methods. If you require such simulations, please [raise an issue](https://github.com/SciML/Catalyst.jl/issues) and we can notify you of the current state of our implementation, or give suggestions on how one can directly interface with the lower-level hybrid solver interface. + +## [Simulation parallelisation](@id jump_simulation_guide_parallelisation) +When multiple simulations are carried out, simulations can be improved by running these in parallel. We have [previously described](@ref ode_simulation_performance_parallelisation) how to do so for ODE simulation. Jump simulations can be parallelised in the same manner, with the exception that GPU parallelisation is currently not supported. + +--- +## References +[^1]: [L. Marchetti, C. Priami, V. H. Thanh, *Simulation Algorithms for Computational Systems Biology*, Springer (2017).](https://link.springer.com/book/10.1007/978-3-319-63113-4) +[^2]: [D. T. Gillespie, *A general method for numerically simulating the stochastic time evolution of coupled chemical reactions*, Journal of Computational Physics (1976).](https://www.sciencedirect.com/science/article/abs/pii/0021999176900413) +[^3]: [D. T. Gillespie, *Exact Stochastic Simulation of Coupled Chemical Reactions*, The Journal of Physical Chemistry (1977).](https://pubs.acs.org/doi/10.1021/j100540a008) +[^4]: [V. H. Thanh, C. Priami and R. Zunino, *Efficient rejection-based simulation of biochemical reactions with stochastic noise and delays*, Journal of Chemical Physics (2014).](https://pubmed.ncbi.nlm.nih.gov/25296793/) +[^5]: [V. H. Thanh, R. Zunino and C. Priami, *On the rejection-based algorithm for simulation and analysis of large-scale reaction networks*, Journal of Chemical Physics (2015).](https://pubmed.ncbi.nlm.nih.gov/26133409/) +[^6]: [V. H. Thanh, R. Zunino, and C. Priami, *Efficient constant-time complexity algorithm for stochastic simulation of large reaction networks*, IEEE/ACM Transactions on Computational Biology and Bioinformatics (2017).](https://pubmed.ncbi.nlm.nih.gov/26890923/) diff --git a/docs/src/model_simulation/simulation_introduction.md b/docs/src/model_simulation/simulation_introduction.md index dfeed94e39..ebdf37a310 100644 --- a/docs/src/model_simulation/simulation_introduction.md +++ b/docs/src/model_simulation/simulation_introduction.md @@ -1,7 +1,7 @@ # [Model Simulation Introduction](@id simulation_intro) Catalyst's core functionality is the creation of *chemical reaction network* (CRN) models that can be simulated using ODE, SDE, and jump simulations. How such simulations are carried out has already been described in [Catalyst's introduction](@ref introduction_to_catalyst). This page provides a deeper introduction, giving some additional background and introducing various simulation-related options. -Here we will focus on the basics, with other sections of the simulation documentation describing various specialised features, or giving advice on performance. Anyone who plans on using Catalyst's simulation functionality extensively is recommended to also read the documentation on [solution plotting](@ref simulation_plotting), and on how to [interact with simulation problems, integrators, and solutions](@ref simulation_structure_interfacing). Anyone with an application for which performance is critical should consider reading the corresponding page on performance advice for [ODE](@ref ode_simulation_performance), [SDE](@ref sde_simulation_performance), or [jump](@ref jump_simulation_performance) simulations. +Here we will focus on the basics, with other sections of the simulation documentation describing various specialised features, or giving advice on performance. Anyone who plans on using Catalyst's simulation functionality extensively is recommended to also read the documentation on [solution plotting](@ref simulation_plotting), and on how to [interact with simulation problems, integrators, and solutions](@ref simulation_structure_interfacing). Anyone with an application for which performance is critical should consider reading the corresponding page on performance advice for [ODE](@ref ode_simulation_performance), [SDE](@ref sde_simulation_performance), or [jump](@ref jump_simulation_guide) simulations. ### [Background to CRN simulations](@id simulation_intro_theory) This section provides some brief theory on CRN simulations. For details on how to carry out these simulations in actual code, please skip to the following sections. @@ -290,7 +290,7 @@ While the `@default_noise_scaling` option is unavailable for [programmatically c ## [Performing jump simulations using stochastic chemical kinetics](@id simulation_intro_jumps) -Catalyst uses the [JumpProcesses.jl](https://github.com/SciML/JumpProcesses.jl) package to perform jump simulations. This section provides a brief introduction, with [JumpProcesses's documentation](https://docs.sciml.ai/JumpProcesses/stable/) providing a more extensive description. A dedicated section giving advice on how to optimise jump simulation performance can be found [here](@ref jump_simulation_performance). +Catalyst uses the [JumpProcesses.jl](https://github.com/SciML/JumpProcesses.jl) package to perform jump simulations. This section provides a brief introduction, with [JumpProcesses's documentation](https://docs.sciml.ai/JumpProcesses/stable/) providing a more extensive description. A dedicated section giving advice on how to optimise jump simulation performance can be found [here](@ref jump_simulation_guide). Jump simulations are performed using so-called `JumpProblem`s. Unlike ODEs and SDEs (for which the corresponding problem types can be created directly), jump simulations require first creating an intermediary `DiscreteProblem`. In this example, we first declare our two-state model and its initial conditions, time span, and parameter values. ```@example simulation_intro_jumps diff --git a/docs/src/model_simulation/jump_simulation_performance.md b/docs/unpublished/jump_simulation_performance.md similarity index 96% rename from docs/src/model_simulation/jump_simulation_performance.md rename to docs/unpublished/jump_simulation_performance.md index 7f525edd9a..cee61a90b9 100644 --- a/docs/src/model_simulation/jump_simulation_performance.md +++ b/docs/unpublished/jump_simulation_performance.md @@ -1,14 +1,14 @@ -# [Advice for performant jump simulations](@id jump_simulation_performance) +# [Advice for Stochastic Chemical Kinetics Jump Simulations](@id jump_simulation_performance) We have previously introduced how Catalyst *chemical reaction network* (CRN) models can be converted to *stochastic chemical kinetics* jump process models, which can then be (exactly) simulated (i.e. sampled) using Stochastic Simulation -Algorithms (SSAs) such as Gillespie's Direct method. These simulations can, -however, be very computationally intensive. Fortunately, there are several ways -to increase their performance (thus reducing run time). This tutorial describes -various considerations for performant stochastic chemical kinetics simulations -(which we will here refer to as jump simulations). All jump simulations arising -from stochastic chemical kinetics representations of Catalyst models are -performed using SSAs from JumpProcesses.jl. Please see the [JumpProcesses +Algorithms (SSAs) such as Gillespie's Direct method. + +In this tutorial we will review basic information and configuration options for +such simulations, and discuss several ways to increase their performance (thus +reducing run time). All jump simulations arising from stochastic chemical +kinetics representations of Catalyst models are performed using SSAs from +JumpProcesses.jl. Please see the [JumpProcesses documentation](https://github.com/SciML/JumpProcesses.jl) for a more extensive introduction to the package and its available solvers.