Implementation of Robust Adaptive Metropolis (#106)

torfjelde · mhauru · devmotion · web-flow · commit ef34dcda818c · 2024-12-11T08:57:03.000Z
* added an initial implementation of `RAM` * added proper docs for RAM * fixed doctest for `RAM` + added impls of `getparams` and `setparams!!` * added DocStringExtensions as a dep * bump patch version * attempt at making the dcotest a bit more consistent * a * added checks for eigenvalues according to p. 13 in Vihola (2012) (in preivous commit) * fixed default value for `eigenvalue_lower_bound` * applied suggestions from @mhauru * more doctesting of RAM + improved docstrings * added docstring for `RAMState` * added proper testing of RAM * Update src/RobustAdaptiveMetropolis.jl Co-authored-by: Markus Hauru <markus@mhauru.org> * added compat entries to docs * apply suggestions from @devmotion Co-authored-by: David Widmann <devmotion@users.noreply.github.com> * renamed `RAM` to `RobostMetropolisHastings` + removed the separate module for this * formatting * made the docstring for RAM a bit nicer * fixed doctest * formatting * minor improvement to docstring of RAM * fused scalar operations * added dimensionality check of the provided `S` matrix * fixed typo * Update docs/src/api.md Co-authored-by: David Widmann <devmotion@users.noreply.github.com> * use `randn` instead of `rand` for initialisation * added an explanation of the `min` * Update test/RobustAdaptiveMetropolis.jl Co-authored-by: David Widmann <devmotion@users.noreply.github.com> * use explicit `Cholesky` constructor for backwards compat * Fix typo: ```` -> ``` * formatted according to `blue` * Update src/RobustAdaptiveMetropolis.jl Co-authored-by: Markus Hauru <markus@mhauru.org> --------- Co-authored-by: Markus Hauru <markus@mhauru.org> Co-authored-by: David Widmann <devmotion@users.noreply.github.com> Co-authored-by: Tor Fjelde <tor@Tors-MacBook-Pro.local> Co-authored-by: Markus Hauru <mhauru@turing.ac.uk>
diff --git a/Project.toml b/Project.toml
@@ -1,10 +1,11 @@
 name = "AdvancedMH"
 uuid = "5b7e9947-ddc0-4b3f-9b55-0d8042f74170"
-version = "0.8.4"
+version = "0.8.5"
 
 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
@@ -26,6 +27,7 @@ AdvancedMHStructArraysExt = "StructArrays"
 AbstractMCMC = "5.6"
 DiffResults = "1"
 Distributions = "0.25"
+DocStringExtensions = "0.9"
 FillArrays = "1"
 ForwardDiff = "0.10"
 LinearAlgebra = "1.6"
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -1,5 +1,15 @@
 [deps]
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
+MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [compat]
 Documenter = "1"
+Distributions = "0.25"
+LinearAlgebra = "1.6"
+LogDensityProblems = "2"
+MCMCChains = "6.0.4"
+Random = "1.6"
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -12,3 +12,9 @@ MetropolisHastings
 ```@docs
 DensityModel
 ```
+
+## Samplers
+
+```@docs
+RobustAdaptiveMetropolis
+```
diff --git a/src/AdvancedMH.jl b/src/AdvancedMH.jl
@@ -3,8 +3,9 @@ module AdvancedMH
 # Import the relevant libraries.
 using AbstractMCMC
 using Distributions
-using LinearAlgebra: I
+using LinearAlgebra: LinearAlgebra, I
 using FillArrays: Zeros
+using DocStringExtensions: FIELDS
 
 using LogDensityProblems: LogDensityProblems
 
@@ -22,7 +23,8 @@ export
     SymmetricRandomWalkProposal,
     Ensemble,
     StretchProposal,
-    MALA
+    MALA,
+    RobustAdaptiveMetropolis
 
 # Reexports
 export sample, MCMCThreads, MCMCDistributed, MCMCSerial
@@ -159,5 +161,6 @@ include("proposal.jl")
 include("mh-core.jl")
 include("emcee.jl")
 include("MALA.jl")
+include("RobustAdaptiveMetropolis.jl")
 
 end # module AdvancedMH
diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl
@@ -0,0 +1,278 @@
+# TODO: Should we generalise this arbitrary symmetric proposals?
+"""
+    RobustAdaptiveMetropolis
+
+Robust Adaptive Metropolis-Hastings (RAM).
+
+This is a simple implementation of the RAM algorithm described in [^VIH12].
+
+# Fields
+
+$(FIELDS)
+
+# Examples
+
+The following demonstrates how to implement a simple Gaussian model and sample from it using the RAM algorithm.
+
+```jldoctest ram-gaussian; setup=:(using Random; Random.seed!(1234);)
+julia> using AdvancedMH, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra
+
+julia> # Define a Gaussian with zero mean and some covariance.
+       struct Gaussian{A}
+           Σ::A
+       end
+
+julia> # Implement the LogDensityProblems interface.
+       LogDensityProblems.dimension(model::Gaussian) = size(model.Σ, 1)
+
+julia> function LogDensityProblems.logdensity(model::Gaussian, x)
+           d = LogDensityProblems.dimension(model)
+           return logpdf(MvNormal(zeros(d),model.Σ), x)
+       end
+
+julia> LogDensityProblems.capabilities(::Gaussian) = LogDensityProblems.LogDensityOrder{0}()
+
+julia> # Construct the model. We'll use a correlation of 0.5.
+       model = Gaussian([1.0 0.5; 0.5 1.0]);
+
+julia> # Number of samples we want in the resulting chain.
+       num_samples = 10_000;
+
+julia> # Number of warmup steps, i.e. the number of steps to adapt the covariance of the proposal.
+       # Note that these are not included in the resulting chain, as `discard_initial=num_warmup`
+       # by default in the `sample` call. To include them, pass `discard_initial=0` to `sample`.
+       num_warmup = 10_000;
+
+julia> # Sample!
+       chain = sample(
+            model,
+            RobustAdaptiveMetropolis(),
+            num_samples;
+            chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2)
+        );
+
+julia> isapprox(cov(Array(chain)), model.Σ; rtol = 0.2)
+true
+```
+
+It's also possible to restrict the eigenvalues to avoid either too small or too large values. See p. 13 in [^VIH12].
+
+```jldoctest ram-gaussian
+julia> chain = sample(
+           model,
+           RobustAdaptiveMetropolis(eigenvalue_lower_bound=0.1, eigenvalue_upper_bound=2.0),
+           num_samples;
+           chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2)
+       );
+
+julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.2
+true
+```
+
+# References
+[^VIH12]: Vihola (2012) Robust adaptive Metropolis algorithm with coerced acceptance rate, Statistics and computing.
+"""
+Base.@kwdef struct RobustAdaptiveMetropolis{T,A<:Union{Nothing,AbstractMatrix{T}}} <:
+                   AdvancedMH.MHSampler
+    "target acceptance rate. Default: 0.234."
+    α::T = 0.234
+    "negative exponent of the adaptation decay rate. Default: `0.6`."
+    γ::T = 0.6
+    "initial lower-triangular Cholesky factor of the covariance matrix. If specified, should be convertible into a `LowerTriangular`. Default: `nothing`, which is interpreted as the identity matrix."
+    S::A = nothing
+    "lower bound on eigenvalues of the adapted Cholesky factor. Default: `0.0`."
+    eigenvalue_lower_bound::T = 0.0
+    "upper bound on eigenvalues of the adapted Cholesky factor. Default: `Inf`."
+    eigenvalue_upper_bound::T = Inf
+end
+
+"""
+    RobustAdaptiveMetropolisState
+
+State of the Robust Adaptive Metropolis-Hastings (RAM) algorithm.
+
+See also: [`RobustAdaptiveMetropolis`](@ref).
+
+# Fields
+$(FIELDS)
+"""
+struct RobustAdaptiveMetropolisState{T1,L,A,T2,T3}
+    "current realization of the chain."
+    x::T1
+    "log density of `x` under the target model."
+    logprob::L
+    "current lower-triangular Cholesky factor."
+    S::A
+    "log acceptance ratio of the previous iteration (not necessarily of `x`)."
+    logα::T2
+    "current step size for adaptation of `S`."
+    η::T3
+    "current iteration."
+    iteration::Int
+    "whether the previous iteration was accepted."
+    isaccept::Bool
+end
+
+AbstractMCMC.getparams(state::RobustAdaptiveMetropolisState) = state.x
+function AbstractMCMC.setparams!!(state::RobustAdaptiveMetropolisState, x)
+    return RobustAdaptiveMetropolisState(
+        x, state.logprob, state.S, state.logα, state.η, state.iteration, state.isaccept
+    )
+end
+
+function ram_step_inner(
+    rng::Random.AbstractRNG,
+    model::AbstractMCMC.LogDensityModel,
+    sampler::RobustAdaptiveMetropolis,
+    state::RobustAdaptiveMetropolisState,
+)
+    # This is the initial state.
+    f = model.logdensity
+    d = LogDensityProblems.dimension(f)
+
+    # Sample the proposal.
+    x = state.x
+    U = randn(rng, eltype(x), d)
+    x_new = muladd(state.S, U, x)
+
+    # Compute the acceptance probability.
+    lp = state.logprob
+    lp_new = LogDensityProblems.logdensity(f, x_new)
+    # Technically, the `min` here is unnecessary for sampling according to `min(..., 1)`.
+    # However, `ram_adapt` assumes that `logα` actually represents the log acceptance probability
+    # and is thus bounded at 0. Moreover, users might be interested in inspecting the average
+    # acceptance rate to check that the algorithm achieves something similar to the target rate.
+    # Hence, it's a bit more convenient for the user if we just perform the `min` here
+    # so they can just take an average of (`exp` of) the `logα` values.
+    logα = min(lp_new - lp, zero(lp))
+    isaccept = Random.randexp(rng) > -logα
+
+    return x_new, lp_new, U, logα, isaccept
+end
+
+function ram_adapt(
+    sampler::RobustAdaptiveMetropolis,
+    state::RobustAdaptiveMetropolisState,
+    logα::Real,
+    U::AbstractVector,
+)
+    Δα = exp(logα) - sampler.α
+    S = state.S
+    # TODO: Make this configurable by defining a more general path.
+    η = state.iteration^(-sampler.γ)
+    ΔS = (η * abs(Δα)) * S * U / LinearAlgebra.norm(U)
+    # TODO: Maybe do in-place and then have the user extract it with a callback if they really want it.
+    S_new = if sign(Δα) == 1
+        # One rank update.
+        LinearAlgebra.lowrankupdate(LinearAlgebra.Cholesky(S.data, :L, 0), ΔS).L
+    else
+        # One rank downdate.
+        LinearAlgebra.lowrankdowndate(LinearAlgebra.Cholesky(S.data, :L, 0), ΔS).L
+    end
+    return S_new, η
+end
+
+function AbstractMCMC.step(
+    rng::Random.AbstractRNG,
+    model::AbstractMCMC.LogDensityModel,
+    sampler::RobustAdaptiveMetropolis;
+    initial_params=nothing,
+    kwargs...,
+)
+    # This is the initial state.
+    f = model.logdensity
+    d = LogDensityProblems.dimension(f)
+
+    # Initial parameter state.
+    T = if initial_params === nothing
+        eltype(sampler.γ)
+    else
+        Base.promote_type(eltype(sampler.γ), eltype(initial_params))
+    end
+    x = if initial_params === nothing
+        randn(rng, T, d)
+    else
+        convert(AbstractVector{T}, initial_params)
+    end
+    # Initialize the Cholesky factor of the covariance matrix.
+    S_data = if sampler.S === nothing
+        LinearAlgebra.diagm(0 => ones(T, d))
+    else
+        # Check the dimensionality of the provided `S`.
+        if size(sampler.S) != (d, d)
+            throw(ArgumentError("The provided `S` has the wrong dimensionality."))
+        end
+        convert(AbstractMatrix{T}, sampler.S)
+    end
+    S = LinearAlgebra.LowerTriangular(S_data)
+
+    # Construct the initial state.
+    lp = LogDensityProblems.logdensity(f, x)
+    state = RobustAdaptiveMetropolisState(x, lp, S, zero(T), 0, 1, true)
+
+    return AdvancedMH.Transition(x, lp, true), state
+end
+
+function AbstractMCMC.step(
+    rng::Random.AbstractRNG,
+    model::AbstractMCMC.LogDensityModel,
+    sampler::RobustAdaptiveMetropolis,
+    state::RobustAdaptiveMetropolisState;
+    kwargs...,
+)
+    # Take the inner step.
+    x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state)
+    # Accept / reject the proposal.
+    state_new = RobustAdaptiveMetropolisState(
+        isaccept ? x_new : state.x,
+        isaccept ? lp_new : state.logprob,
+        state.S,
+        logα,
+        state.η,
+        state.iteration + 1,
+        isaccept,
+    )
+    return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept),
+    state_new
+end
+
+function valid_eigenvalues(S, lower_bound, upper_bound)
+    # Short-circuit if the bounds are the default.
+    (lower_bound == 0 && upper_bound == Inf) && return true
+    # Note that this is just the diagonal when `S` is triangular.
+    eigenvals = LinearAlgebra.eigvals(S)
+    return all(x -> lower_bound <= x <= upper_bound, eigenvals)
+end
+
+function AbstractMCMC.step_warmup(
+    rng::Random.AbstractRNG,
+    model::AbstractMCMC.LogDensityModel,
+    sampler::RobustAdaptiveMetropolis,
+    state::RobustAdaptiveMetropolisState;
+    kwargs...,
+)
+    # Take the inner step.
+    x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state)
+    # Adapt the proposal.
+    S_new, η = ram_adapt(sampler, state, logα, U)
+    # Check that `S_new` has eigenvalues in the desired range.
+    if !valid_eigenvalues(
+        S_new, sampler.eigenvalue_lower_bound, sampler.eigenvalue_upper_bound
+    )
+        # In this case, we just keep the old `S` (p. 13 in Vihola, 2012).
+        S_new = state.S
+    end
+
+    # Update state.
+    state_new = RobustAdaptiveMetropolisState(
+        isaccept ? x_new : state.x,
+        isaccept ? lp_new : state.logprob,
+        S_new,
+        logα,
+        η,
+        state.iteration + 1,
+        isaccept,
+    )
+    return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept),
+    state_new
+end
diff --git a/test/RobustAdaptiveMetropolis.jl b/test/RobustAdaptiveMetropolis.jl
diff --git a/test/runtests.jl b/test/runtests.jl