TuringLang · zuhengxu · Jul 11, 2023 · Jul 11, 2023 · Jul 11, 2023 · Jul 11, 2023
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,7 @@
 /docs/build/
 test/Manifest.toml
 example/Manifest.toml
+example/LocalPreferences.toml
 
 # Files generated by invoking Julia with --code-coverage
 *.jl.cov

diff --git a/Project.toml b/Project.toml
@@ -8,7 +8,9 @@ Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

diff --git a/example/Project.toml b/example/Project.toml
@@ -2,16 +2,22 @@
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-FunctionChains = "8e6b2b91-af83-483e-ba35-d00930e4cf9b"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
+Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 NormalizingFlows = "50e4474d-9f12-44b7-af7a-91ab30ff6256"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+SimpleUnPack = "ce78b400-467f-4804-87d8-8f486da07d0a"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+
+[extras]
+CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
diff --git a/example/README.md b/example/README.md
@@ -12,7 +12,7 @@ normalizing flow to approximate the target distribution using `NormalizingFlows.
 Currently, all examples share the same [Julia project](https://pkgdocs.julialang.org/v1/environments/#Using-someone-else's-project). To run the examples, first activate the project environment:
 
 ```julia
-# pwd() = "NormalizingFlows.jl/"
-using Pkg; Pkg.activate("example"); Pkg.instantiate()
+# pwd() = "NormalizingFlows.jl/example"
+using Pkg; Pkg.activate("."); Pkg.instantiate()
 ```
-This will install all needed packages, at the exact versions when the model was last updated. Then you can run the model code with include("<example-to-run>.jl"), or by running the example script line-by-line.
+This will install all needed packages, at the exact versions when the model was last updated. Then you can run the model code with `include("<example-to-run>.jl")`, or by running the example script line-by-line.
diff --git a/example/SyntheticTargets.jl b/example/SyntheticTargets.jl
@@ -0,0 +1,34 @@
+using DocStringExtensions
+using Distributions, Random, LinearAlgebra
+using IrrationalConstants
+using Plots
+
+
+include("targets/banana.jl")
+include("targets/cross.jl")
+include("targets/neal_funnel.jl")
+include("targets/warped_gaussian.jl")
+
+
+"""
+    load_model(name::String)
+
+Construct the synthetic target distribution registered under `name`.
+
+Recognised names are `"Banana"`, `"Cross"`, `"Funnel"` and `"WarpedGaussian"`; any other
+name raises an error.
+"""
+function load_model(name::String)
+    name == "Banana" && return Banana(2, 1.0, 10.0)
+    name == "Cross" && return Cross()
+    name == "Funnel" && return Funnel(2)
+    name == "WarpedGaussian" && return WarpedGauss()
+    # no known target matched
+    error("Model not defined")
+end
+
+"""
+    visualize(p::ContinuousMultivariateDistribution, samples=rand(p, 1000))
+
+Draw a contour plot of the density of `p` and overlay `samples` as a scatter plot.
+
+Only the first two rows of `samples` are used. The plotting window is their range padded
+by one unit on each side, evaluated on a 100×100 grid. Returns the figure.
+"""
+function visualize(p::ContinuousMultivariateDistribution, samples=rand(p, 1000))
+    xs, ys = samples[1, :], samples[2, :]
+    xlo, xhi = extrema(xs)
+    ylo, yhi = extrema(ys)
+    xrange = range(xlo - 1, xhi + 1; length=100)
+    yrange = range(ylo - 1, yhi + 1; length=100)
+    # density[i, j] is the pdf at (xrange[i], yrange[j]); it is handed to `contour` transposed
+    density = [exp(Distributions.logpdf(p, [x, y])) for x in xrange, y in yrange]
+    fig = contour(
+        xrange, yrange, density'; levels=15, color=:viridis, label="PDF", linewidth=2
+    )
+    scatter!(xs, ys; label="Samples", alpha=0.3, legend=:bottomright)
+    return fig
+end
diff --git a/example/common.jl b/example/common.jl
diff --git a/example/demo_RealNVP.jl b/example/demo_RealNVP.jl
@@ -0,0 +1,163 @@
+using Flux
+using Bijectors
+using Bijectors: partition, combine, PartitionMask
+
+using Random, Distributions, LinearAlgebra
+using Functors
+using Optimisers, ADTypes
+using Mooncake
+using NormalizingFlows
+
+include("SyntheticTargets.jl")
+include("utils.jl")
+
+##################################
+# define affine coupling layer using Bijectors.jl interface
+#################################
+"""
+    AffineCoupling(dim, mask, s, t)
+
+Affine coupling layer implemented through the Bijectors.jl interface.
+
+The input is split by `mask` into three parts `(x₁, x₂, x₃)`; the layer maps
+`x₁ ↦ x₁ .* s(x₂) .+ t(x₂)` and leaves `x₂` and `x₃` unchanged.
+
+The struct is parametrized on the concrete types of `mask`, `s` and `t` so that field
+access is concretely typed (`Bijectors.PartitionMask` and `Flux.Chain` on their own are
+not concrete types).
+
+# Fields
+- `dim`: dimension of the input.
+- `mask`: partition mask used to split and recombine the input.
+- `s`: network producing the elementwise scale.
+- `t`: network producing the shift.
+"""
+struct AffineCoupling{M<:Bijectors.PartitionMask,S<:Flux.Chain,U<:Flux.Chain} <:
+       Bijectors.Bijector
+    dim::Int
+    mask::M
+    s::S
+    t::U
+end
+
+# let params track field s and t
+@functor AffineCoupling (s, t)
+
+"""
+    AffineCoupling(dim::Int, hdims::Int, mask_idx::AbstractVector)
+
+Build an affine coupling layer for `dim`-dimensional inputs that transforms the
+dimensions listed in `mask_idx`, using freshly created scale (`s`) and shift (`t`)
+networks with hidden size `hdims`.
+"""
+function AffineCoupling(
+    dim::Int,  # dimension of input
+    hdims::Int, # dimension of hidden units for s and t
+    mask_idx::AbstractVector, # indices of the dimensions to apply the transformation to
+)
+    cdims = length(mask_idx) # number of transformed dimensions
+    # `s` and `t` are evaluated on the part of the input that is NOT transformed and must
+    # return one value per transformed dimension. Assuming `PartitionMask(dim, mask_idx)`
+    # puts all remaining indices into the second partition, that input has `dim - cdims`
+    # entries (the previous `cdims` only worked when `dim == 2 * cdims`).
+    # NOTE(review): assumes `mlp3(input_dim, hidden_dim, output_dim)` — confirm in utils.jl.
+    s = mlp3(dim - cdims, hdims, cdims)
+    t = mlp3(dim - cdims, hdims, cdims)
+    mask = PartitionMask(dim, mask_idx)
+    return AffineCoupling(dim, mask, s, t)
+end
+
+"""
+    Bijectors.transform(af::AffineCoupling, x::AbstractVector)
+
+Apply the forward coupling map to `x`: the first partition is multiplied by `af.s` and
+shifted by `af.t`, both evaluated on the second partition; the remaining parts are
+passed through unchanged.
+"""
+function Bijectors.transform(af::AffineCoupling, x::AbstractVector)
+    # partition vector using `af.mask::PartitionMask`
+    x_1, x_2, x_3 = partition(af.mask, x)
+    scale, shift = af.s(x_2), af.t(x_2)
+    y_1 = x_1 .* scale .+ shift
+    return combine(af.mask, y_1, x_2, x_3)
+end
+
+# Calling the layer on an array forwards to `transform(af, x)`.
+(af::AffineCoupling)(x::AbstractArray) = transform(af, x)
+
+"""
+    Bijectors.with_logabsdet_jacobian(af::AffineCoupling, x::AbstractVector)
+
+Return `(y, logjac)`, where `y` is the forward transform of `x` and `logjac` is the sum
+of `log(abs(·))` over the scale values `af.s(x₂)`.
+"""
+function Bijectors.with_logabsdet_jacobian(af::AffineCoupling, x::AbstractVector)
+    x_1, x_2, x_3 = Bijectors.partition(af.mask, x)
+    # evaluate the scale network once and reuse it for the output and the log-Jacobian
+    scale = af.s(x_2)
+    y_1 = scale .* x_1 .+ af.t(x_2)
+    logjac = sum(log ∘ abs, scale)
+    return combine(af.mask, y_1, x_2, x_3), logjac
+end
+
+"""
+    Bijectors.with_logabsdet_jacobian(iaf::Inverse{<:AffineCoupling}, y::AbstractVector)
+
+Return `(x, logjac)` for the inverse coupling map: `x₁ = (y₁ .- t(y₂)) ./ s(y₂)` with the
+other parts unchanged, and `logjac` equal to minus the sum of `log(abs(·))` over the
+scale values `s(y₂)`.
+"""
+function Bijectors.with_logabsdet_jacobian(
+    iaf::Inverse{<:AffineCoupling}, y::AbstractVector
+)
+    af = iaf.orig
+    # partition vector using `af.mask::PartitionMask`
+    y_1, y_2, y_3 = partition(af.mask, y)
+    # evaluate the scale network once and reuse it for the inverse and the log-Jacobian
+    scale = af.s(y_2)
+    # inverse transformation
+    x_1 = (y_1 .- af.t(y_2)) ./ scale
+    logjac = -sum(log ∘ abs, scale)
+    return combine(af.mask, x_1, y_2, y_3), logjac
+end
+
+"""
+    Bijectors.logabsdetjac(af::AffineCoupling, x::AbstractVector)
+
+Return the sum of `log(abs(·))` over the scale values `af.s(x₂)`, where `x₂` is the
+second part of `x` under `af.mask`.
+"""
+function Bijectors.logabsdetjac(af::AffineCoupling, x::AbstractVector)
+    # only the second partition is needed: it is the input of the scale network
+    x_2 = partition(af.mask, x)[2]
+    return sum(log ∘ abs, af.s(x_2))
+end
+
+################### 
+# an equivalent definition of AffineCoupling using Bijectors.Coupling 
+# (see https://github.com/TuringLang/Bijectors.jl/blob/74d52d4eda72a6149b1a89b72524545525419b3f/src/bijectors/coupling.jl#L188C1-L188C1)
+###################
+
+# struct AffineCoupling <: Bijectors.Bijector
+#     dim::Int
+#     mask::Bijectors.PartitionMask
+#     s::Flux.Chain
+#     t::Flux.Chain
+# end
+
+# # let params track field s and t
+# @functor AffineCoupling (s, t)
+
+# function AffineCoupling(dim, mask, s, t)
+#     return Bijectors.Coupling(θ -> Bijectors.Shift(t(θ)) ∘ Bijectors.Scale(s(θ)), mask)
+# end
+
+# function AffineCoupling(
+#     dim::Int,  # dimension of input
+#     hdims::Int, # dimension of hidden units for s and t
+#     mask_idx::AbstractVector, # indices of the dimensions to apply the transformation to
+# )
+#     cdims = length(mask_idx) # dimension of parts used to construct coupling law
+#     s = mlp3(cdims, hdims, cdims)
+#     t = mlp3(cdims, hdims, cdims)
+#     mask = PartitionMask(dim, mask_idx)
+#     return AffineCoupling(dim, mask, s, t)
+# end
+
+
+
+##################################
+# start demo
+#################################
+# Seed the global RNG before any random draws in this script.
+Random.seed!(123)
+# NOTE(review): `rng` is not referenced again in the visible part of this script.
+rng = Random.default_rng()
+# Element type used below for the reference distribution and the convergence threshold.
+T = Float32
+
+######################################
+# a difficult banana target
+######################################
+# `Banana` is not defined in this file (presumably targets/banana.jl, included through
+# SyntheticTargets.jl); the meaning of its three arguments is not visible here.
+target = Banana(2, 1.0f0, 100.0f0)
+# `logp(x)` evaluates `logpdf(target, x)`.
+logp = Base.Fix1(logpdf, target)
+
+######################################
+# learn the target using Affine coupling flow
+######################################
+# Mark `MvNormal` as a Functors leaf — presumably so that the fields of `q0` are not
+# traversed as trainable parameters of the flow; confirm against `create_flow`.
+@leaf MvNormal
+# Reference distribution of the flow, built from length-2 vectors with element type `T`.
+q0 = MvNormal(zeros(T, 2), ones(T, 2))
+
+# input dimension and hidden size handed to each `AffineCoupling`
+d = 2
+hdims = 32
+# Three blocks, each composing a coupling that transforms index 1 with one that
+# transforms index 2.
+Ls = [AffineCoupling(d, hdims, [1]) ∘ AffineCoupling(d, hdims, [2]) for i in 1:3]
+
+# `create_flow` is not defined in this file (presumably utils.jl).
+flow = create_flow(Ls, q0)
+# Keep an independent copy of the untrained flow for the comparison at the end.
+flow_untrained = deepcopy(flow)
+
+
+######################################
+# start training
+######################################
+# Passed to `train_flow` as its fourth positional argument.
+sample_per_iter = 64
+
+# callback function to log training progress
+# It ignores its arguments and returns a NamedTuple of `sample_per_iter` and `adtype`.
+# `adtype` is assigned on the next line; being a global, it is looked up when `cb` is
+# called, not when `cb` is defined.
+cb(iter, opt_stats, re, θ) = (sample_per_iter=sample_per_iter,ad=adtype)
+# AD backend selection: Mooncake with its default configuration.
+adtype = ADTypes.AutoMooncake(; config = Mooncake.Config())
+# Convergence check: true once `stat.gradient_norm` is below 1/1000 (computed in type `T`).
+checkconv(iter, stat, re, θ, st) = stat.gradient_norm < one(T)/1000
+# Train by optimising `elbo`; the third return value is discarded.
+flow_trained, stats, _ = train_flow(
+    elbo,
+    flow,
+    logp,
+    sample_per_iter;
+    max_iters=50_000,
+    optimiser=Optimisers.Adam(5e-4),
+    ADbackend=adtype,
+    show_progress=true,
+    callback=cb,
+    hasconverged=checkconv,
+)
+# Flatten the trained flow into a parameter vector `θ` and a reconstructor `re`.
+θ, re = Optimisers.destructure(flow_trained)
+# Collect the `loss` field of every entry in `stats`.
+losses = map(x -> x.loss, stats)
+
+######################################
+# evaluate trained flow
+######################################
+plot(losses; label="Loss", linewidth=2) # plot the loss
+# `compare_trained_and_untrained_flow` is not defined in this file (presumably utils.jl);
+# it receives both flows, the target and the number 1000.
+compare_trained_and_untrained_flow(flow_trained, flow_untrained, target, 1000)