1 | 1 | ofeltype(x, y) = convert(float(eltype(x)), y)
2 | 2 |
3 | | -# Considers `src` a zero-dimensional object.
4 | | -# Useful for implementing `StatsBase.counts`, `degree`, etc.
5 | | -# function NNlib.scatter!(op, dst::AbstractArray, src::Number, idx::AbstractArray) |
6 | | -# for k in CartesianIndices(idx) |
7 | | -# # dst_v = NNlib._view(dst, idx[k]) |
8 | | -# # dst_v .= (op).(dst_v, src) |
9 | | -# dst[idx[k]] .= (op).(dst[idx[k]], src) |
10 | | -# end |
11 | | -# dst |
12 | | -# end |
13 | | - |
14 | | -# About 10 times faster than the generic version above.
15 | | -# All the speedup comes from not broadcasting `op`, though it is unclear why.
16 | | -function NNlib.scatter!(op, dst::AbstractVector, src::Number, idx::AbstractVector{<:Integer}) |
17 | | - for i in idx |
18 | | - dst[i] = op(dst[i], src) |
19 | | - end |
20 | | -end |
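# Hedged illustration, not part of the original source: with the vector method above,
# scattering the scalar `1` with `+` counts how many times each index occurs, which is
# the `StatsBase.counts` / `degree` pattern mentioned in the comments above.
#
#     dst = zeros(Int, 4)
#     NNlib.scatter!(+, dst, 1, [1, 2, 2, 4])
#     dst == [1, 2, 0, 1]    # index 2 appears twice, index 3 never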
21 | | - |
22 | | -# NNlib._view(X, k) = view(X, k...) |
23 | | -# NNlib._view(X, k::Union{Integer, CartesianIndex}) = view(X, k) |
24 | | - |
25 | | -# Considers `src` as a zero-dimensional object to be scattered.
26 | | -# function NNlib.scatter(op, |
27 | | -# src::Tsrc, |
28 | | -# idx::AbstractArray{Tidx,Nidx}; |
29 | | -# init = nothing, dstsize = nothing) where {Tsrc<:Number,Tidx,Nidx} |
30 | | - |
31 | | -# dstsz = isnothing(dstsize) ? maximum_dims(idx) : dstsize |
32 | | -# dst = similar(src, Tsrc, dstsz) |
33 | | -# xinit = isnothing(init) ? scatter_empty(op, Tsrc) : init |
34 | | -# fill!(dst, xinit) |
35 | | -# scatter!(op, dst, src, idx) |
36 | | -# end |
37 | | - |
38 | | - |
39 | | -function scatter_scalar_kernel!(op, dst, src, idx) |
40 | | - index = threadIdx().x + (blockIdx().x - 1) * blockDim().x |
41 | | - |
42 | | - @inbounds if index <= length(idx) |
43 | | - CUDA.@atomic dst[idx[index]...] = op(dst[idx[index]...], src) |
44 | | - end |
45 | | - return nothing |
46 | | -end |
47 | | - |
48 | | -function NNlib.scatter!(op, dst::AnyCuArray, src::Number, idx::AnyCuArray) |
49 | | - max_idx = length(idx) |
50 | | - args = op, dst, src, idx |
51 | | - |
52 | | - kernel = @cuda launch=false scatter_scalar_kernel!(args...) |
53 | | - config = launch_configuration(kernel.fun; max_threads=256) |
54 | | - threads = min(max_idx, config.threads) |
55 | | - blocks = cld(max_idx, threads) |
56 | | - kernel(args...; threads=threads, blocks=blocks) |
57 | | - return dst |
58 | | -end |
59 | | - |
60 | 3 | """
61 | 4 | reduce_nodes(aggr, g, x) |
62 | 5 |
@@ -157,3 +100,16 @@ function broadcast_edges(g::GNNGraph, x) |
157 | 100 | return gather(x, gi) |
158 | 101 | end |
159 | 102 |
| 103 | +# More generic version of |
| 104 | +# https://github.com/JuliaDiff/ChainRules.jl/pull/586 |
 | 105 | +# that applies to all arrays.
 | 106 | +# Without this, the gradient of `T.(A)` for a dense GPU matrix `A` errors.
| 107 | +function ChainRulesCore.rrule(::typeof(Broadcast.broadcasted), T::Type{<:Number}, x::AbstractArray) |
| 108 | + proj = ProjectTo(x) |
| 109 | + |
| 110 | + function broadcasted_cast(Δ) |
| 111 | + return NoTangent(), NoTangent(), proj(Δ) |
| 112 | + end |
| 113 | + |
| 114 | + return T.(x), broadcasted_cast |
| 115 | +end |
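# Hedged usage sketch, not part of this commit: with the rrule above, the pullback of a
# type-converting broadcast such as `Float32.(A)` returns `ProjectTo(x)(Δ)` instead of
# erroring for dense GPU matrices. Assumes `Zygote` and `CUDA` are available.
#
#     using Zygote, CUDA
#     A = CUDA.rand(Float64, 3, 3)
#     dA = Zygote.gradient(a -> sum(Float32.(a)), A)[1]
#     # dA is a 3×3 CuArray of Float64 ones, projected back to the type of `A`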