@@ -829,6 +829,113 @@ end
829
829
x, y
830
830
end
831
831
832
+ # ### --------------------------------------------------
833
+
834
+ # # Issue 511 for alternatives to polynomial composition
835
+ # https://ens-lyon.hal.science/ensl-00546102/document
836
+ # should be faster, but not using O(nlog(n)) multiplication below.
837
+ function ranged_horner (f:: StandardBasisPolynomial , g:: StandardBasisPolynomial , i, l)
838
+ out = f[i+ l- 1 ] * one (g)
839
+ for j ∈ (l- 2 ): - 1 : 0
840
+ out = muladd (out, g, f[i+ j])
841
+ end
842
+ out
843
+ end
844
+
845
+ # Compute `p(q)`. Seems more performant than
846
+ function practical_polynomial_composition (f:: StandardBasisPolynomial , g:: StandardBasisPolynomial )
847
+ l = 4
848
+ n = degree (f)
849
+
850
+ kᵢ = ceil (Int, (n + 1 )/ l)
851
+ isone (kᵢ) && return f (g)
852
+
853
+ hij = [ranged_horner (f, g, j* l, l) for j ∈ 0 : (kᵢ- 1 )]
854
+
855
+ G = g^ l
856
+ o = 1
857
+ while kᵢ > 1
858
+ kᵢ = ceil (Int, kᵢ/ 2 )
859
+ isodd (length (hij)) && push! (hij, zero (f))
860
+ hij = [hij[2 j+ o] + hij[2 j+ 1 + o]* G for j ∈ 0 : (kᵢ- 1 )]
861
+ kᵢ > 1 && (G = G^ 2 )
862
+ end
863
+
864
+ return only (hij)
865
+ end
866
+
867
+ # # issue #519 polynomial multiplication via FFT
868
+ # # cf. http://www.cs.toronto.edu/~denisp/csc373/docs/tutorial3-adv-writeup.pdf
869
+ # # Compute recursive_fft
870
+ # # assumes length(as) = 2^k for some k
871
+ # # ωₙ is exp(-2pi*im/n) or Cyclotomics.E(n), the latter slower but non-lossy
872
+ function recursive_fft (as, ωₙ = nothing )
873
+ n = length (as)
874
+ N = 2 ^ ceil (Int, log2 (n))
875
+ ω = something (ωₙ, exp (- 2pi * im/ N))
876
+ R = typeof (ω * first (as))
877
+ ys = Vector {R} (undef, N)
878
+ recursive_fft! (ys, as, ω)
879
+ ys
880
+ end
881
+
882
+ # # pass in same ωₙ as recursive_fft
883
+ function inverse_fft (as, ωₙ= nothing )
884
+ n = length (as)
885
+ ω = something (ωₙ, exp (- 2pi * im/ n))
886
+ recursive_fft (as, conj (ω)) / n
887
+ end
888
+
889
+ # note: can write version for big coefficients, but still allocates a bit
890
+ function recursive_fft! (ys, as, ωₙ)
891
+
892
+ n = length (as)
893
+ @assert n == length (ys) == 2 ^ (ceil (Int, log2 (n)))
894
+
895
+ ω = one (ωₙ) * one (first (as))
896
+ isone (n) && (ys[1 ] = ω * as[1 ]; return ys)
897
+
898
+ o = 1
899
+ eidx = o .+ range (0 , n- 2 , step= 2 )
900
+ oidx = o .+ range (1 , n- 1 , step= 2 )
901
+
902
+ n2 = n ÷ 2
903
+
904
+ ye = recursive_fft! (view (ys, 1 : n2), view (as, eidx), ωₙ^ 2 )
905
+ yo = recursive_fft! (view (ys, (n2+ 1 ): n), view (as, oidx), ωₙ^ 2 )
906
+
907
+ @inbounds for k ∈ o .+ range (0 , n2 - 1 )
908
+ yₑ, yₒ = ye[k], yo[k]
909
+ ys[k ] = muladd (ω, yₒ, yₑ)
910
+ ys[k + n2] = yₑ - ω* yₒ
911
+ ω *= ωₙ
912
+ end
913
+ return ys
914
+ end
915
+
916
+ # This *should* be faster -- (O(nlog(n)), but this version is definitely not so.
917
+ # when `ωₙ = Cyclotomics.E` and T,S are integer, this can be exact
918
+ function poly_multiplication_fft (p:: P , q:: Q , ωₙ= nothing ) where {T,P<: StandardBasisPolynomial{T} ,
919
+ S,Q<: StandardBasisPolynomial{S} }
920
+ as, bs = coeffs0 (p), coeffs0 (q)
921
+ n = maximum (length, (as, bs))
922
+ N = 2 ^ ceil (Int, log2 (n))
923
+
924
+ as′ = zeros (promote_type (T,S), 2 N)
925
+ copy! (view (as′, 1 : length (as)), as)
926
+
927
+ ω = something (ωₙ, n -> exp (- 2im * pi / n))(2 N)
928
+ âs = recursive_fft (as′, ω)
929
+
930
+ as′ .= 0
931
+ copy! (view (as′, 1 : length (bs)), bs)
932
+ b̂s = recursive_fft (as′, ω)
933
+
934
+ âb̂s = âs .* b̂s
935
+
936
+ PP = promote_type (P,Q)
937
+ ⟒ (PP)(inverse_fft (âb̂s, ω))
938
+ end
832
939
833
940
834
941
# # --------------------------------------------------
0 commit comments