Skip to content

Commit 339e7d0

Browse files
committed
fix hann window
1 parent ce9a6a8 commit 339e7d0

File tree

5 files changed

+79
-181
lines changed

5 files changed

+79
-181
lines changed

JxlCoder.podspec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Pod::Spec.new do |s|
22
s.name = 'JxlCoder'
3-
s.version = '1.2.7'
3+
s.version = '1.2.8'
44
s.summary = 'JXL coder for iOS and MacOS'
55
s.description = 'Provides support for JXL files in iOS and MacOS'
66
s.homepage = 'https://github.com/awxkee/jxl-coder-swift'

Sources/JxlCoder/JXLCoder.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public class JXLCoder {
3838
public static func decode(srcStream: InputStream,
3939
rescale: CGSize = .zero,
4040
pixelFormat: JXLPreferredPixelFormat = .optimal,
41-
sampler: JxlSampler = .mitchell) throws -> JXLPlatformImage {
41+
sampler: JxlSampler = .hann) throws -> JXLPlatformImage {
4242
return try shared.decode(srcStream, rescale: rescale, pixelFormat: pixelFormat, sampler: sampler)
4343
}
4444

@@ -49,7 +49,7 @@ public class JXLCoder {
4949
public static func decode(url: URL,
5050
rescale: CGSize = .zero,
5151
pixelFormat: JXLPreferredPixelFormat = .optimal,
52-
sampler: JxlSampler = .mitchell) throws -> JXLPlatformImage {
52+
sampler: JxlSampler = .hann) throws -> JXLPlatformImage {
5353
guard let srcStream = InputStream(url: url) else {
5454
throw NSError(domain: "JXLCoder", code: 500,
5555
userInfo: [NSLocalizedDescriptionKey: "JXLCoder cannot open provided URL"])
@@ -64,7 +64,7 @@ public class JXLCoder {
6464
public static func decode(data: Data,
6565
rescale: CGSize = .zero,
6666
pixelFormat: JXLPreferredPixelFormat = .optimal,
67-
sampler: JxlSampler = .mitchell) throws -> JXLPlatformImage {
67+
sampler: JxlSampler = .hann) throws -> JXLPlatformImage {
6868
let srcStream = InputStream(data: data)
6969
return try shared.decode(srcStream, rescale: rescale, pixelFormat: pixelFormat, sampler: sampler)
7070
}

Sources/jxlc/ScaleInterpolator.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,12 +218,11 @@ inline T LanczosWindow(T x, const T a) {
218218

219219
template <typename T>
220220
inline T HannWindow(T x, const T length) {
221-
if (abs(x) <= length / 2) {
222-
T cx = fastCos(T(M_PI)*x / length);
223-
return (1/length)*cx*cx;
224-
} else {
225-
return T(0);
221+
const float size = length * 2 - 1;
222+
if (abs(x) <= size) {
223+
return 0.5f * (1 - fastCos(T(2)*T(M_PI) * size / length));
226224
}
225+
return T(0);
227226
}
228227

229228
template <typename T>

Sources/jxlc/ScaleInterpolator.h

Lines changed: 65 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ D DemoteTo(T t, float maxColors);
5757
template <typename T>
5858
T CubicBSpline(T t);
5959

60+
template <typename T>
61+
T LanczosWindow(T x, const T a);
62+
6063
#if __arm64__
6164
#include <arm_neon.h>
6265

@@ -71,6 +74,58 @@ static inline float32x4_t Cos(const float32x4_t d) {
7174
return vmlaq_f32(C0, x2, vmlaq_f32(C1, x2, vmlaq_n_f32(C2, x2, C3)));
7275
}
7376

77+
/** Sin polynomial coefficients */
78+
constexpr float te_sin_coeff2 = 0.166666666666f; // 1/(2*3)
79+
constexpr float te_sin_coeff3 = 0.05f; // 1/(4*5)
80+
constexpr float te_sin_coeff4 = 0.023809523810f; // 1/(6*7)
81+
constexpr float te_sin_coeff5 = 0.013888888889f; // 1/(8*9)
82+
83+
__attribute__((always_inline))
84+
inline float32x4_t vsinq_f32(float32x4_t val)
85+
{
86+
const float32x4_t pi_v = vdupq_n_f32(M_PI);
87+
const float32x4_t pio2_v = vdupq_n_f32(M_PI / 2);
88+
const float32x4_t ipi_v = vdupq_n_f32(1 / M_PI);
89+
90+
//Find positive or negative
91+
const int32x4_t c_v = vabsq_s32(vcvtq_s32_f32(vmulq_f32(val, ipi_v)));
92+
const uint32x4_t sign_v = vcleq_f32(val, vdupq_n_f32(0));
93+
const uint32x4_t odd_v = vandq_u32(vreinterpretq_u32_s32(c_v), vdupq_n_u32(1));
94+
95+
uint32x4_t neg_v = veorq_u32(odd_v, sign_v);
96+
97+
//Modulus a - (n * int(a*(1/n)))
98+
float32x4_t ma = vsubq_f32(vabsq_f32(val), vmulq_f32(pi_v, vcvtq_f32_s32(c_v)));
99+
const uint32x4_t reb_v = vcgeq_f32(ma, pio2_v);
100+
101+
//Rebase a between 0 and pi/2
102+
ma = vbslq_f32(reb_v, vsubq_f32(pi_v, ma), ma);
103+
104+
//Taylor series
105+
const float32x4_t ma2 = vmulq_f32(ma, ma);
106+
107+
//2nd elem: x^3 / 3!
108+
float32x4_t elem = vmulq_f32(vmulq_f32(ma, ma2), vdupq_n_f32(te_sin_coeff2));
109+
float32x4_t res = vsubq_f32(ma, elem);
110+
111+
//3rd elem: x^5 / 5!
112+
elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff3));
113+
res = vaddq_f32(res, elem);
114+
115+
//4th elem: x^7 / 7!float32x2_t vsin_f32(float32x2_t val)
116+
elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff4));
117+
res = vsubq_f32(res, elem);
118+
119+
//5th elem: x^9 / 9!
120+
elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff5));
121+
res = vaddq_f32(res, elem);
122+
123+
//Change of sign
124+
neg_v = vshlq_n_u32(neg_v, 31);
125+
res = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(res), neg_v));
126+
return res;
127+
}
128+
74129
__attribute__((always_inline))
75130
static inline float32x4_t FastSin(const float32x4_t v) {
76131
constexpr float A = 4.0f/(M_PI*M_PI);
@@ -87,7 +142,7 @@ static inline float32x4_t FastSin(const float32x4_t v) {
87142
__attribute__((always_inline))
88143
static inline float32x4_t Sinc(const float32x4_t v) {
89144
const float32x4_t zeros = vdupq_n_f32(0);
90-
const float32x4_t ones = vdupq_n_f32(0);
145+
const float32x4_t ones = vdupq_n_f32(1);
91146
uint32x4_t mask = vceqq_f32(v, zeros);
92147
// if < 0 then set to 1
93148
float32x4_t x = vbslq_f32(mask, ones, v);
@@ -104,20 +159,22 @@ static inline float32x4_t LanczosWindow(const float32x4_t v, const float a) {
104159
const float32x4_t zeros = vdupq_n_f32(0);
105160
uint32x4_t mask = vcltq_f32(vabsq_f32(v), fullLength);
106161
float32x4_t rv = vmulq_n_f32(v, M_PI);
107-
float32x4_t x = vmulq_f32(Sinc(rv), Sinc(vmulq_f32(v, invLength)));
162+
float32x4_t x = vmulq_f32(Sinc(rv), Sinc(vmulq_f32(rv, invLength)));
108163
x = vbslq_f32(mask, x, zeros);
109164
return x;
110165
}
111166

112167
__attribute__((always_inline))
113168
static inline float32x4_t HannWindow(const float32x4_t d, const float length) {
114-
const float32x4_t fullLength = vrecpeq_f32(vdupq_n_f32(length));
115-
const float32x4_t halfLength = vdupq_n_f32(length / 2);
169+
const float doublePi = 2 * M_PI;
170+
const float32x4_t wndSize = vdupq_n_f32(length * 2 - 1);
171+
const float32x4_t rWndSize = vrecpeq_f32(wndSize);
172+
uint32x4_t mask = vcgtq_f32(vabsq_f32(d), wndSize);
173+
const float32x4_t ones = vdupq_n_f32(1);
174+
float32x4_t cx = vmulq_n_f32(vsubq_f32(ones, Cos(vmulq_f32(vmulq_n_f32(d, doublePi), rWndSize))), 0.5f);
116175
const float32x4_t zeros = vdupq_n_f32(0);
117-
uint32x4_t mask = vcltq_f32(vabsq_f32(d), halfLength);
118-
float32x4_t cx = Cos(vmulq_f32(vmulq_n_f32(d, M_PI), fullLength));
119-
cx = vmulq_f32(vmulq_f32(cx, cx), fullLength);
120-
return vbslq_f32(mask, zeros, cx);
176+
cx = vbslq_f32(mask, zeros, cx);
177+
return cx;
121178
}
122179

123180
__attribute__((always_inline))
@@ -220,9 +277,6 @@ T MitchellNetravali(T d, T p0, T p1, T p2, T p3);
220277
template <typename T>
221278
T sinc(T x);
222279

223-
template <typename T>
224-
T LanczosWindow(T x, const T a);
225-
226280
template <typename T>
227281
T HannWindow(T x, const T length);
228282

0 commit comments

Comments
 (0)