Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x86 fast checking for alpha singularity #16

Merged
merged 16 commits into from
Jan 1, 2025
11 changes: 11 additions & 0 deletions .github/workflows/build_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ jobs:
- uses: dtolnay/rust-toolchain@stable
- run: cargo clippy

# CI job: runs the test suite (`cargo test`) once per OS listed in the matrix.
tests:
name: Testing
strategy:
matrix:
os: [ ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
# Installs the nightly Rust toolchain before the tests are run.
- uses: dtolnay/rust-toolchain@nightly
- run: cargo test

fuzz_rgba_8bit:
name: Fuzzing 8bit
strategy:
Expand Down
229 changes: 140 additions & 89 deletions app/benches/resize_rgba/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use fast_image_resize::FilterType::Lanczos3;
use fast_image_resize::{CpuExtensions, PixelType, ResizeAlg, ResizeOptions, Resizer};
use image::{GenericImageView, ImageReader};
use pic_scale::{
ImageStore, ImageStoreMut, ResamplingFunction, Scaler, Scaling, ScalingF32, ThreadingPolicy,
ImageStore, ImageStoreMut, ResamplingFunction, Scaler, Scaling, ScalingF32, ScalingU16,
ThreadingPolicy,
};

pub fn criterion_benchmark(c: &mut Criterion) {
Expand All @@ -14,114 +15,164 @@ pub fn criterion_benchmark(c: &mut Criterion) {
.unwrap();
let dimensions = img.dimensions();
let src_bytes = img.as_bytes();
c.bench_function("Pic scale RGBA with alpha: Lanczos 3", |b| {
let mut copied: Vec<u8> = Vec::from(src_bytes);
b.iter(|| {
let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
scaler.set_threading_policy(ThreadingPolicy::Single);
let store = ImageStore::<u8, 4>::from_slice(
&mut copied,
dimensions.0 as usize,
dimensions.1 as usize,
)
.unwrap();
let mut target =
ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
_ = scaler.resize_rgba(&store, &mut target, true);
})
});

let f32_image: Vec<f32> = src_bytes.iter().map(|&x| x as f32 / 255f32).collect();
// c.bench_function("Pic scale RGBA with alpha: Lanczos 3", |b| {
// let mut copied: Vec<u8> = Vec::from(src_bytes);
// b.iter(|| {
// let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
// scaler.set_threading_policy(ThreadingPolicy::Single);
// let store = ImageStore::<u8, 4>::from_slice(
// &mut copied,
// dimensions.0 as usize,
// dimensions.1 as usize,
// )
// .unwrap();
// let mut target =
// ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
// _ = scaler.resize_rgba(&store, &mut target, true);
// })
// });
//
// let f32_image: Vec<f32> = src_bytes.iter().map(|&x| x as f32 / 255f32).collect();
//
// c.bench_function("Pic scale RGBA with alpha f32: Lanczos 3", |b| {
// let mut copied: Vec<f32> = Vec::from(f32_image.clone());
// b.iter(|| {
// let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
// scaler.set_threading_policy(ThreadingPolicy::Single);
// let store = ImageStore::<f32, 4>::from_slice(
// &mut copied,
// dimensions.0 as usize,
// dimensions.1 as usize,
// )
// .unwrap();
// let mut target =
// ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
// _ = scaler.resize_rgba_f32(&store, &mut target, false);
// })
// });
//
// c.bench_function("Fast image resize RGBA with alpha: Lanczos 3", |b| {
// let mut vc = Vec::from(img.as_bytes());
// b.iter(|| {
// let pixel_type: PixelType = PixelType::U8x4;
// let src_image =
// Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
// let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);
//
// let mut resizer = Resizer::new();
// #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
// unsafe {
// resizer.set_cpu_extensions(CpuExtensions::Neon);
// }
// #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
// unsafe {
// resizer.set_cpu_extensions(CpuExtensions::Avx2);
// }
// resizer
// .resize(
// &src_image,
// &mut dst_image,
// &ResizeOptions::new()
// .resize_alg(ResizeAlg::Convolution(Lanczos3))
// .use_alpha(true),
// )
// .unwrap();
// })
// });
//
// c.bench_function("Pic scale RGBA without alpha: Lanczos 3", |b| {
// let mut copied: Vec<u8> = Vec::from(src_bytes);
// b.iter(|| {
// let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
// scaler.set_threading_policy(ThreadingPolicy::Single);
// let store = ImageStore::<u8, 4>::from_slice(
// &mut copied,
// dimensions.0 as usize,
// dimensions.1 as usize,
// )
// .unwrap();
// let mut target =
// ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
// _ = scaler.resize_rgba(&store, &mut target, false);
// })
// });
//
// c.bench_function("Fast image resize RGBA without alpha: Lanczos 3", |b| {
// let mut vc = Vec::from(img.as_bytes());
// b.iter(|| {
// let pixel_type: PixelType = PixelType::U8x4;
// let src_image =
// Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
// let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);
//
// let mut resizer = Resizer::new();
// #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
// unsafe {
// resizer.set_cpu_extensions(CpuExtensions::Neon);
// }
// #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
// unsafe {
// resizer.set_cpu_extensions(CpuExtensions::Avx2);
// }
// resizer
// .resize(
// &src_image,
// &mut dst_image,
// &ResizeOptions::new()
// .resize_alg(ResizeAlg::Convolution(Lanczos3))
// .use_alpha(false),
// )
// .unwrap();
// })
// });

c.bench_function("Pic scale RGBA with alpha f32: Lanczos 3", |b| {
let mut copied: Vec<f32> = Vec::from(f32_image.clone());
c.bench_function("Pic scale RGBA10 with alpha: Lanczos 3", |b| {
let mut copied: Vec<u16> = Vec::from(
src_bytes
.iter()
.map(|&x| ((x as u16) << 2) | ((x as u16) >> 6))
.collect::<Vec<_>>(),
);
b.iter(|| {
let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
scaler.set_threading_policy(ThreadingPolicy::Single);
let store = ImageStore::<f32, 4>::from_slice(
let store = ImageStore::<u16, 4>::from_slice(
&mut copied,
dimensions.0 as usize,
dimensions.1 as usize,
)
.unwrap();
let mut target =
ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
_ = scaler.resize_rgba_f32(&store, &mut target, false);
let mut target = ImageStoreMut::alloc_with_depth(
dimensions.0 as usize / 4,
dimensions.1 as usize / 4,
10,
);
_ = scaler.resize_rgba_u16(&store, &mut target, true);
})
});

c.bench_function("Fast image resize RGBA with alpha: Lanczos 3", |b| {
let mut vc = Vec::from(img.as_bytes());
b.iter(|| {
let pixel_type: PixelType = PixelType::U8x4;
let src_image =
Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);

let mut resizer = Resizer::new();
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
unsafe {
resizer.set_cpu_extensions(CpuExtensions::Neon);
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
unsafe {
resizer.set_cpu_extensions(CpuExtensions::Avx2);
}
resizer
.resize(
&src_image,
&mut dst_image,
&ResizeOptions::new()
.resize_alg(ResizeAlg::Convolution(Lanczos3))
.use_alpha(true),
)
.unwrap();
})
});

c.bench_function("Pic scale RGBA without alpha: Lanczos 3", |b| {
let mut copied: Vec<u8> = Vec::from(src_bytes);
c.bench_function("Pic scale RGBA10 without alpha: Lanczos 3", |b| {
let mut copied: Vec<u16> = Vec::from(
src_bytes
.iter()
.map(|&x| ((x as u16) << 2) | ((x as u16) >> 6))
.collect::<Vec<_>>(),
);
b.iter(|| {
let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
scaler.set_threading_policy(ThreadingPolicy::Single);
let store = ImageStore::<u8, 4>::from_slice(
let store = ImageStore::<u16, 4>::from_slice(
&mut copied,
dimensions.0 as usize,
dimensions.1 as usize,
)
.unwrap();
let mut target =
ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
_ = scaler.resize_rgba(&store, &mut target, false);
})
});

c.bench_function("Fast image resize RGBA without alpha: Lanczos 3", |b| {
let mut vc = Vec::from(img.as_bytes());
b.iter(|| {
let pixel_type: PixelType = PixelType::U8x4;
let src_image =
Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);

let mut resizer = Resizer::new();
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
unsafe {
resizer.set_cpu_extensions(CpuExtensions::Neon);
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
unsafe {
resizer.set_cpu_extensions(CpuExtensions::Avx2);
}
resizer
.resize(
&src_image,
&mut dst_image,
&ResizeOptions::new()
.resize_alg(ResizeAlg::Convolution(Lanczos3))
.use_alpha(false),
)
.unwrap();
let mut target = ImageStoreMut::alloc_with_depth(
dimensions.0 as usize / 4,
dimensions.1 as usize / 4,
10,
);
_ = scaler.resize_rgba_u16(&store, &mut target, false);
})
});
}
Expand Down
20 changes: 9 additions & 11 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@ mod split;

use std::time::Instant;

use crate::merge::merge_channels_3;
use crate::split::split_channels_3;
use fast_image_resize::images::Image;
use fast_image_resize::{
CpuExtensions, FilterType, IntoImageView, PixelType, ResizeAlg, ResizeOptions, Resizer,
};
use image::{EncodableLayout, GenericImageView, ImageReader};
use pic_scale::{
Ar30ByteOrder, ImageSize, ImageStore, ImageStoreMut, JzazbzScaler, LChScaler, LabScaler,
LinearApproxScaler, LinearScaler, LuvScaler, OklabScaler, ResamplingFunction, Scaler, Scaling,
ScalingU16, SigmoidalScaler, ThreadingPolicy, TransferFunction, XYZScaler,
ImageSize, ImageStore, ImageStoreMut, ResamplingFunction, Scaler, Scaling, ScalingU16,
ThreadingPolicy,
};

fn resize_plane(
Expand Down Expand Up @@ -53,15 +50,16 @@ fn main() {
let transient = img.to_rgba8();
let mut bytes = Vec::from(transient.as_bytes());

let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
let mut scaler = Scaler::new(ResamplingFunction::Bilinear);
scaler.set_threading_policy(ThreadingPolicy::Single);

// resize_plane(378, 257, 257, 257, ResamplingFunction::Bilinear);

let mut choke: Vec<u16> = bytes.iter().map(|&x| (x as u16) << 2).collect();

//
let store =
ImageStore::<u16, 4>::from_slice(&mut choke, dimensions.0 as usize, dimensions.1 as usize)
ImageStore::<u16, 4>::from_slice(&choke, dimensions.0 as usize, dimensions.1 as usize)
.unwrap();

let dst_size = ImageSize::new(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
Expand All @@ -78,13 +76,13 @@ fn main() {
// .unwrap();

let mut dst_store = ImageStoreMut::<u16, 4>::alloc_with_depth(
dimensions.0 as usize / 3,
dimensions.1 as usize / 3,
dimensions.0 as usize,
dimensions.1 as usize / 2,
10,
);

scaler
.resize_rgba_u16(&store, &mut dst_store, false)
.resize_rgba_u16(&store, &mut dst_store, true)
.unwrap();

let elapsed_time = start_time.elapsed();
Expand Down Expand Up @@ -167,7 +165,7 @@ fn main() {
.iter()
.map(|&x| (x >> 2) as u8)
.collect();
//

// let dst = dst_store.as_bytes();
// let dst = resized;
// image::save_buffer(
Expand Down
6 changes: 4 additions & 2 deletions src/alpha_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
use num_traits::AsPrimitive;
use std::ops::{AddAssign, BitXor};

/// 8-bit entry point for the alpha check: forwards `store` and `width` unchanged to the
/// generic `has_non_constant_cap_alpha`, instantiated as `<u8, u32, 3, 4>`, and returns
/// its result.
///
/// NOTE(review): the const arguments are presumably the alpha channel index (3) and the
/// number of interleaved channels (4), i.e. RGBA layout, with `u32` as the accumulator
/// type — confirm against the generic's signature.
// NOTE(review): `dead_code` is presumably allowed because some build targets use a
// different (SIMD) code path and never call this wrapper — confirm.
#[allow(dead_code)]
pub(crate) fn has_non_constant_cap_alpha_rgba8(store: &[u8], width: usize) -> bool {
has_non_constant_cap_alpha::<u8, u32, 3, 4>(store, width)
}

/// 16-bit entry point for the alpha check: forwards `store` and `width` unchanged to the
/// generic `has_non_constant_cap_alpha`, instantiated as `<u16, u64, 3, 4>`, and returns
/// its result.
///
/// NOTE(review): the const arguments are presumably the alpha channel index (3) and the
/// number of interleaved channels (4), i.e. RGBA layout, with `u64` as the accumulator
/// type — confirm against the generic's signature.
// NOTE(review): `dead_code` is presumably allowed because some build targets use a
// different (SIMD) code path and never call this wrapper — confirm.
#[allow(dead_code)]
pub(crate) fn has_non_constant_cap_alpha_rgba16(store: &[u16], width: usize) -> bool {
has_non_constant_cap_alpha::<u16, u64, 3, 4>(store, width)
}
Expand Down Expand Up @@ -61,7 +63,7 @@ where
if store.is_empty() {
return false;
}
let first = store[0];
let first = store[ALPHA_CHANNEL_INDEX];
let mut row_sums: J = 0u32.as_();
for row in store.chunks_exact(width * CHANNELS) {
for color in row.chunks_exact(CHANNELS) {
Expand All @@ -87,7 +89,7 @@ fn has_non_constant_cap_alpha_f32_impl<const ALPHA_CHANNEL_INDEX: usize, const C
if store.is_empty() {
return false;
}
let first = store[0].to_bits();
let first = store[ALPHA_CHANNEL_INDEX].to_bits();
let mut row_sums: u64 = 0u64;
for row in store.chunks_exact(width * CHANNELS) {
for color in row.chunks_exact(CHANNELS) {
Expand Down
Loading
Loading