Skip to content

Commit

Permalink
Merge pull request #16 from awxkee/dev
Browse files Browse the repository at this point in the history
x86 fast checking for alpha singularity
  • Loading branch information
awxkee authored Jan 1, 2025
2 parents 5ee3b03 + ab50ceb commit 32a1304
Show file tree
Hide file tree
Showing 57 changed files with 4,207 additions and 2,265 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/build_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ jobs:
- uses: dtolnay/rust-toolchain@stable
- run: cargo clippy

tests:
name: Testing
strategy:
matrix:
os: [ ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@nightly
- run: cargo test

fuzz_rgba_8bit:
name: Fuzzing 8bit
strategy:
Expand Down
229 changes: 140 additions & 89 deletions app/benches/resize_rgba/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use fast_image_resize::FilterType::Lanczos3;
use fast_image_resize::{CpuExtensions, PixelType, ResizeAlg, ResizeOptions, Resizer};
use image::{GenericImageView, ImageReader};
use pic_scale::{
ImageStore, ImageStoreMut, ResamplingFunction, Scaler, Scaling, ScalingF32, ThreadingPolicy,
ImageStore, ImageStoreMut, ResamplingFunction, Scaler, Scaling, ScalingF32, ScalingU16,
ThreadingPolicy,
};

pub fn criterion_benchmark(c: &mut Criterion) {
Expand All @@ -14,114 +15,164 @@ pub fn criterion_benchmark(c: &mut Criterion) {
.unwrap();
let dimensions = img.dimensions();
let src_bytes = img.as_bytes();
c.bench_function("Pic scale RGBA with alpha: Lanczos 3", |b| {
let mut copied: Vec<u8> = Vec::from(src_bytes);
b.iter(|| {
let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
scaler.set_threading_policy(ThreadingPolicy::Single);
let store = ImageStore::<u8, 4>::from_slice(
&mut copied,
dimensions.0 as usize,
dimensions.1 as usize,
)
.unwrap();
let mut target =
ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
_ = scaler.resize_rgba(&store, &mut target, true);
})
});

let f32_image: Vec<f32> = src_bytes.iter().map(|&x| x as f32 / 255f32).collect();
// c.bench_function("Pic scale RGBA with alpha: Lanczos 3", |b| {
// let mut copied: Vec<u8> = Vec::from(src_bytes);
// b.iter(|| {
// let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
// scaler.set_threading_policy(ThreadingPolicy::Single);
// let store = ImageStore::<u8, 4>::from_slice(
// &mut copied,
// dimensions.0 as usize,
// dimensions.1 as usize,
// )
// .unwrap();
// let mut target =
// ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
// _ = scaler.resize_rgba(&store, &mut target, true);
// })
// });
//
// let f32_image: Vec<f32> = src_bytes.iter().map(|&x| x as f32 / 255f32).collect();
//
// c.bench_function("Pic scale RGBA with alpha f32: Lanczos 3", |b| {
// let mut copied: Vec<f32> = Vec::from(f32_image.clone());
// b.iter(|| {
// let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
// scaler.set_threading_policy(ThreadingPolicy::Single);
// let store = ImageStore::<f32, 4>::from_slice(
// &mut copied,
// dimensions.0 as usize,
// dimensions.1 as usize,
// )
// .unwrap();
// let mut target =
// ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
// _ = scaler.resize_rgba_f32(&store, &mut target, false);
// })
// });
//
// c.bench_function("Fast image resize RGBA with alpha: Lanczos 3", |b| {
// let mut vc = Vec::from(img.as_bytes());
// b.iter(|| {
// let pixel_type: PixelType = PixelType::U8x4;
// let src_image =
// Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
// let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);
//
// let mut resizer = Resizer::new();
// #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
// unsafe {
// resizer.set_cpu_extensions(CpuExtensions::Neon);
// }
// #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
// unsafe {
// resizer.set_cpu_extensions(CpuExtensions::Avx2);
// }
// resizer
// .resize(
// &src_image,
// &mut dst_image,
// &ResizeOptions::new()
// .resize_alg(ResizeAlg::Convolution(Lanczos3))
// .use_alpha(true),
// )
// .unwrap();
// })
// });
//
// c.bench_function("Pic scale RGBA without alpha: Lanczos 3", |b| {
// let mut copied: Vec<u8> = Vec::from(src_bytes);
// b.iter(|| {
// let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
// scaler.set_threading_policy(ThreadingPolicy::Single);
// let store = ImageStore::<u8, 4>::from_slice(
// &mut copied,
// dimensions.0 as usize,
// dimensions.1 as usize,
// )
// .unwrap();
// let mut target =
// ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
// _ = scaler.resize_rgba(&store, &mut target, false);
// })
// });
//
// c.bench_function("Fast image resize RGBA without alpha: Lanczos 3", |b| {
// let mut vc = Vec::from(img.as_bytes());
// b.iter(|| {
// let pixel_type: PixelType = PixelType::U8x4;
// let src_image =
// Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
// let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);
//
// let mut resizer = Resizer::new();
// #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
// unsafe {
// resizer.set_cpu_extensions(CpuExtensions::Neon);
// }
// #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
// unsafe {
// resizer.set_cpu_extensions(CpuExtensions::Avx2);
// }
// resizer
// .resize(
// &src_image,
// &mut dst_image,
// &ResizeOptions::new()
// .resize_alg(ResizeAlg::Convolution(Lanczos3))
// .use_alpha(false),
// )
// .unwrap();
// })
// });

c.bench_function("Pic scale RGBA with alpha f32: Lanczos 3", |b| {
let mut copied: Vec<f32> = Vec::from(f32_image.clone());
c.bench_function("Pic scale RGBA10 with alpha: Lanczos 3", |b| {
let mut copied: Vec<u16> = Vec::from(
src_bytes
.iter()
.map(|&x| ((x as u16) << 2) | ((x as u16) >> 6))
.collect::<Vec<_>>(),
);
b.iter(|| {
let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
scaler.set_threading_policy(ThreadingPolicy::Single);
let store = ImageStore::<f32, 4>::from_slice(
let store = ImageStore::<u16, 4>::from_slice(
&mut copied,
dimensions.0 as usize,
dimensions.1 as usize,
)
.unwrap();
let mut target =
ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
_ = scaler.resize_rgba_f32(&store, &mut target, false);
let mut target = ImageStoreMut::alloc_with_depth(
dimensions.0 as usize / 4,
dimensions.1 as usize / 4,
10,
);
_ = scaler.resize_rgba_u16(&store, &mut target, true);
})
});

c.bench_function("Fast image resize RGBA with alpha: Lanczos 3", |b| {
let mut vc = Vec::from(img.as_bytes());
b.iter(|| {
let pixel_type: PixelType = PixelType::U8x4;
let src_image =
Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);

let mut resizer = Resizer::new();
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
unsafe {
resizer.set_cpu_extensions(CpuExtensions::Neon);
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
unsafe {
resizer.set_cpu_extensions(CpuExtensions::Avx2);
}
resizer
.resize(
&src_image,
&mut dst_image,
&ResizeOptions::new()
.resize_alg(ResizeAlg::Convolution(Lanczos3))
.use_alpha(true),
)
.unwrap();
})
});

c.bench_function("Pic scale RGBA without alpha: Lanczos 3", |b| {
let mut copied: Vec<u8> = Vec::from(src_bytes);
c.bench_function("Pic scale RGBA10 without alpha: Lanczos 3", |b| {
let mut copied: Vec<u16> = Vec::from(
src_bytes
.iter()
.map(|&x| ((x as u16) << 2) | ((x as u16) >> 6))
.collect::<Vec<_>>(),
);
b.iter(|| {
let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
scaler.set_threading_policy(ThreadingPolicy::Single);
let store = ImageStore::<u8, 4>::from_slice(
let store = ImageStore::<u16, 4>::from_slice(
&mut copied,
dimensions.0 as usize,
dimensions.1 as usize,
)
.unwrap();
let mut target =
ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
_ = scaler.resize_rgba(&store, &mut target, false);
})
});

c.bench_function("Fast image resize RGBA without alpha: Lanczos 3", |b| {
let mut vc = Vec::from(img.as_bytes());
b.iter(|| {
let pixel_type: PixelType = PixelType::U8x4;
let src_image =
Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);

let mut resizer = Resizer::new();
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
unsafe {
resizer.set_cpu_extensions(CpuExtensions::Neon);
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
unsafe {
resizer.set_cpu_extensions(CpuExtensions::Avx2);
}
resizer
.resize(
&src_image,
&mut dst_image,
&ResizeOptions::new()
.resize_alg(ResizeAlg::Convolution(Lanczos3))
.use_alpha(false),
)
.unwrap();
let mut target = ImageStoreMut::alloc_with_depth(
dimensions.0 as usize / 4,
dimensions.1 as usize / 4,
10,
);
_ = scaler.resize_rgba_u16(&store, &mut target, false);
})
});
}
Expand Down
20 changes: 9 additions & 11 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@ mod split;

use std::time::Instant;

use crate::merge::merge_channels_3;
use crate::split::split_channels_3;
use fast_image_resize::images::Image;
use fast_image_resize::{
CpuExtensions, FilterType, IntoImageView, PixelType, ResizeAlg, ResizeOptions, Resizer,
};
use image::{EncodableLayout, GenericImageView, ImageReader};
use pic_scale::{
Ar30ByteOrder, ImageSize, ImageStore, ImageStoreMut, JzazbzScaler, LChScaler, LabScaler,
LinearApproxScaler, LinearScaler, LuvScaler, OklabScaler, ResamplingFunction, Scaler, Scaling,
ScalingU16, SigmoidalScaler, ThreadingPolicy, TransferFunction, XYZScaler,
ImageSize, ImageStore, ImageStoreMut, ResamplingFunction, Scaler, Scaling, ScalingU16,
ThreadingPolicy,
};

fn resize_plane(
Expand Down Expand Up @@ -53,15 +50,16 @@ fn main() {
let transient = img.to_rgba8();
let mut bytes = Vec::from(transient.as_bytes());

let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
let mut scaler = Scaler::new(ResamplingFunction::Bilinear);
scaler.set_threading_policy(ThreadingPolicy::Single);

// resize_plane(378, 257, 257, 257, ResamplingFunction::Bilinear);

let mut choke: Vec<u16> = bytes.iter().map(|&x| (x as u16) << 2).collect();

//
let store =
ImageStore::<u16, 4>::from_slice(&mut choke, dimensions.0 as usize, dimensions.1 as usize)
ImageStore::<u16, 4>::from_slice(&choke, dimensions.0 as usize, dimensions.1 as usize)
.unwrap();

let dst_size = ImageSize::new(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
Expand All @@ -78,13 +76,13 @@ fn main() {
// .unwrap();

let mut dst_store = ImageStoreMut::<u16, 4>::alloc_with_depth(
dimensions.0 as usize / 3,
dimensions.1 as usize / 3,
dimensions.0 as usize,
dimensions.1 as usize / 2,
10,
);

scaler
.resize_rgba_u16(&store, &mut dst_store, false)
.resize_rgba_u16(&store, &mut dst_store, true)
.unwrap();

let elapsed_time = start_time.elapsed();
Expand Down Expand Up @@ -167,7 +165,7 @@ fn main() {
.iter()
.map(|&x| (x >> 2) as u8)
.collect();
//

// let dst = dst_store.as_bytes();
// let dst = resized;
// image::save_buffer(
Expand Down
6 changes: 4 additions & 2 deletions src/alpha_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
use num_traits::AsPrimitive;
use std::ops::{AddAssign, BitXor};

/// Alpha check entry point for `u8` sample buffers.
///
/// Thin wrapper: forwards `store` and `width` unchanged to
/// `has_non_constant_cap_alpha::<u8, u32, 3, 4>` and returns its result as-is.
///
/// NOTE(review): the const arguments `3, 4` presumably select alpha channel
/// index 3 out of 4 interleaved channels (RGBA) — confirm against the generic's
/// parameter order.
/// NOTE(review): judging by the name, `true` presumably means the alpha values
/// are not all identical — confirm against the generic implementation.
// NOTE(review): `dead_code` is presumably allowed because some build targets
// use a different (e.g. SIMD) check and never call this one — confirm.
#[allow(dead_code)]
pub(crate) fn has_non_constant_cap_alpha_rgba8(store: &[u8], width: usize) -> bool {
has_non_constant_cap_alpha::<u8, u32, 3, 4>(store, width)
}

/// Alpha check entry point for `u16` sample buffers.
///
/// Thin wrapper: forwards `store` and `width` unchanged to
/// `has_non_constant_cap_alpha::<u16, u64, 3, 4>` and returns its result as-is.
///
/// NOTE(review): the const arguments `3, 4` presumably select alpha channel
/// index 3 out of 4 interleaved channels (RGBA) — confirm against the generic's
/// parameter order.
/// NOTE(review): judging by the name, `true` presumably means the alpha values
/// are not all identical — confirm against the generic implementation.
// NOTE(review): `dead_code` is presumably allowed because some build targets
// use a different (e.g. SIMD) check and never call this one — confirm.
#[allow(dead_code)]
pub(crate) fn has_non_constant_cap_alpha_rgba16(store: &[u16], width: usize) -> bool {
has_non_constant_cap_alpha::<u16, u64, 3, 4>(store, width)
}
Expand Down Expand Up @@ -61,7 +63,7 @@ where
if store.is_empty() {
return false;
}
let first = store[0];
let first = store[ALPHA_CHANNEL_INDEX];
let mut row_sums: J = 0u32.as_();
for row in store.chunks_exact(width * CHANNELS) {
for color in row.chunks_exact(CHANNELS) {
Expand All @@ -87,7 +89,7 @@ fn has_non_constant_cap_alpha_f32_impl<const ALPHA_CHANNEL_INDEX: usize, const C
if store.is_empty() {
return false;
}
let first = store[0].to_bits();
let first = store[ALPHA_CHANNEL_INDEX].to_bits();
let mut row_sums: u64 = 0u64;
for row in store.chunks_exact(width * CHANNELS) {
for color in row.chunks_exact(CHANNELS) {
Expand Down
Loading

0 comments on commit 32a1304

Please sign in to comment.