diff --git a/arm/arm_init.c b/arm/arm_init.c index 44f92f7b2a..63281fc869 100644 --- a/arm/arm_init.c +++ b/arm/arm_init.c @@ -52,23 +52,182 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp) #define png_target_init_filter_functions_impl png_init_filter_functions_neon -#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE /*TODO*/ -#include "palette_neon_intrinsics.c" - -/* TODO: - * png_target_free_data_impl +#ifdef PNG_TARGET_STORES_DATA +/* png_target_free_data_impl * Must be defined if the implementation stores data in * png_struct::target_data. Need not be defined otherwise. - * - * png_target_init_palette_support_impl - * Contains code to initialize a palette transformation. This returns - * true if something has been set up. Only called if the state contains - * png_target_palette, need not be defined, may cancel the state flag - * in the png_struct to prevent further calls. - * - * png_target_do_expand_palette - * Handles palette expansion. Need not be defined, only called if the - * state contains png_target_palette, may set this flag to zero, may - * return false to indicate that the expansion was not done. */ +static void +png_target_free_data_arm(png_structrp pp) +{ + png_voidp ptr = pp->target_data; + pp->target_data = NULL; + png_free(pp, ptr); +} +#define png_target_free_data_impl png_target_free_data_arm +#endif /* TARGET_STORES_DATA */ + +#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE +/* png_target_do_expand_palette_impl [flag: png_target_expand_palette] + * static function + * OPTIONAL + * Handles the transform. Need not be defined, only called if the + * state contains png_target_, may set this flag to zero, may + * return false to indicate that the transform was not done (so the + * C implementation must then execute). + */ +#include "palette_neon_intrinsics.c" + +static int +png_target_do_expand_palette_neon(png_structrp png_ptr, png_row_infop row_info, + png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha, + int num_trans) +{ + /* NOTE: it is important that this is done. row_info->width is not a CSE + * because the pointer is not declared with the 'restrict' parameter, this + * makes it a CSE but then it is very important that no one changes it in + * this function, hence the const. + */ + const png_uint_32 row_width = row_info->width; + + /* NOTE: this is pretty much the original code: + * + * 1) The original code only works when the original PNG has 8-bits per + * palette. This test was in pngrtran.c and is now here. + * + * 2) The original code starts at the end and works backward but then stops + * when it is within 16 bytes of the start. It then left the remainder to + * the original code in pngrtran.c That code is now here. + * + * 3) The original code takes pointers to the end of the input and the end of + * the output; this is the way png_do_expand_palette works becuase it + * has to copy down from the end (otherwise it would overwrite the input + * data before it read it). Note that the row buffer is aliased by + * these two pointers. + * + * A consequence of passing pointers is that the row pointers (input and + * output) are forced into memory (they can't be in registers). This + * could be fixed and some compilers may be able to handle this but + * no changes have been made to the original ARM code at this point. + */ + if (row_info->color_type == PNG_COLOR_TYPE_PALETTE && + row_info->bit_depth == 8 /* <8 requires a bigger "riffled" palette */) + { + png_const_bytep sp = row + (row_width - 1); /* 8 bit palette index */ + if (num_trans > 0) + { + /* This case needs a "riffled" palette. In this implementation the + * initialization is done here, on demand. + */ + if (png_ptr->target_data == NULL) + { + /* Initialize the accelerated palette expansion. + * + * The data is now allocated using png_malloc_warn so the code + * does not error out on OOM. + */ + png_ptr->target_data = png_malloc_warn(png_ptr, 256 * 4); + + /* On allocation error it is essential to clear the flag or a + * massive number of warnings will be output. + */ + if (png_ptr->target_data != NULL) + png_riffle_palette_neon(png_ptr->target_data, palette, + trans_alpha, num_trans); + else + goto clear_flag; + } + + /* This is the general convention in the core transform code; when + * expanding the number of bytes in the row copy down (necessary) and + * pass a pointer to the last byte, not the first. + * + * It does not have to be preserved here but maybe it is better this + * way despite the fact that the comments in the neon palette code + * obfuscate what is happening. + */ + png_bytep dp = row + (4/*RGBA*/*row_width - 1); + + /* Cosmin Truta: "Sometimes row_info->bit_depth has been changed to 8. + * In these cases, the palette hasn't been riffled." + * + * John Bowler: Explanation: The code in png_do_palette_expand + * *invariably* changes the bit depth to 8. So low palette bit depth + * gets expanded to 8 and png_row_info is adjusted to reflect this (see + * png_do_palette_expand), however the "riffle" initialization code + * checked the original png_ptr bit depth, so it didn't know this would + * happen... + * + * This could be changed; the original bit depth is irrelevant to the + * initialization code. + */ + png_uint_32 i = png_target_do_expand_palette_rgba8_neon( + png_ptr->target_data, row_info->width, &sp, &dp); + + if (i == 0) /* nothing was done */ + return 0; /* Return here: interlaced images start out narrow */ + + /* Now 'i' make not have reached row_width. + * NOTE: [i] is not the index into the row buffer, rather than is + * [row_width-i], this is the way it is done in the original + * png_do_expand_palette. + */ + for (; i < row_width; i++) + { + if ((int)(*sp) >= num_trans) + *dp-- = 0xff; + else + *dp-- = trans_alpha[*sp]; + *dp-- = palette[*sp].blue; + *dp-- = palette[*sp].green; + *dp-- = palette[*sp].red; + sp--; + } + + /* Finally update row_info to reflect the expanded output: */ + row_info->bit_depth = 8; + row_info->pixel_depth = 32; + row_info->rowbytes = row_width * 4; + row_info->color_type = 6; + row_info->channels = 4; + return 1; + } + else + { + /* No tRNS chunk (num_trans == 0), expand to RGB not RGBA. */ + png_bytep dp = row + (3/*RGB*/*row_width - 1); + + png_uint_32 i = png_target_do_expand_palette_rgb8_neon(palette, + row_info->width, &sp, &dp); + + if (i == 0) + return 0; /* Return here: interlaced images start out narrow */ + + /* Finish the last bytes: */ + for (; i < row_width; i++) + { + *dp-- = palette[*sp].blue; + *dp-- = palette[*sp].green; + *dp-- = palette[*sp].red; + sp--; + } + + row_info->bit_depth = 8; + row_info->pixel_depth = 24; + row_info->rowbytes = row_width * 3; + row_info->color_type = 2; + row_info->channels = 3; + return 1; + } + } + +clear_flag: + /* Here on malloc failure and on an inapplicable image. */ + png_ptr->target_state &= ~png_target_expand_palette; + return 0; +} + +#define png_target_do_expand_palette_impl png_target_do_expand_palette_neon +/* EXPAND_PALETTE */ + #endif /*TODO*/ diff --git a/arm/check.h b/arm/check.h index e9b0696cb1..8fb3521a82 100644 --- a/arm/check.h +++ b/arm/check.h @@ -12,8 +12,8 @@ # define PNG_TARGET_CODE_IMPLEMENTATION "arm/arm_init.c" # define PNG_TARGET_IMPLEMENTS_FILTERS # ifdef PNG_READ_EXPAND_SUPPORTED - /*TODO: # define PNG_TARGET_STORES_DATA */ - /*TODO: # define PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE */ +# define PNG_TARGET_STORES_DATA +# define PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE # endif /* READ_EXPAND */ # define PNG_TARGET_ROW_ALIGNMENT 16 #endif /* ARM_NEON */ diff --git a/arm/palette_neon_intrinsics.c b/arm/palette_neon_intrinsics.c index 706daef167..0c29fad0ea 100644 --- a/arm/palette_neon_intrinsics.c +++ b/arm/palette_neon_intrinsics.c @@ -11,12 +11,9 @@ /* Build an RGBA8 palette from the separate RGB and alpha palettes. */ static void -png_riffle_palette_neon(png_structrp png_ptr) +png_riffle_palette_neon(png_bytep riffled_palette, png_const_colorp palette, + png_const_bytep trans_alpha, int num_trans) { - png_const_colorp palette = png_ptr->palette; - png_bytep riffled_palette = png_ptr->riffled_palette; - png_const_bytep trans_alpha = png_ptr->trans_alpha; - int num_trans = png_ptr->num_trans; int i; /* Initially black, opaque. */ @@ -47,19 +44,15 @@ png_riffle_palette_neon(png_structrp png_ptr) } /* Expands a palettized row into RGBA8. */ -static int -png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info, - png_const_bytep row, png_bytepp ssp, png_bytepp ddp) +static png_uint_32 +png_target_do_expand_palette_rgba8_neon(const png_uint_32 *riffled_palette, + png_uint_32 row_width, png_const_bytep *ssp, png_bytep *ddp) { - png_uint_32 row_width = row_info->width; - const png_uint_32 *riffled_palette = - (const png_uint_32 *)png_ptr->riffled_palette; const png_uint_32 pixels_per_chunk = 4; png_uint_32 i; png_debug(1, "in png_do_expand_palette_rgba8_neon"); - PNG_UNUSED(row) if (row_width < pixels_per_chunk) return 0; @@ -72,7 +65,8 @@ png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info, for (i = 0; i < row_width; i += pixels_per_chunk) { uint32x4_t cur; - png_bytep sp = *ssp - i, dp = *ddp - (i << 2); + png_const_bytep sp = *ssp - i; + png_bytep dp = *ddp - (i << 2); cur = vld1q_dup_u32 (riffled_palette + *(sp - 3)); cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1); cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2); @@ -92,18 +86,18 @@ png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info, } /* Expands a palettized row into RGB8. */ -static int -png_do_expand_palette_rgb8_neon(png_structrp png_ptr, png_row_infop row_info, - png_const_bytep row, png_bytepp ssp, png_bytepp ddp) +static png_uint_32 +png_target_do_expand_palette_rgb8_neon(png_const_colorp paletteIn, + png_uint_32 row_width, png_const_bytep *ssp, png_bytep *ddp) { - png_uint_32 row_width = row_info->width; - png_const_bytep palette = (png_const_bytep)png_ptr->palette; + /* TODO: This case is VERY dangerous: */ + png_const_bytep palette = (png_const_bytep)paletteIn; + const png_uint_32 pixels_per_chunk = 8; png_uint_32 i; png_debug(1, "in png_do_expand_palette_rgb8_neon"); - PNG_UNUSED(row) if (row_width <= pixels_per_chunk) return 0; @@ -113,7 +107,8 @@ png_do_expand_palette_rgb8_neon(png_structrp png_ptr, png_row_infop row_info, for (i = 0; i < row_width; i += pixels_per_chunk) { uint8x8x3_t cur; - png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i); + png_const_bytep sp = *ssp - i; + png_bytep dp = *ddp - ((i << 1) + i); cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7))); cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1); cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2); diff --git a/pngpriv.h b/pngpriv.h index 9272455fbe..c3a543a461 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -1100,7 +1100,7 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row,(png_structrp pp, png_row_infop * available for this image. */ #define png_target_filters 1 /* MASK: hardware support for filters */ -#define png_target_palette 2 /* MASK: hardware support for palettes */ +#define png_target_expand_palette 2 /* MASK: hardware support for palettes */ PNG_INTERNAL_FUNCTION(void,png_target_init,(png_structrp),PNG_EMPTY); /* Initialize png_struct::target_state if required. */ @@ -1115,15 +1115,17 @@ PNG_INTERNAL_FUNCTION(void, png_target_init_filter_functions, * implementation. Called once before the first row needs to be defiltered. */ -PNG_INTERNAL_FUNCTION(void, png_target_init_palette_support, (png_structrp), - PNG_EMPTY); -PNG_INTERNAL_FUNCTION(int, png_target_do_expand_palette, (png_structrp, - png_row_infop, png_const_bytep, const png_bytepp, const png_bytepp), - PNG_EMPTY); - /* Two functions to set up and execute palette expansion. The 'init' - * must succeed but then the 'do_expand' might, apparently, still fail. - */ -#endif /* HARDWARE */ +/* Handlers for specific transforms (currently only 'expand_palette'). These + * are implemented in pngsimd.c to call the actual SIMD implementation if + * required. + * + * The handlers return "false" if nothing was done and the C code will then be + * called. The implementations must do everything or nothing. + */ +PNG_INTERNAL_FUNCTION(int, png_target_do_expand_palette, + (png_structrp, png_row_infop), PNG_EMPTY); + /* Expand the palette and return true or do nothing and return false. */ +#endif /* TARGET_CODE */ /* Choose the best filter to use and filter the row data */ PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, diff --git a/pngrtran.c b/pngrtran.c index 4af9fe04ef..545e97b644 100644 --- a/pngrtran.c +++ b/pngrtran.c @@ -4191,9 +4191,8 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr) * upon whether you supply trans and num_trans. */ static void -png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, - png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha, - int num_trans) +png_do_expand_palette(png_row_infop row_info, png_bytep row, png_const_colorp + palette, png_const_bytep trans_alpha, int num_trans) { int shift, value; png_bytep sp, dp; @@ -4298,21 +4297,7 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, dp = row + ((size_t)row_width << 2) - 1; i = 0; -#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE - if ((png_ptr->target_state & png_target_palette) != 0) - { - /* The RGBA optimization works with png_ptr->bit_depth == 8 - * but sometimes row_info->bit_depth has been changed to 8. - * In these cases, the palette hasn't been riffled. - */ - i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row, - &sp, &dp); - } -#else - PNG_UNUSED(png_ptr) -#endif - - for (; i < row_width; i++) + for (i = 0; i < row_width; i++) { if ((int)(*sp) >= num_trans) *dp-- = 0xff; @@ -4334,15 +4319,7 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, { sp = row + (size_t)row_width - 1; dp = row + (size_t)(row_width * 3) - 1; - i = 0; -#ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE - i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row, - &sp, &dp); -#else - PNG_UNUSED(png_ptr) -#endif - - for (; i < row_width; i++) + for (i = 0; i < row_width; i++) { *dp-- = palette[*sp].blue; *dp-- = palette[*sp].green; @@ -4757,18 +4734,13 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info) if (row_info->color_type == PNG_COLOR_TYPE_PALETTE) { #ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE - if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) - { - if (png_ptr->riffled_palette == NULL) - { - /* Initialize the accelerated palette expansion. */ - png_ptr->riffled_palette = - (png_bytep)png_malloc(png_ptr, 256 * 4); - png_riffle_palette_neon(png_ptr); - } - } + /* Do not call 'png_do_expand_palette' if the SIMD implementation + * does it (note that this accomodates SIMD implementations which might + * only handle specific cases). + */ + if (!png_target_do_expand_palette(png_ptr, row_info)) #endif - png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1, + png_do_expand_palette(row_info, png_ptr->row_buf + 1, png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); } diff --git a/pngsimd.c b/pngsimd.c index 7c1cb2f166..91c04fe65d 100644 --- a/pngsimd.c +++ b/pngsimd.c @@ -35,29 +35,22 @@ * UNDEFINED if PNG_TARGET_STORES_DATA is not defined * A function to free data stored in png_struct::target_data. * - * png_target_init_filter_functions_impl + * png_target_init_filter_functions_impl [flag: png_target_filters] * OPTIONAL * Contains code to overwrite the png_struct::read_filter array, see * the definition of png_init_filter_functions. Need not be defined, * only called if target_state contains png_target_filters. * - * png_target_init_palette_support_impl + * png_target_do_expand_palette_impl [flag: png_target_expand_palette] * static function * OPTIONAL - * Contains code to initialize a palette transformation. This returns - * true if something has been set up. Only called if the state contains - * png_target_palette, need not be defined, may cancel the state flag - * in the png_struct to prevent further calls. + * Handles the transform. Need not be defined, only called if the + * state contains png_target_, may set this flag to zero, may + * return false to indicate that the transform was not done (so the + * C implementation must then execute). * - * png_target_do_expand_palette_impl - * static function - * OPTIONAL - * Handles palette expansion. Need not be defined, only called if the - * state contains png_target_palette, may set this flag to zero, may - * return false to indicate that the expansion was not done. - * - * Either png_target_init_filter_functions_impl or - * png_target_do_expand_palette_impl must be defined. + * Note that pngtarget.h verifies that at least one thing is implemented, the + * checks below ensure that the corresponding _impl macro is defined. */ /* This will fail in an obvious way with a meaningful error message if the file @@ -70,12 +63,17 @@ #endif #if defined(PNG_TARGET_STORES_DATA) != defined(png_target_free_data_impl) -# error HARDWARE: PNG_TARGET_STORES_DATA !match png_target_free_data_impl +# error HARDWARE: png_target_free_data_impl unexpected setting #endif -#if !defined(png_target_init_filter_functions_impl) &&\ - !defined(png_target_init_palette_support) -# error HARDWARE: target specifc code turned on but none provided +#if defined(PNG_TARGET_IMPLEMENTS_FILTERS) !=\ + defined(png_target_init_filter_functions_impl) +# error HARDWARE: png_target_init_filter_functions_impl unexpected setting +#endif + +#if defined(PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE) !=\ + defined(png_target_do_expand_palette_impl) +# error HARDWARE: png_target_do_expand_palette_impl unexpected setting #endif void @@ -88,7 +86,7 @@ png_target_init(png_structrp pp) # define F 0U # endif # ifdef png_target_do_expand_palette_impl -# define P png_target_palette +# define P png_target_expand_palette # else # define P 0U # endif @@ -101,9 +99,6 @@ png_target_init(png_structrp pp) } #ifdef PNG_TARGET_STORES_DATA -#ifndef png_target_free_data_impl -# error PNG_TARGET_STORES_DATA defined without implementation -#endif void png_target_free_data(png_structrp pp) { @@ -111,7 +106,7 @@ png_target_free_data(png_structrp pp) */ if (pp->target_data != NULL) { - png_target_free_data_impl(pp); + png_target_free_data_impl(pp); if (pp->target_data != NULL) png_error(pp, png_target_impl ": allocated data not released"); } @@ -119,9 +114,6 @@ png_target_free_data(png_structrp pp) #endif #ifdef PNG_TARGET_IMPLEMENTS_FILTERS -#ifndef png_target_init_filter_functions_impl -# error PNG_TARGET_IMPLEMENTS_FILTERS defined without implementation -#endif void png_target_init_filter_functions(png_structp pp, unsigned int bpp) { @@ -132,55 +124,17 @@ png_target_init_filter_functions(png_structp pp, unsigned int bpp) #endif /* filters */ #ifdef PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE -#ifndef png_target_init_palette_support_impl -# error PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE defined without implementation -#endif -void -png_target_init_palette_support(png_structrp pp) -{ - if (((pp->options >> PNG_TARGET_SPECIFIC_CODE) & 3) == PNG_OPTION_ON && - (pp->target_state & png_target_palette) != 0 && - !png_target_init_palette_support_impl(pp, bpp)) - png_ptr->target_state &= ~png_target_palette; -} - -#ifndef png_target_do_expand_palette_impl -# error PNG_TARGET_IMPLEMENTS_EXPAND_PALETTE defined without implementation -#endif int -png_target_do_expand_palette(png_structrp pp, png_row_infop rip, - png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp) +png_target_do_expand_palette(png_structrp pp, png_row_infop rip) + /*png_const_bytep row, const png_bytepp ssp, const png_bytepp ddp) */ { - if (((pp->options >> PNG_TARGET_SPECIFIC_CODE) & 3) == PNG_OPTION_ON && - (pp->target_state & png_target_palette) != 0) - return png_target_do_expand_palette_impl(pp, rip, row, ssp, ddp); + /* This is exactly like 'png_do_expand_palette' except that there is a check + * on the options and target_state: + */ + return ((pp->options >> PNG_TARGET_SPECIFIC_CODE) & 3) == PNG_OPTION_ON && + (pp->target_state & png_target_expand_palette) != 0 && + png_target_do_expand_palette_impl(pp, rip, pp->row_buf + 1, + pp->palette, pp->trans_alpha, pp->num_trans); } -#endif /* palette */ - -/* - * png_target_init_impl - * Set the mask of png_target_support values to - * png_struct::target_state. If the value is non-0 hardware support - * will be recorded as enabled. - * - * png_target_free_data_impl - * Must be defined if the implementation stores data in - * png_struct::target_data. Need not be defined otherwise. - * - * png_target_init_filter_functions_impl - * Contains code to overwrite the png_struct::read_filter array, see - * the definition of png_init_filter_functions. Need not be defined, - * only called if the state contains png_target_filters. - * - * png_target_init_palette_support_impl - * Contains code to initialize a palette transformation. This returns - * true if something has been set up. Only called if the state contains - * png_target_palette, need not be defined, may cancel the state flag - * in the png_struct to prevent further calls. - * - * png_target_do_expand_palette - * Handles palette expansion. Need not be defined, only called if the - * state contains png_target_palette, may set this flag to zero, may - * return false to indicate that the expansion was not done. - */ +#endif /* EXPAND_PALETTE */ #endif /* PNG_TARGET_ARCH */