2222public class ImageColors {
2323
2424 private static final VectorSpecies <Byte > RGB_SPECIES = ByteVector .SPECIES_PREFERRED ;
25+ /**
26+ * Some color manipulations implemented here are easier to understand
27+ * if they process full pixels, i.e. always three RGB bytes at a time. So we limit
28+ * the step taken per vector to the largest multiple of three that fits into the species length (e.g. 30 instead of 32).
29+ */
2530 private static final int RGB_STEPS = RGB_SPECIES .length () - RGB_SPECIES .length () % 3 ;
2631
2732 private static final VectorShuffle <Byte > COLOR_SHUFFLE = VectorShuffle .fromOp (RGB_SPECIES , ImageColors ::rotateRgbValues );
@@ -107,16 +112,12 @@ private static byte[] invertColors(byte[] image) {
107112 private static byte [] invertColors_vectorized (byte [] image ) {
108113 byte [] newImage = new byte [image .length ];
109114
110- // The species can be larger than the steps taken in each loop
111- // (i.e. `RGB_SPECIES.length() >= RGB_STEPS`), which creates
112- // the risk that the last iteration wants to write to an array
113- // that could contain `RGB_STEPS` more values but not
114- // `RGB_SPECIES.length()` more values. To prevent that, execute
115- // one fewer vectorized loop.
116- int loopBound = RGB_SPECIES .loopBound (image .length ) - RGB_STEPS ;
115+ int loopBound = RGB_SPECIES .loopBound (image .length );
117116 int pixel = 0 ;
118- // vectorized loop
119- for (; pixel < loopBound ; pixel += RGB_STEPS ) {
117+ // vectorized loop:
118+ // to invert colors, we can ignore how three color values form one pixel
119+ // and advance in steps of species length
120+ for (; pixel < loopBound ; pixel += RGB_SPECIES .length ()) {
120121 var rgbValues = ByteVector .fromArray (RGB_SPECIES , image , pixel );
121122 var newRgbValues = rgbValues .neg ();
122123 newRgbValues .intoArray (newImage , pixel );
@@ -162,10 +163,18 @@ private static byte[] rotateColors(byte[] image) {
162163 private static byte [] rotateColors_vectorized (byte [] image ) {
163164 byte [] newImage = new byte [image .length ];
164165
165- // see comment in `invertColors_vectorized`
166+ // Because the loop advances in steps of `RGB_STEPS`, not by the number of
167+ // lanes as the vector API intends, the bound that `loopBound` computes can
168+ // be too high.
169+ // E.g. for 32 lanes and image length 64, `RGB_SPECIES.loopBound(image.length)`
170+ // is 64, which fits the two full-width iterations [0..31] and [32..63]. But if we only
171+ // take steps of 30, the loop condition would lead to iterations [0..29],
172+ // [30..59], [60..💥]. Subtracting `RGB_STEPS` prevents that.
166173 int loopBound = RGB_SPECIES .loopBound (image .length ) - RGB_STEPS ;
167174 int pixel = 0 ;
168- // vectorized loop
175+ // vectorized loop:
176+ // for rotating colors, it's helpful to always deal in color value triples
177+ // (i.e. full pixels), so advance in `RGB_STEPS`
169178 for (; pixel < loopBound ; pixel += RGB_STEPS ) {
170179 var rgbValues = ByteVector .fromArray (RGB_SPECIES , image , pixel );
171180 var newRgbValues = rgbValues .rearrange (COLOR_SHUFFLE );
@@ -234,10 +243,12 @@ private static byte[] purpleShift_vectorized(byte[] image) {
234243 byte [] newImage = new byte [image .length ];
235244
236245 double imageLength = image .length ;
237- // see comment in `invertColors_vectorized `
246+ // see comment in `rotateColors_vectorized`
238247 int loopBound = RGB_SPECIES .loopBound (image .length ) - RGB_STEPS ;
239248 int pixel = 0 ;
240- // vectorized loop
249+ // vectorized loop:
250+ // for shifting the color, it's helpful to always deal in color value triples
251+ // (i.e. full pixels), so advance in `RGB_STEPS`
241252 for (; pixel < loopBound ; pixel += RGB_STEPS ) {
242253 // Deviating from the classic loop, the quotient is not computed for each pixel,
243254 // but for each "pixel block" of length `RGB_STEPS`. This means the resulting image
@@ -247,9 +258,7 @@ private static byte[] purpleShift_vectorized(byte[] image) {
247258 byte purpleIndex = (byte ) (255 * purpleQuotient );
248259
249260 var rgbValues = ByteVector .fromArray (RGB_SPECIES , image , pixel );
250- var purpleRgbValues = (ByteVector ) RGB_SPECIES
251- .broadcast (0 )
252- .blend (purpleIndex , PURPLE_SHIFT );
261+ var purpleRgbValues = ByteVector .zero (RGB_SPECIES ).blend (purpleIndex , PURPLE_SHIFT );
253262 var purpleMask = rgbValues .compare (VectorOperators .UNSIGNED_LT , purpleRgbValues );
254263 var newRgbValues = rgbValues .blend (purpleRgbValues , purpleMask );
255264
0 commit comments