Skip to content

Commit 5a767e1

Browse files
author
Nicolai Parlog
committed
Improve vector API demo based on Jorn's review
1 parent 841bae1 commit 5a767e1

File tree

1 file changed

+25
-16
lines changed

1 file changed

+25
-16
lines changed

src/main/java/dev/nipafx/demo/java_next/api/vector/ImageColors.java

+25-16
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@
2222
public class ImageColors {
2323

2424
private static final VectorSpecies<Byte> RGB_SPECIES = ByteVector.SPECIES_PREFERRED;
25+
/**
26+
* Some color manipulations that are implemented here are easier to understand
27+
* if they compute full pixels, i.e. always three RGB bytes at a time. So we limit
28+
* the steps taken for each vector to the largest multiple of three that does not exceed the species length (e.g. 30 instead of 32).
29+
*/
2530
private static final int RGB_STEPS = RGB_SPECIES.length() - RGB_SPECIES.length() % 3;
2631

2732
private static final VectorShuffle<Byte> COLOR_SHUFFLE = VectorShuffle.fromOp(RGB_SPECIES, ImageColors::rotateRgbValues);
@@ -107,16 +112,12 @@ private static byte[] invertColors(byte[] image) {
107112
private static byte[] invertColors_vectorized(byte[] image) {
108113
byte[] newImage = new byte[image.length];
109114

110-
// The species can be larger than the steps taken in each loop
111-
// (i.e. `RGB_SPECIES.length() >= RGB_STEPS`), which creates
112-
// the risk that the last iteration wants to write to an array
113-
// that could contain `RGB_STEPS` more values but not
114-
// `RGB_SPECIES.length()` more values. To prevent that, execute
115-
// one fewer vectorized loop.
116-
int loopBound = RGB_SPECIES.loopBound(image.length) - RGB_STEPS;
115+
int loopBound = RGB_SPECIES.loopBound(image.length);
117116
int pixel = 0;
118-
// vectorized loop
119-
for (; pixel < loopBound; pixel += RGB_STEPS) {
117+
// vectorized loop:
118+
// to invert colors, we can ignore how three color values form one pixel
119+
// and advance in steps of species length
120+
for (; pixel < loopBound; pixel += RGB_SPECIES.length()) {
120121
var rgbValues = ByteVector.fromArray(RGB_SPECIES, image, pixel);
121122
var newRgbValues = rgbValues.neg();
122123
newRgbValues.intoArray(newImage, pixel);
@@ -162,10 +163,18 @@ private static byte[] rotateColors(byte[] image) {
162163
private static byte[] rotateColors_vectorized(byte[] image) {
163164
byte[] newImage = new byte[image.length];
164165

165-
// see comment in `invertColors_vectorized`
166+
// Because the loop advances in `RGB_STEPS`, not the number of lanes
167+
// as intended by the vector API, the loop bound that `RGB_SPECIES.loopBound` computes can
168+
// be too high.
169+
// E.g. for 32 lanes and image length 64, `RGB_SPECIES.loopBound(image.length)`
170+
// is 64 for the two iterations [0..31] and [32..63]. But if we only
171+
// take steps of 30, the loop condition would lead to iterations [0..29],
172+
// [30..59], [60..💥]. Subtracting `RGB_STEPS` prevents that.
166173
int loopBound = RGB_SPECIES.loopBound(image.length) - RGB_STEPS;
167174
int pixel = 0;
168-
// vectorized loop
175+
// vectorized loop:
176+
// for rotating colors, it's helpful to always deal in color value triples
177+
// (i.e. full pixels), so advance in `RGB_STEPS`
169178
for (; pixel < loopBound; pixel += RGB_STEPS) {
170179
var rgbValues = ByteVector.fromArray(RGB_SPECIES, image, pixel);
171180
var newRgbValues = rgbValues.rearrange(COLOR_SHUFFLE);
@@ -234,10 +243,12 @@ private static byte[] purpleShift_vectorized(byte[] image) {
234243
byte[] newImage = new byte[image.length];
235244

236245
double imageLength = image.length;
237-
// see comment in `invertColors_vectorized`
246+
// see comment in `rotateColors_vectorized`
238247
int loopBound = RGB_SPECIES.loopBound(image.length) - RGB_STEPS;
239248
int pixel = 0;
240-
// vectorized loop
249+
// vectorized loop:
250+
// for shifting the color, it's helpful to always deal in color value triples
251+
// (i.e. full pixels), so advance in `RGB_STEPS`
241252
for (; pixel < loopBound; pixel += RGB_STEPS) {
242253
// Deviating from the classic loop, the quotient is not computed for each pixel,
243254
// but for each "pixel block" of length `RGB_STEPS`. This means the resulting image
@@ -247,9 +258,7 @@ private static byte[] purpleShift_vectorized(byte[] image) {
247258
byte purpleIndex = (byte) (255 * purpleQuotient);
248259

249260
var rgbValues = ByteVector.fromArray(RGB_SPECIES, image, pixel);
250-
var purpleRgbValues = (ByteVector) RGB_SPECIES
251-
.broadcast(0)
252-
.blend(purpleIndex, PURPLE_SHIFT);
261+
var purpleRgbValues = ByteVector.zero(RGB_SPECIES).blend(purpleIndex, PURPLE_SHIFT);
253262
var purpleMask = rgbValues.compare(VectorOperators.UNSIGNED_LT, purpleRgbValues);
254263
var newRgbValues = rgbValues.blend(purpleRgbValues, purpleMask);
255264

0 commit comments

Comments
 (0)