Skip to content

Commit dd882a5

Browse files
committed
[GR-37306] Add AArch64 SIMD intrinsic for StringLatin1.inflate and StringUTF16.compress.
PullRequest: graal/11254
2 parents f08a573 + 33e4f67 commit dd882a5

File tree

11 files changed

+642
-57
lines changed

11 files changed

+642
-57
lines changed

compiler/src/org.graalvm.compiler.asm.aarch64/src/org/graalvm/compiler/asm/aarch64/AArch64ASIMDAssembler.java

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3036,7 +3036,7 @@ public void ushlVVV(ASIMDSize size, ElementSize eSize, Register dst, Register sr
30363036
/**
30373037
* C7.2.391 Unsigned shift left long (immediate).<br>
30383038
* <p>
3039-
* From the manual: " This instruction reads each vector element in the lower half of the source
3039+
* From the manual: "This instruction reads each vector element in the lower half of the source
30403040
* SIMD&FP register, shifts the unsigned integer value left by the specified number of bits ...
30413041
* The destination vector elements are twice as long as the source vector elements."
30423042
*
@@ -3063,12 +3063,12 @@ public void ushllVVI(ElementSize srcESize, Register dst, Register src, int shift
30633063
/**
30643064
* C7.2.391 Unsigned shift left long (immediate).<br>
30653065
* <p>
3066-
* From the manual: " This instruction reads each vector element in the upper half of the source
3066+
* From the manual: "This instruction reads each vector element in the upper half of the source
30673067
* SIMD&FP register, shifts the unsigned integer value left by the specified number of bits ...
30683068
* The destination vector elements are twice as long as the source vector elements."
30693069
*
30703070
* @param srcESize source element size. Cannot be ElementSize.DoubleWord. The destination
3071-
* element size will be double this width.
3071+
* element size will be twice this width.
30723072
* @param dst SIMD register.
30733073
* @param src SIMD register.
30743074
* @param shiftAmt shift left amount.
@@ -3189,7 +3189,8 @@ public void uzp2VVV(ASIMDSize dstSize, ElementSize eSize, Register dst, Register
31893189
* C7.2.402 Extract narrow.<br>
31903190
* <p>
31913191
* From the manual: "This instruction reads each vector element from the source SIMD&FP
3192-
* register, narrows each value to half the original width, and writes the register..."
3192+
* register, narrows each value to half the original width, and writes into the lower half of
3193+
* the destination register..."
31933194
*
31943195
* @param dstESize destination element size. Cannot be ElementSize.DoubleWord. The source
31953196
* element size is twice this width.
@@ -3204,6 +3205,26 @@ public void xtnVV(ElementSize dstESize, Register dst, Register src) {
32043205
twoRegMiscEncoding(ASIMDInstruction.XTN, false, elemSizeXX(dstESize), dst, src);
32053206
}
32063207

3208+
/**
 * C7.2.402 Extract narrow.<br>
 * <p>
 * From the manual: "This instruction reads each vector element from the source SIMD&FP
 * register, narrows each value to half the original width, and writes into the upper half of
 * the destination register..."
 * <p>
 * This is the XTN2 variant: it leaves the lower half of {@code dst} untouched, so it is
 * typically paired with a preceding XTN that fills the lower half.
 *
 * @param dstESize destination element size. Cannot be ElementSize.DoubleWord. The source
 *            element size is twice this width.
 * @param dst SIMD register.
 * @param src SIMD register.
 */
public void xtn2VV(ElementSize dstESize, Register dst, Register src) {
    assert dst.getRegisterCategory().equals(SIMD);
    assert src.getRegisterCategory().equals(SIMD);
    // narrowing from DoubleWord's double width is not encodable
    assert dstESize != ElementSize.DoubleWord;

    /*
     * Same base encoding as xtnVV; the boolean second argument (true here, false in xtnVV)
     * selects the upper-half (XTN2) form -- presumably the instruction's Q bit; confirm
     * against twoRegMiscEncoding.
     */
    twoRegMiscEncoding(ASIMDInstruction.XTN, true, elemSizeXX(dstESize), dst, src);
}
3227+
32073228
/**
32083229
* C7.2.403 Zip vectors (primary).
32093230
* <p>

compiler/src/org.graalvm.compiler.core.aarch64/src/org/graalvm/compiler/core/aarch64/AArch64LIRGenerator.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@
7171
import org.graalvm.compiler.lir.aarch64.AArch64Move.MembarOp;
7272
import org.graalvm.compiler.lir.aarch64.AArch64PauseOp;
7373
import org.graalvm.compiler.lir.aarch64.AArch64SpeculativeBarrier;
74+
import org.graalvm.compiler.lir.aarch64.AArch64StringLatin1InflateOp;
75+
import org.graalvm.compiler.lir.aarch64.AArch64StringUTF16CompressOp;
7476
import org.graalvm.compiler.lir.aarch64.AArch64ZapRegistersOp;
7577
import org.graalvm.compiler.lir.aarch64.AArch64ZapStackOp;
7678
import org.graalvm.compiler.lir.aarch64.AArch64ZeroMemoryOp;
@@ -558,6 +560,18 @@ public Variable emitEncodeArray(Value src, Value dst, Value length, CharsetName
558560
return result;
559561
}
560562

563+
@Override
564+
public void emitStringLatin1Inflate(Value src, Value dst, Value len) {
565+
append(new AArch64StringLatin1InflateOp(this, asAllocatable(src), asAllocatable(dst), asAllocatable(len)));
566+
}
567+
568+
@Override
569+
public Variable emitStringUTF16Compress(Value src, Value dst, Value len) {
570+
Variable result = newVariable(LIRKind.value(AArch64Kind.DWORD));
571+
append(new AArch64StringUTF16CompressOp(this, asAllocatable(src), asAllocatable(dst), asAllocatable(len), result));
572+
return result;
573+
}
574+
561575
@Override
562576
protected JavaConstant zapValueForKind(PlatformKind kind) {
563577
long dead = 0xDEADDEADDEADDEADL;

compiler/src/org.graalvm.compiler.hotspot.aarch64/src/org/graalvm/compiler/hotspot/aarch64/AArch64HotSpotBackendFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2018, Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*

compiler/src/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -392,10 +392,6 @@ public UnimplementedGraalIntrinsics(GraalHotSpotVMConfig config, Architecture ar
392392
if (arch instanceof AArch64) {
393393
add(toBeInvestigated,
394394
"java/lang/StringCoding.hasNegatives([BII)Z",
395-
"java/lang/StringLatin1.inflate([BI[BII)V",
396-
"java/lang/StringLatin1.inflate([BI[CII)V",
397-
"java/lang/StringUTF16.compress([BI[BII)I",
398-
"java/lang/StringUTF16.compress([CI[BII)I",
399395
"java/lang/Thread.onSpinWait()V",
400396
"jdk/internal/util/ArraysSupport.vectorizedMismatch(Ljava/lang/Object;JLjava/lang/Object;JII)I");
401397
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/*
2+
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2022, Arm Limited. All rights reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation. Oracle designates this
9+
* particular file as subject to the "Classpath" exception as provided
10+
* by Oracle in the LICENSE file that accompanied this code.
11+
*
12+
* This code is distributed in the hope that it will be useful, but WITHOUT
13+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15+
* version 2 for more details (a copy is included in the LICENSE file that
16+
* accompanied this code).
17+
*
18+
* You should have received a copy of the GNU General Public License version
19+
* 2 along with this work; if not, write to the Free Software Foundation,
20+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21+
*
22+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23+
* or visit www.oracle.com if you need additional information or have any
24+
* questions.
25+
*/
26+
package org.graalvm.compiler.lir.aarch64;
27+
28+
import static jdk.vm.ci.aarch64.AArch64.SIMD;
29+
import static jdk.vm.ci.code.ValueUtil.asRegister;
30+
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
31+
32+
import org.graalvm.compiler.asm.Label;
33+
import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler;
34+
import org.graalvm.compiler.asm.aarch64.AArch64Address;
35+
import org.graalvm.compiler.asm.aarch64.AArch64Assembler;
36+
import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler;
37+
import org.graalvm.compiler.core.common.LIRKind;
38+
import org.graalvm.compiler.lir.LIRInstructionClass;
39+
import org.graalvm.compiler.lir.Opcode;
40+
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
41+
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;
42+
43+
import jdk.vm.ci.aarch64.AArch64Kind;
44+
import jdk.vm.ci.code.Register;
45+
import jdk.vm.ci.meta.AllocatableValue;
46+
47+
/**
 * Inflates a sequence of Latin-1 bytes into UTF-16 chars: each source byte is zero-extended to a
 * 16-bit halfword and stored to the destination. Lengths below {@link #CHUNK_ELEMENT_COUNT} use a
 * scalar byte-at-a-time loop; otherwise a SIMD loop widens 16 bytes per iteration.
 */
@Opcode("AArch64_STRING_INFLATE")
public final class AArch64StringLatin1InflateOp extends AArch64LIRInstruction {
    public static final LIRInstructionClass<AArch64StringLatin1InflateOp> TYPE = LIRInstructionClass.create(AArch64StringLatin1InflateOp.class);

    // Number of source bytes processed per SIMD iteration (one 128-bit vector of bytes).
    private static final int CHUNK_ELEMENT_COUNT = 16;

    // Element count to inflate; only read at the start of emitCode, so @Use suffices.
    @Use({REG}) protected AllocatableValue len;
    // Source/destination base addresses; @Alive because they must stay valid while temps are live.
    @Alive({REG}) protected AllocatableValue src;
    @Alive({REG}) protected AllocatableValue dst;
    @Temp({REG}) protected AllocatableValue temp1;
    @Temp({REG}) protected AllocatableValue temp2;
    @Temp({REG}) protected AllocatableValue temp3;
    @Temp({REG}) protected AllocatableValue vectorTemp1;
    @Temp({REG}) protected AllocatableValue vectorTemp2;

    /**
     * @param src pointer to the first source byte (QWORD).
     * @param dst pointer to the first destination char (QWORD).
     * @param len number of elements to inflate (DWORD, assumed non-negative).
     */
    public AArch64StringLatin1InflateOp(LIRGeneratorTool tool, AllocatableValue src, AllocatableValue dst, AllocatableValue len) {
        super(TYPE);
        assert len.getPlatformKind().equals(AArch64Kind.DWORD) : len;
        assert src.getPlatformKind().equals(AArch64Kind.QWORD) : src;
        assert dst.getPlatformKind().equals(AArch64Kind.QWORD) : dst;

        this.len = len;
        this.src = src;
        this.dst = dst;
        LIRKind archWordKind = LIRKind.value(AArch64Kind.QWORD);
        temp1 = tool.newVariable(archWordKind);
        temp2 = tool.newVariable(archWordKind);
        temp3 = tool.newVariable(archWordKind);
        // Full-width SIMD temporaries for the vectorized loop.
        LIRKind vectorKind = LIRKind.value(tool.target().arch.getLargestStorableKind(SIMD));
        vectorTemp1 = tool.newVariable(vectorKind);
        vectorTemp2 = tool.newVariable(vectorKind);
    }

    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        Label simdImpl = new Label();
        Label done = new Label();

        Register length = asRegister(temp1);
        Register srcAddress = asRegister(temp2);
        Register destAddress = asRegister(temp3);

        // return immediately if length is zero
        masm.cbz(32, asRegister(len), done);

        /*
         * Sign-extend length. Note length is guaranteed to be a non-negative value, so this is
         * equivalent to zero-extending length.
         */
        masm.sxt(64, 32, length, asRegister(len));

        // Work on copies so the @Alive src/dst operands are never clobbered.
        masm.mov(64, srcAddress, asRegister(src));
        masm.mov(64, destAddress, asRegister(dst));

        // Dispatch: lengths >= CHUNK_ELEMENT_COUNT take the SIMD path, shorter ones the scalar path.
        masm.compare(64, length, CHUNK_ELEMENT_COUNT);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, simdImpl);

        emitScalar(masm, srcAddress, destAddress, length);
        masm.jmp(done);

        masm.bind(simdImpl);
        emitSIMD(masm, srcAddress, destAddress, length);

        masm.bind(done);
    }

    /**
     * Byte-at-a-time inflation: load one byte (zero-extended), store one halfword, decrement
     * count. Precondition (established by emitCode): count >= 1.
     */
    private static void emitScalar(AArch64MacroAssembler masm, Register srcAddress, Register destAddress, Register count) {
        Label loop = new Label();

        try (AArch64MacroAssembler.ScratchRegister scratchReg1 = masm.getScratchRegister()) {
            Register val = scratchReg1.getRegister();

            masm.align(AArch64MacroAssembler.PREFERRED_LOOP_ALIGNMENT);
            masm.bind(loop);
            // ldr zero-extends val to 64 bits
            masm.ldr(8, val, AArch64Address.createImmediateAddress(8, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, srcAddress, 1));
            masm.str(16, val, AArch64Address.createImmediateAddress(16, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, destAddress, 2));
            masm.subs(64, count, count, 1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.GT, loop);
        }
    }

    /**
     * Vectorized inflation: each iteration loads 16 source bytes, widens them to 16 halfwords
     * across two vector registers, and stores 32 destination bytes. The final partial chunk is
     * handled by re-running the loop over the last 16 source bytes, deliberately re-inflating
     * some already-processed elements (safe because the operation is idempotent per element).
     * Precondition: length >= CHUNK_ELEMENT_COUNT.
     */
    private void emitSIMD(AArch64MacroAssembler masm, Register srcChunkAddress, Register destChunkAddress, Register length) {
        Register destLowV = asRegister(vectorTemp1);
        Register destHighV = asRegister(vectorTemp2);

        Label simdLoop = new Label();
        Label done = new Label();

        try (AArch64MacroAssembler.ScratchRegister scratchRegister1 = masm.getScratchRegister(); AArch64MacroAssembler.ScratchRegister scratchRegister2 = masm.getScratchRegister()) {
            Register endOfSrcAddress = scratchRegister1.getRegister();
            Register lastChunkAddress = scratchRegister2.getRegister();

            // endOfSrcAddress = src + length; lastChunkAddress = start of the final 16-byte chunk.
            masm.add(64, endOfSrcAddress, srcChunkAddress, length);
            masm.sub(64, lastChunkAddress, endOfSrcAddress, CHUNK_ELEMENT_COUNT);

            masm.align(AArch64MacroAssembler.PREFERRED_LOOP_ALIGNMENT);
            masm.bind(simdLoop);
            // load elements
            masm.fldr(128, destLowV, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, srcChunkAddress, CHUNK_ELEMENT_COUNT));
            // split elements across 2 registers and inflate
            // NOTE: uxtl2 must run first -- it reads destLowV's upper half, which the
            // following uxtl (dst == src == destLowV) would otherwise destroy.
            masm.neon.uxtl2VV(AArch64ASIMDAssembler.ElementSize.Byte, destHighV, destLowV);
            masm.neon.uxtlVV(AArch64ASIMDAssembler.ElementSize.Byte, destLowV, destLowV);
            // store inflated elements
            masm.fstp(128, destLowV, destHighV, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, destChunkAddress, CHUNK_ELEMENT_COUNT * 2));
            masm.cmp(64, srcChunkAddress, lastChunkAddress);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LO, simdLoop);

            /*
             * Process the last chunk. Move the source position back to the last chunk, 16 bytes
             * before the end of the input array. Move the destination position back twice the
             * movement of source position.
             */
            masm.cmp(64, srcChunkAddress, endOfSrcAddress);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.HS, done);
            // srcChunkAddress temporarily holds the overshoot (srcChunkAddress - lastChunkAddress);
            // rewind dest by twice that amount (chars are 2 bytes), then restart at lastChunkAddress.
            masm.sub(64, srcChunkAddress, srcChunkAddress, lastChunkAddress);
            masm.sub(64, destChunkAddress, destChunkAddress, srcChunkAddress, AArch64Assembler.ShiftType.LSL, 1);
            masm.mov(64, srcChunkAddress, lastChunkAddress);
            masm.jmp(simdLoop);

            masm.bind(done);
        }
    }
}

0 commit comments

Comments
 (0)