/*
 * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2022, Arm Limited. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package org.graalvm.compiler.lir.aarch64;

import static jdk.vm.ci.aarch64.AArch64.SIMD;
import static jdk.vm.ci.code.ValueUtil.asRegister;
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;

import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler;
import org.graalvm.compiler.asm.aarch64.AArch64Address;
import org.graalvm.compiler.asm.aarch64.AArch64Assembler;
import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler;
import org.graalvm.compiler.core.common.LIRKind;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.AllocatableValue;

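/**
 * Inflates a Latin-1 encoded byte array into a UTF-16 char array by zero-extending each source
 * byte to a 16-bit char in the destination. {@code len} holds the number of elements to inflate;
 * a length of zero returns immediately.
 */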
@Opcode("AArch64_STRING_INFLATE")
public final class AArch64StringLatin1InflateOp extends AArch64LIRInstruction {
    public static final LIRInstructionClass<AArch64StringLatin1InflateOp> TYPE = LIRInstructionClass.create(AArch64StringLatin1InflateOp.class);

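    /** Number of elements processed per SIMD iteration: one 128-bit vector of 16 Latin-1 bytes. */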
    private static final int CHUNK_ELEMENT_COUNT = 16;

    @Use({REG}) protected AllocatableValue len;
    @Alive({REG}) protected AllocatableValue src;
    @Alive({REG}) protected AllocatableValue dst;
    @Temp({REG}) protected AllocatableValue temp1;
    @Temp({REG}) protected AllocatableValue temp2;
    @Temp({REG}) protected AllocatableValue temp3;
    @Temp({REG}) protected AllocatableValue vectorTemp1;
    @Temp({REG}) protected AllocatableValue vectorTemp2;

    public AArch64StringLatin1InflateOp(LIRGeneratorTool tool, AllocatableValue src, AllocatableValue dst, AllocatableValue len) {
        super(TYPE);
        assert len.getPlatformKind().equals(AArch64Kind.DWORD) : len;
        assert src.getPlatformKind().equals(AArch64Kind.QWORD) : src;
        assert dst.getPlatformKind().equals(AArch64Kind.QWORD) : dst;

        this.len = len;
        this.src = src;
        this.dst = dst;
        LIRKind archWordKind = LIRKind.value(AArch64Kind.QWORD);
        temp1 = tool.newVariable(archWordKind);
        temp2 = tool.newVariable(archWordKind);
        temp3 = tool.newVariable(archWordKind);
        LIRKind vectorKind = LIRKind.value(tool.target().arch.getLargestStorableKind(SIMD));
        vectorTemp1 = tool.newVariable(vectorKind);
        vectorTemp2 = tool.newVariable(vectorKind);
    }

    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        Label simdImpl = new Label();
        Label done = new Label();

        Register length = asRegister(temp1);
        Register srcAddress = asRegister(temp2);
        Register destAddress = asRegister(temp3);

        // return immediately if length is zero
        masm.cbz(32, asRegister(len), done);

        /*
         * Sign-extend length. Note length is guaranteed to be a non-negative value, so this is
         * equivalent to zero-extending length.
         */
        masm.sxt(64, 32, length, asRegister(len));

        masm.mov(64, srcAddress, asRegister(src));
        masm.mov(64, destAddress, asRegister(dst));
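        // use the SIMD path only when at least one full chunk of elements is available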
        masm.compare(64, length, CHUNK_ELEMENT_COUNT);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, simdImpl);

        emitScalar(masm, srcAddress, destAddress, length);
        masm.jmp(done);

        masm.bind(simdImpl);
        emitSIMD(masm, srcAddress, destAddress, length);

        masm.bind(done);
    }

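    /**
     * Scalar fallback for inputs shorter than {@link #CHUNK_ELEMENT_COUNT} elements: inflates one
     * byte per iteration. Assumes {@code count} is greater than zero.
     */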
    private static void emitScalar(AArch64MacroAssembler masm, Register srcAddress, Register destAddress, Register count) {
        Label loop = new Label();

        try (AArch64MacroAssembler.ScratchRegister scratchReg1 = masm.getScratchRegister()) {
            Register val = scratchReg1.getRegister();

            masm.align(AArch64MacroAssembler.PREFERRED_LOOP_ALIGNMENT);
            masm.bind(loop);
            // ldr zero-extends val to 64 bits
            masm.ldr(8, val, AArch64Address.createImmediateAddress(8, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, srcAddress, 1));
            masm.str(16, val, AArch64Address.createImmediateAddress(16, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, destAddress, 2));
            masm.subs(64, count, count, 1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.GT, loop);
        }
    }

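    /**
     * SIMD implementation: inflates {@link #CHUNK_ELEMENT_COUNT} bytes per iteration by loading a
     * 128-bit vector of Latin-1 bytes and unsigned-extending its low and high halves into two
     * vectors of 16-bit chars. Assumes {@code length} is at least {@link #CHUNK_ELEMENT_COUNT}.
     */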
    private void emitSIMD(AArch64MacroAssembler masm, Register srcChunkAddress, Register destChunkAddress, Register length) {
        Register destLowV = asRegister(vectorTemp1);
        Register destHighV = asRegister(vectorTemp2);

        Label simdLoop = new Label();
        Label done = new Label();

        try (AArch64MacroAssembler.ScratchRegister scratchRegister1 = masm.getScratchRegister(); AArch64MacroAssembler.ScratchRegister scratchRegister2 = masm.getScratchRegister()) {
            Register endOfSrcAddress = scratchRegister1.getRegister();
            Register lastChunkAddress = scratchRegister2.getRegister();

            masm.add(64, endOfSrcAddress, srcChunkAddress, length);
            masm.sub(64, lastChunkAddress, endOfSrcAddress, CHUNK_ELEMENT_COUNT);

            masm.align(AArch64MacroAssembler.PREFERRED_LOOP_ALIGNMENT);
            masm.bind(simdLoop);
            // load a chunk of CHUNK_ELEMENT_COUNT Latin-1 bytes
            masm.fldr(128, destLowV, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, srcChunkAddress, CHUNK_ELEMENT_COUNT));
            // split the elements across 2 registers and inflate each byte to a 16-bit char
            masm.neon.uxtl2VV(AArch64ASIMDAssembler.ElementSize.Byte, destHighV, destLowV);
            masm.neon.uxtlVV(AArch64ASIMDAssembler.ElementSize.Byte, destLowV, destLowV);
            // store the inflated elements as a register pair
            masm.fstp(128, destLowV, destHighV, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, destChunkAddress, CHUNK_ELEMENT_COUNT * 2));
            masm.cmp(64, srcChunkAddress, lastChunkAddress);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LO, simdLoop);

            /*
             * Process the last chunk. Move the source position back to the last chunk,
             * CHUNK_ELEMENT_COUNT bytes before the end of the input array, and move the
             * destination position back by twice that distance, since each byte inflates to two
             * bytes. The last chunk overlaps elements that were already inflated; rewriting them
             * with the same values is harmless.
             */
            masm.cmp(64, srcChunkAddress, endOfSrcAddress);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.HS, done);
            masm.sub(64, srcChunkAddress, srcChunkAddress, lastChunkAddress);
            masm.sub(64, destChunkAddress, destChunkAddress, srcChunkAddress, AArch64Assembler.ShiftType.LSL, 1);
            masm.mov(64, srcChunkAddress, lastChunkAddress);
            masm.jmp(simdLoop);

            masm.bind(done);
        }
    }
}