diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 8a2b3aa0436..5437ff39cd2 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -6527,6 +6527,14 @@ void Assembler::xorw(Register dst, Register src) { emit_arith(0x33, 0xC0, dst, src); } +void Assembler::xorw(Register dst, Address src) { + InstructionMark im(this); + emit_int8(0x66); + prefix(src, dst); + emit_int8(0x33); + emit_operand(dst, src, 0); +} + // AVX 3-operands scalar float-point arithmetic instructions void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index ecb1f2d587c..430fa6be639 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -2244,6 +2244,7 @@ class Assembler : public AbstractAssembler { void xorb(Address dst, Register src); void xorb(Register dst, Address src); void xorw(Register dst, Register src); + void xorw(Register dst, Address src); void xorq(Register dst, Address src); void xorq(Address dst, int32_t imm32); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp index 1a256d0913d..9744169498c 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp @@ -2181,6 +2181,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist const Register rounds = rax; const Register pos = r12; + const Register tail = r15; Label PRELOOP_START, EXIT_PRELOOP, REMAINDER, REMAINDER_16, LOOP, END, EXIT, END_LOOP, AES192, AES256, AES192_REMAINDER16, REMAINDER16_END_LOOP, AES256_REMAINDER16, @@ -2615,29 +2616,36 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist // Save encrypted counter value in xmm0 for next invocation, before XOR operation __ movdqu(Address(saved_encCounter_start, 0), xmm0); // XOR encryted block cipher in xmm0 with PT to produce CT - __ evpxorq(xmm0, xmm0, Address(src_addr, pos, Address::times_1, 0), Assembler::AVX_128bit); // extract up to 15 bytes of CT from xmm0 as specified by length register __ testptr(len_reg, 8); __ jcc(Assembler::zero, EXTRACT_TAIL_4BYTES); - __ pextrq(Address(dest_addr, pos), xmm0, 0); + __ pextrq(tail, xmm0, 0); + __ xorq(tail, Address(src_addr, pos, Address::times_1, 0)); + __ movq(Address(dest_addr, pos), tail); __ psrldq(xmm0, 8); __ addl(pos, 8); __ bind(EXTRACT_TAIL_4BYTES); __ testptr(len_reg, 4); __ jcc(Assembler::zero, EXTRACT_TAIL_2BYTES); - __ pextrd(Address(dest_addr, pos), xmm0, 0); + __ pextrd(tail, xmm0, 0); + __ xorl(tail, Address(src_addr, pos, Address::times_1, 0)); + __ movl(Address(dest_addr, pos), tail); __ psrldq(xmm0, 4); __ addq(pos, 4); __ bind(EXTRACT_TAIL_2BYTES); __ testptr(len_reg, 2); __ jcc(Assembler::zero, EXTRACT_TAIL_1BYTE); - __ pextrw(Address(dest_addr, pos), xmm0, 0); + __ pextrw(tail, xmm0, 0); + __ xorw(tail, Address(src_addr, pos, Address::times_1, 0)); + __ movw(Address(dest_addr, pos), tail); __ psrldq(xmm0, 2); __ addl(pos, 2); __ bind(EXTRACT_TAIL_1BYTE); __ testptr(len_reg, 1); __ jcc(Assembler::zero, END); - __ pextrb(Address(dest_addr, pos), xmm0, 0); + __ pextrb(tail, xmm0, 0); + __ xorb(tail, Address(src_addr, pos, Address::times_1, 0)); + __ movb(Address(dest_addr, pos), tail); __ addl(pos, 1); __ bind(END);