Skip to content

Commit

Permalink
Force outlining for array EA API
Browse files Browse the repository at this point in the history
Macros for Array Effective Address calculations are inherently inlined
and seem to create significant register or code cache pressure in the
Bytecode interpreter.

They are rewritten in C and forced to be outlined specifically for GNU
compilers on X and Z, where we saw a regression when Offheap was
introduced (which made the macros more complex, creating even more
pressure).

For other platforms, where we did not see a regression, we continue to
inline (ATM it is unknown whether outlining would have a negative or
positive effect). Hence we still keep it in a header (*.h) file.

Signed-off-by: Aleksandar Micic <[email protected]>
  • Loading branch information
Aleksandar Micic authored and Aleksandar Micic committed Feb 7, 2025
1 parent f714d73 commit a872cbd
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 8 deletions.
13 changes: 5 additions & 8 deletions runtime/oti/j9accessbarrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,11 @@ typedef struct J9IndexableObject* mm_j9array_t;
* else
* discontiguous
*/
/*
 * Effective Address selection for callers that have a vmThread.
 * Dispatches on (vmThread)->indexableObjectLayout:
 *  - J9IndexableObjectLayout_NoDataAddr_NoArraylet -> J9JAVAARRAYCONTIGUOUS_BASE_EA
 *  - J9IndexableObjectLayout_DataAddr_NoArraylet   -> J9JAVAARRAYCONTIGUOUS_WITH_DATAADDRESS_VIRTUALLARGEOBJECTHEAPENABLED_EA
 *  - any other layout: J9ISCONTIGUOUSARRAY(vmThread, array) picks between
 *    J9JAVAARRAYCONTIGUOUS_EA and J9JAVAARRAYDISCONTIGUOUS_EA.
 * vmThread and array are substituted into more than one sub-expression,
 * so arguments with side effects should not be passed.
 * NOTE(review): this view also shows a later one-line definition of the same
 * macro name; presumably this nested-ternary form is the one being replaced - confirm.
 */
#define J9JAVAARRAY_EA(vmThread, array, index, elemType) \
((J9IndexableObjectLayout_NoDataAddr_NoArraylet == (vmThread)->indexableObjectLayout) \
? J9JAVAARRAYCONTIGUOUS_BASE_EA(vmThread, array, index, elemType) \
: ((J9IndexableObjectLayout_DataAddr_NoArraylet == (vmThread)->indexableObjectLayout) \
? J9JAVAARRAYCONTIGUOUS_WITH_DATAADDRESS_VIRTUALLARGEOBJECTHEAPENABLED_EA(vmThread, array, index, elemType) \
: (J9ISCONTIGUOUSARRAY(vmThread, array) \
? J9JAVAARRAYCONTIGUOUS_EA(vmThread, array, index, elemType) \
: J9JAVAARRAYDISCONTIGUOUS_EA(vmThread, array, index, elemType))))



/*
 * Effective Address calculation for callers using vmThread is passed to a C implementation,
 * which may force outlining on some platforms.
 * The callee name j9javaArray_<elemType>_EA is built by token pasting, so elemType must be a
 * single identifier (e.g. U_32) for which such a function is declared. The array argument is
 * cast to J9IndexableObject * before the call.
 */
#define J9JAVAARRAY_EA(vmThread, array, index, elemType) j9javaArray_##elemType##_EA(vmThread, (J9IndexableObject *)(array), index)

#define J9JAVAARRAY_EA_VM(javaVM, array, index, elemType) \
((J9IndexableObjectLayout_NoDataAddr_NoArraylet == (javaVM)->indexableObjectLayout) \
Expand Down
69 changes: 69 additions & 0 deletions runtime/oti/j9accessbarrierhelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,75 @@
#ifndef J9ACCESSBARRIERHELPERS_H
#define J9ACCESSBARRIERHELPERS_H

#if defined (J9VM_ENV_DATA64)
/**
 * Compute the base address from which the effective address of an array element is derived.
 *
 * @param vmThread[in] current thread; supplies the indexable object layout, the header sizes and the reference compression mode
 * @param array[in] the indexable object
 * @param index[in/out] element index; for a discontiguous array only, it is overwritten with the index relative to the selected leaf
 * @param elementSize[in] size of one array element, in bytes
 * @return base address to which (index * element size) is to be added
 */
#if (defined(__GNUC__) && (defined(J9HAMMER) || defined(S390)))
/* GNU compilers on X and Z: keep this helper out of line, since inlining it seems to create much register or code cache pressure within the Bytecode Interpreter */
__attribute__ ((noinline))
#else /* (defined(__GNUC__) && (defined(J9HAMMER) || defined(S390))) */
VMINLINE
#endif /* (defined(__GNUC__) && (defined(J9HAMMER) || defined(S390))) */
static UDATA j9javaArray_BA(J9VMThread *vmThread, J9IndexableObject *array, UDATA *index, U_8 elementSize)
{
	UDATA const arrayAddress = (UDATA)array;
	/* Address just past the (base) contiguous header; the header size depends on reference compression */
	UDATA pastBaseHeader = arrayAddress;

	if (J9VMTHREAD_COMPRESS_OBJECT_REFERENCES(vmThread)) {
		pastBaseHeader += sizeof(J9IndexableObjectContiguousCompressed);
	} else {
		pastBaseHeader += sizeof(J9IndexableObjectContiguousFull);
	}

	if (J9IndexableObjectLayout_NoDataAddr_NoArraylet == vmThread->indexableObjectLayout) {
		/* Standard GCs: the address just past the header is the result */
		return pastBaseHeader;
	}

	if (J9IndexableObjectLayout_DataAddr_NoArraylet == vmThread->indexableObjectLayout) {
		/* Balanced Offheap: dereference dataAddr, which is stored just after the (base) header */
		return *(UDATA *)pastBaseHeader;
	}

	/* GCs that may have arraylets (Balanced arraylet or Metronome): the base address is derived from scratch */
	if (J9ISCONTIGUOUSARRAY(vmThread, array)) {
		return arrayAddress + vmThread->contiguousIndexableHeaderSize;
	} else {
		/* Leaf tokens are stored right after the discontiguous header */
		fj9object_t *leafTable = (fj9object_t *)(arrayAddress + vmThread->discontiguousIndexableHeaderSize);
		/* While arrayletLeafSize is UDATA, the quotient fits into U_32 (simply because Java can't have more array elements) */
		U_32 const perLeaf = (U_32)(J9VMTHREAD_JAVAVM(vmThread)->arrayletLeafSize / elementSize);
		U_32 const elementIndex = (U_32)*index;
		U_32 const leaf = elementIndex / perLeaf;

		/* Hand the leaf-relative index back to the caller */
		*index = elementIndex % perLeaf;

		if (J9VMTHREAD_COMPRESS_OBJECT_REFERENCES(vmThread)) {
			/* Compressed references: 32-bit tokens that have to be converted to a pointer */
			U_32 compressedToken = ((U_32 *)leafTable)[leaf];
			return (UDATA)J9_CONVERT_POINTER_FROM_TOKEN__(vmThread, compressedToken);
		}

		/* Full references: the slot already holds the leaf address */
		return ((UDATA *)leafTable)[leaf];
	}
}

/**
 * Generates the typed Effective Address function j9javaArray_<elemType>_EA().
 *
 * The generated function takes the thread, the array and an element index, and returns a
 * pointer to the element: the base address obtained from j9javaArray_BA() plus
 * index * sizeof(elemType).
 *
 * elemType must be a single identifier, since it is pasted into the function name.
 * The macro body ends at the closing brace (no trailing line continuation), so whatever
 * follows the definition can never be spliced into the macro.
 */
#define J9JAVAARRAY_C_EA(elemType) \
VMINLINE static elemType *j9javaArray_##elemType##_EA(J9VMThread *vmThread, J9IndexableObject *array, UDATA index) \
{ \
	/* index is passed by address: for discontiguous arrays j9javaArray_BA rewrites it to a leaf-relative index */ \
	UDATA baseAddress = j9javaArray_BA(vmThread, array, &index, (U_8)sizeof(elemType)); \
	/* Intentionally inlining this to treat sizeof value as an immediate value */ \
	return (elemType *)(baseAddress + index * sizeof(elemType)); \
}

/*
 * Generate C bodies: each line below expands J9JAVAARRAY_C_EA into one
 * j9javaArray_<elemType>_EA function for the listed element type.
 */

J9JAVAARRAY_C_EA(I_8)
J9JAVAARRAY_C_EA(U_8)
J9JAVAARRAY_C_EA(I_16)
J9JAVAARRAY_C_EA(U_16)
J9JAVAARRAY_C_EA(I_32)
J9JAVAARRAY_C_EA(U_32)
J9JAVAARRAY_C_EA(I_64)
J9JAVAARRAY_C_EA(U_64)
J9JAVAARRAY_C_EA(IDATA)
J9JAVAARRAY_C_EA(UDATA)
#endif /* defined (J9VM_ENV_DATA64) */

/**
* These helpers could be written as macros (where the body of methods would be wrapped around oval parenthesis, which would mean that the last expression in the block
* is return value of the block). However, it is not fully supported by ANSI, but only select C compilers, like GNU C: https://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html).
Expand Down

0 comments on commit a872cbd

Please sign in to comment.