Skip to content

Commit

Permalink
z: NULL initialize dataAddr field for 0 size arrays
Browse files Browse the repository at this point in the history
Update array inline allocation sequence to initialize dataAddr field
with the first data element address only for non-zero size arrays. For
zero size arrays the field is set to NULL instead.

Signed-off-by: Shubham Verma <[email protected]>
  • Loading branch information
VermaSh committed Jan 22, 2025
1 parent 73af0c7 commit 2d64d57
Showing 1 changed file with 123 additions and 131 deletions.
254 changes: 123 additions & 131 deletions runtime/compiler/z/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4869,11 +4869,6 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
TR::LabelSymbol *cFlowRegionDone = generateLabelSymbol(cg);
TR::LabelSymbol *oolFailLabel = generateLabelSymbol(cg);

#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
bool isOffHeapAllocationEnabled = TR::Compiler->om.isOffHeapAllocationEnabled();
TR::LabelSymbol *populateFirstDimDataAddrSlot = isOffHeapAllocationEnabled ? generateLabelSymbol(cg) : NULL;
#endif /* defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) */

// oolJumpLabel is a common point that all branches will jump to. From this label, we branch to OOL code.
// We do this instead of jumping directly to OOL code from mainline because the RA can only handle the case where there's
// a single jump point to OOL code.
Expand All @@ -4888,13 +4883,18 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
TR::Register *dimReg = cg->evaluate(secondChild);
TR::Register *classReg = cg->evaluate(thirdChild);

// In the mainline, first load the first and second dimensions' lengths into registers.
/* In the mainline, first load the first and second dimensions' lengths into registers.
*
* LGF is used instead of L so that the array size register can be stored to NULL the
* dataAddr field of 0-size arrays. The dataAddr field is 64 bits whereas the size field is
* 32 bits, so we need LGF to sign-extend the length to 64 bits: for a 0 length this
* leaves the upper 32 bits of the register clear as well.
*/
TR::Register *firstDimLenReg = cg->allocateRegister();
cursor = generateRXInstruction(cg, TR::InstOpCode::LGF, node, firstDimLenReg, generateS390MemoryReference(dimsPtrReg, 4, cg));
iComment("Load 1st dim length.");

TR::Register *secondDimLenReg = cg->allocateRegister();
cursor = generateRXInstruction(cg, TR::InstOpCode::L, node, secondDimLenReg, generateS390MemoryReference(dimsPtrReg, 0, cg));
cursor = generateRXInstruction(cg, TR::InstOpCode::LGF, node, secondDimLenReg, generateS390MemoryReference(dimsPtrReg, 0, cg));
iComment("Load 2nd dim length.");

// Check to see if second dimension is indeed 0. If yes, then proceed to handle the case here. Otherwise jump to OOL code.
Expand Down Expand Up @@ -4951,30 +4951,18 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
}

#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
if (isOffHeapAllocationEnabled)
{
TR_ASSERT_FATAL_WITH_NODE(node,
(TR::Compiler->om.compressObjectReferences()
&& (fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8)
|| (!TR::Compiler->om.compressObjectReferences()
&& fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField()),
"Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array if using compressed refs, "
"or same if using full refs. But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());

// Load dataAddr slot offset difference since 0 size arrays are treated as discontiguous.
generateRIInstruction(cg,
TR::InstOpCode::LGHI,
node,
temp1Reg,
static_cast<int32_t>(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()));
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, populateFirstDimDataAddrSlot);
}
else
bool isOffHeapAllocationEnabled = TR::Compiler->om.isOffHeapAllocationEnabled();
if (isOffHeapAllocationEnabled)
{
// Since 1st dimension is 0 length, firstDimLenReg holds 0 and can be stored to
// clear the dataAddr field. secondDimLenReg is also expected to be 0 at this point.
cursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg));
iComment("Clear 1st dim dataAddr field.");
}
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
{
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
}

cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
iComment("Init class field and jump.");

// We end up in this region of the ICF if the first dimension is non-zero and the second dimension is zero.
Expand Down Expand Up @@ -5026,6 +5014,27 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
iComment("Init 1st dim class field.");
cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfContiguousArraySizeField(), cg));
iComment("Init 1st dim size field.");

TR::Register *temp3Reg = NULL;
#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
if (isOffHeapAllocationEnabled)
{
temp3Reg = cg->allocateRegister();
// Populate dataAddr slot of 1st dimension array. We don't need to worry
// about zero length array since it has already been taken care of.
generateRXInstruction(cg,
TR::InstOpCode::LA,
node,
temp3Reg,
generateS390MemoryReference(targetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
cursor = generateRXInstruction(cg,
TR::InstOpCode::STG,
node,
temp3Reg,
generateS390MemoryReference(targetReg, fej9->getOffsetOfContiguousDataAddrField(), cg));
iComment("populateFirstDimDataAddrSlot.");
}
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
// temp2 point to end of 1st dim array i.e. start of 2nd dim
generateRRInstruction(cg, TR::InstOpCode::LGR, node, temp2Reg, targetReg);
generateRRInstruction(cg, TR::InstOpCode::AGR, node, temp2Reg, temp1Reg);
Expand Down Expand Up @@ -5058,24 +5067,20 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
iComment("Init 2st dim size field.");
}

TR::Register *temp3Reg = cg->allocateRegister();

#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
if (isOffHeapAllocationEnabled)
{
// Populate dataAddr slot for 2nd dimension zero size array.
generateRXInstruction(cg,
TR::InstOpCode::LA,
node,
temp3Reg,
generateS390MemoryReference(temp2Reg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg));
generateRXInstruction(cg,
TR::InstOpCode::STG,
node,
temp3Reg,
generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg));
}
if (isOffHeapAllocationEnabled)
{
// Since 2nd dimension array is 0 length, we can use secondDimLenReg
// to clear dataAddr field. secondDimLenReg is expected to be NULL
// at this point.
cursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, secondDimLenReg, generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg));
iComment("Clear 2nd dim dataAddr field.");
}
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
}

if (NULL == temp3Reg)
temp3Reg = cg->allocateRegister();

// Store 2nd dim element into 1st dim array slot, compress temp2 if needed
if (comp->target().is64Bit() && comp->useCompressedPointers())
Expand All @@ -5100,18 +5105,7 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, firstDimLenReg, 1);
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CL, node, firstDimLenReg, 0, TR::InstOpCode::COND_BNE, loopLabel, false);

#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
if (isOffHeapAllocationEnabled)
{
// No offset is needed since 1st dimension array is contiguous.
generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, temp1Reg, temp1Reg);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, populateFirstDimDataAddrSlot);
}
else
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
{
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);
}
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone);

TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions(0,10,cg);
dependencies->addPostCondition(dimReg, TR::RealRegister::AssignAny);
Expand All @@ -5128,28 +5122,6 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node,
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolJumpLabel);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, oolFailLabel);

#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
if (isOffHeapAllocationEnabled)
{
/* Populate dataAddr slot of 1st dimension array. Arrays of non-zero size
* use contiguous header layout while zero size arrays use discontiguous header layout.
*/
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, populateFirstDimDataAddrSlot);
iComment("populateFirstDimDataAddrSlot.");

generateRXInstruction(cg,
TR::InstOpCode::LA,
node,
temp3Reg,
generateS390MemoryReference(targetReg, temp1Reg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
generateRXInstruction(cg,
TR::InstOpCode::STG,
node,
temp3Reg,
generateS390MemoryReference(targetReg, temp1Reg, fej9->getOffsetOfContiguousDataAddrField(), cg));
}
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionDone, dependencies);

TR::Register *targetRegisterFinal = cg->allocateCollectedReferenceRegister();
Expand Down Expand Up @@ -11126,73 +11098,93 @@ J9::Z::TreeEvaluator::VMnewEvaluator(TR::Node * node, TR::CodeGenerator * cg)
* In both scenarios, arrays of non-zero size use contiguous header layout while zero size arrays use
* discontiguous header layout.
*/
TR::Register *offsetReg = NULL;
TR::MemoryReference *dataAddrMR = NULL;
TR::MemoryReference *dataAddrSlotMR = NULL;

if (isVariableLen && TR::Compiler->om.compressObjectReferences())
if (!TR::Compiler->om.compressObjectReferences())
{
/* We need to check enumReg (array size) at runtime to determine correct offset of dataAddr field.
* Here we deal only with compressed refs because dataAddr offset for discontiguous
* and contiguous arrays is the same in full refs.
*/
if (comp->getOption(TR_TraceCG))
traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node);

TR_ASSERT_FATAL_WITH_NODE(node,
(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8,
"Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array. "
"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),
"dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "
"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());

offsetReg = cg->allocateRegister();
// Invert enumReg sign. 0 and negative numbers remain unchanged.
iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGFR, node, offsetReg, enumReg, iCursor);
iCursor = generateRSInstruction(cg, TR::InstOpCode::SRLG, node, dataSizeReg, offsetReg, 63, iCursor);
iCursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, offsetReg, dataSizeReg, 3, iCursor);
// Inverting the sign bit will leave us with either -8 (if enumCopyReg > 0) or 0 (if enumCopyReg == 0).
iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGR, node, offsetReg, offsetReg, iCursor);

dataAddrMR = generateS390MemoryReference(resReg, offsetReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);
dataAddrSlotMR = generateS390MemoryReference(resReg, offsetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);
}
else if (!isVariableLen && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)

if (isVariableLen)
{
/* We need to check enumReg (array size) at runtime to adjust what we write to dataAddr field.
* We write NULL if array is 0 size and first data element address otherwise.
*
* We always write at the contiguous dataAddr field offset, even though 0 size arrays use
* the discontiguous header layout, because for a 0 size array the NULL write is harmless:
* - compressed refs: we overwrite size and must be zero field in array header,
* which is going to be 0 anyway
* - full refs: we write 0s to dataAddr field
*/
if (comp->getOption(TR_TraceCG))
traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length zero size array.\n", node);
traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node);

dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);
dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);
TR::Register *offsetReg = cg->allocateRegister();
iCursor = generateRRInstruction(cg, TR::InstOpCode::XGR, node, offsetReg, offsetReg, iCursor);
iCursor = generateRILInstruction(cg, TR::InstOpCode::CFI, node, enumReg, 0, iCursor);

// Load address of first array element
iCursor = generateRXInstruction(cg,
TR::InstOpCode::LA,
node,
dataSizeReg,
generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg),
iCursor);
// Use cc from CFI to load NULL into dataSizeReg for 0 size array
iCursor = generateRRFInstruction(cg, TR::InstOpCode::LOCGR, node, dataSizeReg, offsetReg, getMaskForBranchCondition(TR::InstOpCode::COND_BE), true, iCursor);

// Write element address to dataAddr field
iCursor = generateRXInstruction(cg,
TR::InstOpCode::STG,
node,
dataSizeReg,
generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg),
iCursor);

conditions->addPostCondition(offsetReg, TR::RealRegister::AssignAny);
cg->stopUsingRegister(offsetReg);
}
else
else if (node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)
{
if (comp->getOption(TR_TraceCG))
{
traceMsg(comp,
"Node (%p): Dealing with either full/compressed refs fixed length non-zero size array or full refs variable length array.\n",
node);
}
traceMsg(comp, "Node (%p): Dealing with fixed length 0 size array.\n", node);

if (!TR::Compiler->om.compressObjectReferences())
if (!comp->getOption(TR_DisableDualTLH) && node->canSkipZeroInitialization())
{
TR_ASSERT_FATAL_WITH_NODE(node,
fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),
"dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "
"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
if (comp->getOption(TR_TraceCG))
traceMsg(comp, "Node (%p): Clean out dataAddr field.\n", node);

uint16_t bytesToClear = static_cast<uint16_t>(TR::Compiler->om.discontiguousArrayHeaderSizeInBytes() - fej9->getOffsetOfDiscontiguousDataAddrField());
TR_ASSERT_FATAL_WITH_NODE(node, bytesToClear == 8, "dataAddr field is expected to be 8 bytes wide, but was %d bytes.\n", bytesToClear);

iCursor = generateSS1Instruction(cg, TR::InstOpCode::XC,
node,
bytesToClear - 1,
generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg),
generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg),
iCursor);
}

dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg);
}

iCursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, dataSizeReg, dataAddrMR, iCursor);
iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, dataSizeReg, dataAddrSlotMR, iCursor);

if (offsetReg)
else
{
conditions->addPostCondition(offsetReg, TR::RealRegister::AssignAny);
cg->stopUsingRegister(offsetReg);
if (comp->getOption(TR_TraceCG))
traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length non-zero size array.\n", node);

// Load address of first array element
iCursor = generateRXInstruction(cg,
TR::InstOpCode::LA,
node,
dataSizeReg,
generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg),
iCursor);
// Write first data element address to dataAddr field
iCursor = generateRXInstruction(cg,
TR::InstOpCode::STG,
node,
dataSizeReg,
generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg),
iCursor);
}
}
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
Expand Down

0 comments on commit 2d64d57

Please sign in to comment.