Skip to content

Commit a785cc7

Browse files
authored
Merge pull request #21796 from VermaSh/enableStringIndexOf
Enable inlineIntrinsicIndexOf for off-heap
2 parents a3f2552 + d77e768 commit a785cc7

File tree

2 files changed

+87
-20
lines changed

2 files changed

+87
-20
lines changed

runtime/compiler/z/codegen/J9CodeGenerator.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,9 @@ J9::Z::CodeGenerator::initialize()
8787
if (cg->getSupportsVectorRegisters() && !comp->getOption(TR_DisableSIMDStringCaseConv))
8888
cg->setSupportsInlineStringCaseConversion();
8989

90-
if (cg->getSupportsVectorRegisters() && !comp->getOption(TR_DisableFastStringIndexOf) &&
91-
!TR::Compiler->om.canGenerateArraylets() && !TR::Compiler->om.isOffHeapAllocationEnabled())
90+
if (cg->getSupportsVectorRegisters()
91+
&& !comp->getOption(TR_DisableFastStringIndexOf)
92+
&& !TR::Compiler->om.canGenerateArraylets())
9293
{
9394
cg->setSupportsInlineStringIndexOf();
9495
}

runtime/compiler/z/codegen/J9TreeEvaluator.cpp

Lines changed: 84 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,7 +1142,7 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
11421142
TR::Compilation *comp = cg->comp();
11431143
const uint32_t elementSizeMask = isUTF16 ? 1 : 0;
11441144
const int8_t vectorSize = cg->machine()->getVRFSize();
1145-
const uintptr_t headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
1145+
11461146
const bool supportsVSTRS = comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_2);
11471147
TR_Debug *compDebug = comp->getDebug();
11481148
TR::Instruction* cursor;
@@ -1162,12 +1162,45 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
11621162
TR_S390ScratchRegisterManager *srm = cg->generateScratchRegisterManager(9);
11631163

11641164
// Get call parameters where stringValue and patternValue are byte arrays
1165-
TR::Register* stringValueReg = cg->evaluate(node->getChild(firstCallArgIdx));
1165+
TR::Register* stringValueReg = NULL;
11661166
TR::Register* stringLenReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+1));
1167-
TR::Register* patternValueReg = cg->evaluate(node->getChild(firstCallArgIdx+2));
1167+
TR::Register* patternValueReg = NULL;
11681168
TR::Register* patternLenReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+3));
11691169
TR::Register* stringIndexReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+4));
11701170

1171+
// Offset to be added to array object pointer to get to the data elements
1172+
int32_t offsetToDataElements = static_cast<int32_t>(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
1173+
#ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
1174+
if (TR::Compiler->om.isOffHeapAllocationEnabled())
1175+
{
1176+
// Clobber evaluate string and pattern value nodes as we'll overwrite those with first data element address
1177+
stringValueReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx));
1178+
patternValueReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+2));
1179+
1180+
// Load first data element address for string
1181+
generateRXInstruction(cg,
1182+
TR::InstOpCode::getLoadOpCode(),
1183+
node,
1184+
stringValueReg,
1185+
generateS390MemoryReference(stringValueReg, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
1186+
1187+
// Load first data element address for pattern
1188+
generateRXInstruction(cg,
1189+
TR::InstOpCode::getLoadOpCode(),
1190+
node,
1191+
patternValueReg,
1192+
generateS390MemoryReference(patternValueReg, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
1193+
1194+
// Since the first data element address is retrieved from the array header, the offset is set to 0
1195+
offsetToDataElements = 0;
1196+
}
1197+
else
1198+
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
1199+
{
1200+
stringValueReg = cg->evaluate(node->getChild(firstCallArgIdx));
1201+
patternValueReg = cg->evaluate(node->getChild(firstCallArgIdx+2));
1202+
}
1203+
11711204
// Registers
11721205
TR::Register* matchIndexReg = cg->allocateRegister();
11731206
TR::Register* maxIndexReg = srm->findOrCreateScratchRegister();
@@ -1281,14 +1314,14 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
12811314

12821315
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, (int8_t)vectorSize, labelPatternLoad16Bytes, TR::InstOpCode::COND_BNL);
12831316
generateRIEInstruction(cg, TR::InstOpCode::getAddHalfWordImmDistinctOperandOpCode(), node, loadLenReg, patternLenReg, -1);
1284-
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, patternHeadVReg, loadLenReg, generateS390MemoryReference(patternValueReg, headerSize, cg));
1317+
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, patternHeadVReg, loadLenReg, generateS390MemoryReference(patternValueReg, offsetToDataElements, cg));
12851318
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, loadLenReg, patternLenReg);
12861319
generateVRSbInstruction(cg, TR::InstOpCode::VLVG, node, patternLenVReg, patternLenReg, generateS390MemoryReference(7, cg), 0);
12871320
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelPatternLoadDone);
12881321

12891322
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLoad16Bytes);
12901323
iComment("load first 16 bytes of the pattern");
1291-
generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternHeadVReg, generateS390MemoryReference(patternValueReg, headerSize, cg));
1324+
generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternHeadVReg, generateS390MemoryReference(patternValueReg, offsetToDataElements, cg));
12921325
generateRIInstruction(cg, TR::InstOpCode::LHI, node, loadLenReg, vectorSize);
12931326
generateVRSbInstruction(cg, TR::InstOpCode::VLVG, node, patternLenVReg, loadLenReg, generateS390MemoryReference(7, cg), 0);
12941327
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLoadDone);
@@ -1307,13 +1340,13 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
13071340
// e.g. If the load length is 8 bytes, the highest index is 7. Hence, the need for -1.
13081341
cursor = generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, -1);
13091342
iComment("needs -1 because VLL's third operand is the highest index to load");
1310-
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, headerSize, cg));
1343+
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, offsetToDataElements, cg));
13111344
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, 1);
13121345
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelLoadStringLenDone);
13131346
srm->reclaimScratchRegister(stringCharPtrReg);
13141347
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelLoadString16Bytes);
13151348
iComment("load 16 bytes of the string");
1316-
generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, headerSize, cg));
1349+
generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, offsetToDataElements, cg));
13171350
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, loadLenReg, vectorSize);
13181351
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelLoadStringLenDone);
13191352
iComment("bytes of the string have been loaded");
@@ -1392,7 +1425,7 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
13921425
/************************************** 1st char of pattern ******************************************/
13931426
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelFindPatternHead);
13941427
iComment("find first character of pattern");
1395-
generateVRXInstruction(cg, TR::InstOpCode::VLREP, node, patternFirstCharVReg, generateS390MemoryReference(patternValueReg, headerSize, cg), elementSizeMask);
1428+
generateVRXInstruction(cg, TR::InstOpCode::VLREP, node, patternFirstCharVReg, generateS390MemoryReference(patternValueReg, offsetToDataElements, cg), elementSizeMask);
13961429

13971430
// Determine string load length. loadLenReg is either vectorSize-1 (15) or the 1st_char_matching residue length.
13981431
generateRIEInstruction(cg, TR::InstOpCode::getAddHalfWordImmDistinctOperandOpCode(), node, loadLenReg, stringIndexReg, vectorSize);
@@ -1411,7 +1444,7 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
14111444
TR::Register* stringCharPtrReg = srm->findOrCreateScratchRegister();
14121445
TR::LabelSymbol* labelExtractFirstCharPos = generateLabelSymbol(cg);
14131446
generateRRRInstruction(cg, TR::InstOpCode::getAddThreeRegOpCode(), node, stringCharPtrReg, stringValueReg, stringIndexReg);
1414-
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, headerSize, cg));
1447+
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, offsetToDataElements, cg));
14151448
generateVRRbInstruction(cg, TR::InstOpCode::VFEE, node, searchResultVReg, stringVReg, patternFirstCharVReg, 0x1, elementSizeMask);
14161449
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, labelExtractFirstCharPos);
14171450
srm->reclaimScratchRegister(stringCharPtrReg);
@@ -1452,11 +1485,11 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
14521485

14531486
// Vector loads use load index. And [load_index = load_len - 1]
14541487
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, -1);
1455-
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, headerSize, cg));
1488+
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, offsetToDataElements, cg));
14561489
srm->reclaimScratchRegister(stringCharPtrReg);
14571490
// If VSTRS is supported, the first VSTRS already handled the 1st 16 bytes at this point (full match in the 1st 16
1458-
// bytes). Hence, residue offset starts at 16.
1459-
uint32_t patternResidueDisp = headerSize + (supportsVSTRS ? vectorSize : 0);
1491+
// bytes). Hence, residue offset starts at 16. This applies only to non-off-heap mode; for off-heap, offsetToDataElements is 0.
1492+
uint32_t patternResidueDisp = offsetToDataElements + (supportsVSTRS ? vectorSize : 0);
14601493

14611494
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, patternVReg, loadLenReg, generateS390MemoryReference(patternValueReg, patternResidueDisp, cg));
14621495
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, 1);
@@ -1504,8 +1537,8 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
15041537
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelMatchPatternLoop);
15051538
iComment("start search for reset of the pattern");
15061539
// Start to match the rest of the pattern
1507-
generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, headerSize, cg));
1508-
generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternVReg, generateS390MemoryReference(patternValueReg, patternIndexReg, headerSize, cg));
1540+
generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, offsetToDataElements, cg));
1541+
generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternVReg, generateS390MemoryReference(patternValueReg, patternIndexReg, offsetToDataElements, cg));
15091542

15101543
generateVRRbInstruction(cg, TR::InstOpCode::VCEQ, node, searchResultVReg, stringVReg, patternVReg, 1, elementSizeMask);
15111544
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, labelPartialPatternMatch);
@@ -1588,6 +1621,10 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
15881621
{
15891622
cg->decReferenceCount(node->getChild(i));
15901623
}
1624+
cg->stopUsingRegister(stringValueReg);
1625+
cg->stopUsingRegister(stringLenReg);
1626+
cg->stopUsingRegister(patternValueReg);
1627+
cg->stopUsingRegister(patternLenReg);
15911628
cg->stopUsingRegister(stringIndexReg);
15921629
srm->stopUsingRegisters();
15931630

@@ -1842,7 +1879,31 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
18421879
// receiver. Hence, the need for static call check.
18431880
const bool isStaticCall = node->getSymbolReference()->getSymbol()->castToMethodSymbol()->isStatic();
18441881
const uint8_t firstCallArgIdx = isStaticCall ? 0 : 1;
1845-
TR::Register* array = cg->evaluate(node->getChild(firstCallArgIdx));
1882+
TR::Register* array = NULL;
1883+
1884+
// Offset to be added to array object pointer to get to the data elements
1885+
int32_t offsetToDataElements = static_cast<int32_t>(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
1886+
#ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
1887+
if (TR::Compiler->om.isOffHeapAllocationEnabled())
1888+
{
1889+
// Clobber evaluate array node as we'll overwrite it with first data element address
1890+
array = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx));
1891+
1892+
// Load first data element address
1893+
generateRXInstruction(cg,
1894+
TR::InstOpCode::getLoadOpCode(),
1895+
node,
1896+
array,
1897+
generateS390MemoryReference(array, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
1898+
1899+
// Since the first data element address is retrieved from the array header, the offset is set to 0
1900+
offsetToDataElements = 0;
1901+
}
1902+
else
1903+
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
1904+
{
1905+
array = cg->evaluate(node->getChild(firstCallArgIdx));
1906+
}
18461907
TR::Register* ch = cg->evaluate(node->getChild(firstCallArgIdx+1));
18471908
TR::Register* offset = cg->evaluate(node->getChild(firstCallArgIdx+2));
18481909
TR::Register* length = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+3));
@@ -1871,6 +1932,7 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
18711932
TR::LabelSymbol* cFlowRegionEnd = generateLabelSymbol( cg);
18721933

18731934
TR::RegisterDependencyConditions* regDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 8, cg);
1935+
18741936
regDeps->addPostCondition(array, TR::RealRegister::AssignAny);
18751937
regDeps->addPostCondition(loopCounter, TR::RealRegister::AssignAny);
18761938
regDeps->addPostCondition(indexRegister, TR::RealRegister::AssignAny);
@@ -1912,8 +1974,7 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
19121974

19131975
// VLL takes an index, not a count, so subtract 1 from the count
19141976
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, loadLength, 1);
1915-
1916-
generateRXInstruction(cg, TR::InstOpCode::LA, node, offsetAddress, generateS390MemoryReference(array, indexRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
1977+
generateRXInstruction(cg, TR::InstOpCode::LA, node, offsetAddress, generateS390MemoryReference(array, indexRegister, offsetToDataElements, cg));
19171978
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, charBufferVector, loadLength, generateS390MemoryReference(offsetAddress, 0, cg));
19181979

19191980
generateVRRbInstruction(cg, TR::InstOpCode::VFEE, node, resultVector, charBufferVector, valueVector, 0x1, elementSizeMask);
@@ -1941,7 +2002,7 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
19412002

19422003
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, loopLabel);
19432004

1944-
generateVRXInstruction(cg, TR::InstOpCode::VL, node, charBufferVector, generateS390MemoryReference(array, indexRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2005+
generateVRXInstruction(cg, TR::InstOpCode::VL, node, charBufferVector, generateS390MemoryReference(array, indexRegister, offsetToDataElements, cg));
19452006

19462007
generateVRRbInstruction(cg, TR::InstOpCode::VFEE, node, resultVector, charBufferVector, valueVector, 0x1, elementSizeMask);
19472008
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, foundLabel);
@@ -1968,6 +2029,11 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
19682029
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, regDeps);
19692030
cFlowRegionEnd->setEndInternalControlFlow();
19702031

2032+
cg->stopUsingRegister(array);
2033+
cg->stopUsingRegister(ch);
2034+
cg->stopUsingRegister(offset);
2035+
cg->stopUsingRegister(length);
2036+
19712037
cg->stopUsingRegister(loopCounter);
19722038
cg->stopUsingRegister(loadLength);
19732039
cg->stopUsingRegister(offsetAddress);

0 commit comments

Comments
 (0)