@@ -1142,7 +1142,7 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
1142
1142
TR::Compilation *comp = cg->comp();
1143
1143
const uint32_t elementSizeMask = isUTF16 ? 1 : 0;
1144
1144
const int8_t vectorSize = cg->machine()->getVRFSize();
1145
- const uintptr_t headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
1145
+
1146
1146
const bool supportsVSTRS = comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_2);
1147
1147
TR_Debug *compDebug = comp->getDebug();
1148
1148
TR::Instruction* cursor;
@@ -1162,12 +1162,45 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
1162
1162
TR_S390ScratchRegisterManager *srm = cg->generateScratchRegisterManager(9);
1163
1163
1164
1164
// Get call parameters where stringValue and patternValue are byte arrays
1165
- TR::Register* stringValueReg = cg->evaluate(node->getChild(firstCallArgIdx)) ;
1165
+ TR::Register* stringValueReg = NULL ;
1166
1166
TR::Register* stringLenReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+1));
1167
- TR::Register* patternValueReg = cg->evaluate(node->getChild(firstCallArgIdx+2)) ;
1167
+ TR::Register* patternValueReg = NULL ;
1168
1168
TR::Register* patternLenReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+3));
1169
1169
TR::Register* stringIndexReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+4));
1170
1170
1171
+ // Offset to be added to array object pointer to get to the data elements
1172
+ int32_t offsetToDataElements = static_cast<int32_t>(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
1173
+ #ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
1174
+ if (TR::Compiler->om.isOffHeapAllocationEnabled())
1175
+ {
1176
+ // Clobber evaluate string and pattern value nodes as we'll overwrite those with first data element address
1177
+ stringValueReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx));
1178
+ patternValueReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+2));
1179
+
1180
+ // Load first data element address for string
1181
+ generateRXInstruction(cg,
1182
+ TR::InstOpCode::getLoadOpCode(),
1183
+ node,
1184
+ stringValueReg,
1185
+ generateS390MemoryReference(stringValueReg, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
1186
+
1187
+ // Load first data element address for pattern
1188
+ generateRXInstruction(cg,
1189
+ TR::InstOpCode::getLoadOpCode(),
1190
+ node,
1191
+ patternValueReg,
1192
+ generateS390MemoryReference(patternValueReg, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
1193
+
1194
+ // Since the first data element address is retrieved from the array header, the offset is set to 0
1195
+ offsetToDataElements = 0;
1196
+ }
1197
+ else
1198
+ #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
1199
+ {
1200
+ stringValueReg = cg->evaluate(node->getChild(firstCallArgIdx));
1201
+ patternValueReg = cg->evaluate(node->getChild(firstCallArgIdx+2));
1202
+ }
1203
+
1171
1204
// Registers
1172
1205
TR::Register* matchIndexReg = cg->allocateRegister();
1173
1206
TR::Register* maxIndexReg = srm->findOrCreateScratchRegister();
@@ -1281,14 +1314,14 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
1281
1314
1282
1315
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, (int8_t)vectorSize, labelPatternLoad16Bytes, TR::InstOpCode::COND_BNL);
1283
1316
generateRIEInstruction(cg, TR::InstOpCode::getAddHalfWordImmDistinctOperandOpCode(), node, loadLenReg, patternLenReg, -1);
1284
- generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, patternHeadVReg, loadLenReg, generateS390MemoryReference(patternValueReg, headerSize , cg));
1317
+ generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, patternHeadVReg, loadLenReg, generateS390MemoryReference(patternValueReg, offsetToDataElements , cg));
1285
1318
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, loadLenReg, patternLenReg);
1286
1319
generateVRSbInstruction(cg, TR::InstOpCode::VLVG, node, patternLenVReg, patternLenReg, generateS390MemoryReference(7, cg), 0);
1287
1320
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelPatternLoadDone);
1288
1321
1289
1322
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLoad16Bytes);
1290
1323
iComment("load first 16 bytes of the pattern");
1291
- generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternHeadVReg, generateS390MemoryReference(patternValueReg, headerSize , cg));
1324
+ generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternHeadVReg, generateS390MemoryReference(patternValueReg, offsetToDataElements , cg));
1292
1325
generateRIInstruction(cg, TR::InstOpCode::LHI, node, loadLenReg, vectorSize);
1293
1326
generateVRSbInstruction(cg, TR::InstOpCode::VLVG, node, patternLenVReg, loadLenReg, generateS390MemoryReference(7, cg), 0);
1294
1327
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLoadDone);
@@ -1307,13 +1340,13 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
1307
1340
// e.g. If the load length is 8 bytes, the highest index is 7. Hence, the need for -1.
1308
1341
cursor = generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, -1);
1309
1342
iComment("needs -1 because VLL's third operand is the highest index to load");
1310
- generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, headerSize , cg));
1343
+ generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, offsetToDataElements , cg));
1311
1344
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, 1);
1312
1345
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelLoadStringLenDone);
1313
1346
srm->reclaimScratchRegister(stringCharPtrReg);
1314
1347
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelLoadString16Bytes);
1315
1348
iComment("load 16 bytes of the string");
1316
- generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, headerSize , cg));
1349
+ generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, offsetToDataElements , cg));
1317
1350
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, loadLenReg, vectorSize);
1318
1351
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelLoadStringLenDone);
1319
1352
iComment("bytes of the string have been loaded");
@@ -1392,7 +1425,7 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
1392
1425
/************************************** 1st char of pattern ******************************************/
1393
1426
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelFindPatternHead);
1394
1427
iComment("find first character of pattern");
1395
- generateVRXInstruction(cg, TR::InstOpCode::VLREP, node, patternFirstCharVReg, generateS390MemoryReference(patternValueReg, headerSize , cg), elementSizeMask);
1428
+ generateVRXInstruction(cg, TR::InstOpCode::VLREP, node, patternFirstCharVReg, generateS390MemoryReference(patternValueReg, offsetToDataElements , cg), elementSizeMask);
1396
1429
1397
1430
// Determine string load length. loadLenReg is either vectorSize-1 (15) or the 1st_char_matching residue length.
1398
1431
generateRIEInstruction(cg, TR::InstOpCode::getAddHalfWordImmDistinctOperandOpCode(), node, loadLenReg, stringIndexReg, vectorSize);
@@ -1411,7 +1444,7 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
1411
1444
TR::Register* stringCharPtrReg = srm->findOrCreateScratchRegister();
1412
1445
TR::LabelSymbol* labelExtractFirstCharPos = generateLabelSymbol(cg);
1413
1446
generateRRRInstruction(cg, TR::InstOpCode::getAddThreeRegOpCode(), node, stringCharPtrReg, stringValueReg, stringIndexReg);
1414
- generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, headerSize , cg));
1447
+ generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, offsetToDataElements , cg));
1415
1448
generateVRRbInstruction(cg, TR::InstOpCode::VFEE, node, searchResultVReg, stringVReg, patternFirstCharVReg, 0x1, elementSizeMask);
1416
1449
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, labelExtractFirstCharPos);
1417
1450
srm->reclaimScratchRegister(stringCharPtrReg);
@@ -1452,11 +1485,11 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
1452
1485
1453
1486
// Vector loads use load index. And [load_index = load_len - 1]
1454
1487
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, -1);
1455
- generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, headerSize , cg));
1488
+ generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, offsetToDataElements , cg));
1456
1489
srm->reclaimScratchRegister(stringCharPtrReg);
1457
1490
// If VSTRS is supported, the first VSTRS already handled the 1st 16 bytes at this point (full match in the 1st 16
1458
- // bytes). Hence, residue offset starts at 16.
1459
- uint32_t patternResidueDisp = headerSize + (supportsVSTRS ? vectorSize : 0);
1491
+ // bytes). Hence, residue offset starts 16 bytes past the first data element. In off-heap mode offsetToDataElements is 0, so only the VSTRS adjustment (if any) remains.
1492
+ uint32_t patternResidueDisp = offsetToDataElements + (supportsVSTRS ? vectorSize : 0);
1460
1493
1461
1494
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, patternVReg, loadLenReg, generateS390MemoryReference(patternValueReg, patternResidueDisp, cg));
1462
1495
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, 1);
@@ -1504,8 +1537,8 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
1504
1537
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelMatchPatternLoop);
1505
1538
iComment("start search for reset of the pattern");
1506
1539
// Start to match the rest of the pattern
1507
- generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, headerSize , cg));
1508
- generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternVReg, generateS390MemoryReference(patternValueReg, patternIndexReg, headerSize , cg));
1540
+ generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, offsetToDataElements , cg));
1541
+ generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternVReg, generateS390MemoryReference(patternValueReg, patternIndexReg, offsetToDataElements , cg));
1509
1542
1510
1543
generateVRRbInstruction(cg, TR::InstOpCode::VCEQ, node, searchResultVReg, stringVReg, patternVReg, 1, elementSizeMask);
1511
1544
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, labelPartialPatternMatch);
@@ -1588,6 +1621,10 @@ J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGene
1588
1621
{
1589
1622
cg->decReferenceCount(node->getChild(i));
1590
1623
}
1624
+ cg->stopUsingRegister(stringValueReg);
1625
+ cg->stopUsingRegister(stringLenReg);
1626
+ cg->stopUsingRegister(patternValueReg);
1627
+ cg->stopUsingRegister(patternLenReg);
1591
1628
cg->stopUsingRegister(stringIndexReg);
1592
1629
srm->stopUsingRegisters();
1593
1630
@@ -1842,7 +1879,31 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
1842
1879
// receiver. Hence, the need for static call check.
1843
1880
const bool isStaticCall = node->getSymbolReference()->getSymbol()->castToMethodSymbol()->isStatic();
1844
1881
const uint8_t firstCallArgIdx = isStaticCall ? 0 : 1;
1845
- TR::Register* array = cg->evaluate(node->getChild(firstCallArgIdx));
1882
+ TR::Register* array = NULL;
1883
+
1884
+ // Offset to be added to array object pointer to get to the data elements
1885
+ int32_t offsetToDataElements = static_cast<int32_t>(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
1886
+ #ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
1887
+ if (TR::Compiler->om.isOffHeapAllocationEnabled())
1888
+ {
1889
+ // Clobber evaluate array node as we'll overwrite it with first data element address
1890
+ array = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx));
1891
+
1892
+ // Load first data element address
1893
+ generateRXInstruction(cg,
1894
+ TR::InstOpCode::getLoadOpCode(),
1895
+ node,
1896
+ array,
1897
+ generateS390MemoryReference(array, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
1898
+
1899
+ // Since the first data element address is retrieved from the array header, the offset is set to 0
1900
+ offsetToDataElements = 0;
1901
+ }
1902
+ else
1903
+ #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
1904
+ {
1905
+ array = cg->evaluate(node->getChild(firstCallArgIdx));
1906
+ }
1846
1907
TR::Register* ch = cg->evaluate(node->getChild(firstCallArgIdx+1));
1847
1908
TR::Register* offset = cg->evaluate(node->getChild(firstCallArgIdx+2));
1848
1909
TR::Register* length = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+3));
@@ -1871,6 +1932,7 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
1871
1932
TR::LabelSymbol* cFlowRegionEnd = generateLabelSymbol( cg);
1872
1933
1873
1934
TR::RegisterDependencyConditions* regDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 8, cg);
1935
+
1874
1936
regDeps->addPostCondition(array, TR::RealRegister::AssignAny);
1875
1937
regDeps->addPostCondition(loopCounter, TR::RealRegister::AssignAny);
1876
1938
regDeps->addPostCondition(indexRegister, TR::RealRegister::AssignAny);
@@ -1912,8 +1974,7 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
1912
1974
1913
1975
// VLL takes an index, not a count, so subtract 1 from the count
1914
1976
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, loadLength, 1);
1915
-
1916
- generateRXInstruction(cg, TR::InstOpCode::LA, node, offsetAddress, generateS390MemoryReference(array, indexRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
1977
+ generateRXInstruction(cg, TR::InstOpCode::LA, node, offsetAddress, generateS390MemoryReference(array, indexRegister, offsetToDataElements, cg));
1917
1978
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, charBufferVector, loadLength, generateS390MemoryReference(offsetAddress, 0, cg));
1918
1979
1919
1980
generateVRRbInstruction(cg, TR::InstOpCode::VFEE, node, resultVector, charBufferVector, valueVector, 0x1, elementSizeMask);
@@ -1941,7 +2002,7 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
1941
2002
1942
2003
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, loopLabel);
1943
2004
1944
- generateVRXInstruction(cg, TR::InstOpCode::VL, node, charBufferVector, generateS390MemoryReference(array, indexRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() , cg));
2005
+ generateVRXInstruction(cg, TR::InstOpCode::VL, node, charBufferVector, generateS390MemoryReference(array, indexRegister, offsetToDataElements , cg));
1945
2006
1946
2007
generateVRRbInstruction(cg, TR::InstOpCode::VFEE, node, resultVector, charBufferVector, valueVector, 0x1, elementSizeMask);
1947
2008
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, foundLabel);
@@ -1968,6 +2029,11 @@ J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator
1968
2029
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, regDeps);
1969
2030
cFlowRegionEnd->setEndInternalControlFlow();
1970
2031
2032
+ cg->stopUsingRegister(array);
2033
+ cg->stopUsingRegister(ch);
2034
+ cg->stopUsingRegister(offset);
2035
+ cg->stopUsingRegister(length);
2036
+
1971
2037
cg->stopUsingRegister(loopCounter);
1972
2038
cg->stopUsingRegister(loadLength);
1973
2039
cg->stopUsingRegister(offsetAddress);
0 commit comments