@@ -6578,6 +6578,267 @@ static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* c
6578
6578
return resultReg;
6579
6579
}
6580
6580
6581
+ /* *
6582
+ * \brief
6583
+ * Generate inlined instructions equivalent to java/lang/StringLatin1.indexOf([BI[BII)I
6584
+ *
6585
+ * \param node
6586
+ * The tree node
6587
+ *
6588
+ * \param cg
6589
+ * The Code Generator
6590
+ *
6591
+ * Note that this version does not support discontiguous arrays
6592
+ */
6593
+ static TR::Register *inlineIntrinsicStringIndexOfString (TR::Node *node, TR::CodeGenerator *cg)
6594
+ {
6595
+ static bool verboseInlineStrIdxOfStr = (feGetEnv (" TR_verboseInlineStrIdxOfStr" ) != NULL );
6596
+ if (verboseInlineStrIdxOfStr)
6597
+ {
6598
+ fprintf (stderr, " *Latin1.indexOfString(): %s @%s\n " , cg->comp ()->signature (), cg->comp ()->getHotnessName ());
6599
+ }
6600
+
6601
+ TR_ASSERT_FATAL (!TR::Compiler->om .canGenerateArraylets (), " Discontiguous array is not supported" );
6602
+
6603
+ // This evaluator function handles different indexOf() intrinsics, some of which are static calls without a
6604
+ // receiver. Hence, the need for static call check.
6605
+ const bool isStaticCall = node->getSymbolReference ()->getSymbol ()->castToMethodSymbol ()->isStatic ();
6606
+ const uint8_t firstCallArgIdx = isStaticCall ? 0 : 1 ;
6607
+ TR::Register *s1Reg = cg->evaluate (node->getChild (firstCallArgIdx));
6608
+ TR::Node *s1lenNode = node->getChild (firstCallArgIdx+1 );
6609
+ TR::Register *s1lenReg = cg->evaluate (s1lenNode);
6610
+ TR::Register *s2Reg = cg->evaluate (node->getChild (firstCallArgIdx+2 ));
6611
+ TR::Register *s2lenReg = cg->evaluate (node->getChild (firstCallArgIdx+3 ));
6612
+ TR::Node *offsetNode = node->getChild (firstCallArgIdx+4 );
6613
+ TR::Register *offsetReg = cg->evaluate (offsetNode);
6614
+
6615
+ TR::Register *maxReg;
6616
+ if (s1lenNode->getReferenceCount () == 1 )
6617
+ {
6618
+ maxReg = s1lenReg;
6619
+ }
6620
+ else
6621
+ {
6622
+ maxReg = cg->allocateRegister (TR_GPR);
6623
+ generateMovInstruction (cg, node, maxReg, s1lenReg);
6624
+ }
6625
+
6626
+ TR::Register *resultReg;
6627
+ if (offsetNode->getReferenceCount () == 1 )
6628
+ {
6629
+ resultReg = offsetReg;
6630
+ }
6631
+ else
6632
+ {
6633
+ resultReg = cg->allocateRegister (TR_GPR);
6634
+ generateMovInstruction (cg, node, resultReg, offsetReg);
6635
+ }
6636
+
6637
+ TR::Register *s1addrReg = cg->allocateRegister (TR_GPR);
6638
+ TR::Register *s1idxReg = cg->allocateRegister (TR_GPR);
6639
+ TR::Register *s2addrReg = cg->allocateRegister (TR_GPR);
6640
+ TR::Register *s2idxReg = cg->allocateRegister (TR_GPR);
6641
+ TR::Register *tmp1Reg = cg->allocateRegister (TR_GPR);
6642
+ TR::Register *tmp2Reg = cg->allocateRegister (TR_GPR);
6643
+ TR::Register *s2firstCharReg = cg->allocateRegister (TR_VRF);
6644
+ TR::Register *vtmp1Reg = cg->allocateRegister (TR_VRF);
6645
+ TR::Register *vtmp2Reg = cg->allocateRegister (TR_VRF);
6646
+
6647
+ TR::RegisterDependencyConditions *dependencies = new (cg->trHeapMemory ()) TR::RegisterDependencyConditions (14 , 14 , cg->trMemory ());
6648
+ dependencies->addPreCondition (s1Reg, TR::RealRegister::NoReg);
6649
+ dependencies->addPreCondition (s2Reg, TR::RealRegister::NoReg);
6650
+ dependencies->addPreCondition (s2lenReg, TR::RealRegister::NoReg);
6651
+ dependencies->addPreCondition (maxReg, TR::RealRegister::NoReg);
6652
+ dependencies->addPreCondition (resultReg, TR::RealRegister::NoReg);
6653
+ dependencies->addPreCondition (s1addrReg, TR::RealRegister::NoReg);
6654
+ dependencies->addPreCondition (s1idxReg, TR::RealRegister::NoReg);
6655
+ dependencies->addPreCondition (s2addrReg, TR::RealRegister::NoReg);
6656
+ dependencies->addPreCondition (s2idxReg, TR::RealRegister::NoReg);
6657
+ dependencies->addPreCondition (tmp1Reg, TR::RealRegister::NoReg);
6658
+ dependencies->addPreCondition (tmp2Reg, TR::RealRegister::NoReg);
6659
+ dependencies->addPreCondition (s2firstCharReg, TR::RealRegister::NoReg);
6660
+ dependencies->addPreCondition (vtmp1Reg, TR::RealRegister::NoReg);
6661
+ dependencies->addPreCondition (vtmp2Reg, TR::RealRegister::NoReg);
6662
+
6663
+ dependencies->addPostCondition (s1Reg, TR::RealRegister::NoReg);
6664
+ dependencies->addPostCondition (s2Reg, TR::RealRegister::NoReg);
6665
+ dependencies->addPostCondition (s2lenReg, TR::RealRegister::NoReg);
6666
+ dependencies->addPostCondition (maxReg, TR::RealRegister::NoReg);
6667
+ dependencies->addPostCondition (resultReg, TR::RealRegister::NoReg);
6668
+ dependencies->addPostCondition (s1addrReg, TR::RealRegister::NoReg);
6669
+ dependencies->addPostCondition (s1idxReg, TR::RealRegister::NoReg);
6670
+ dependencies->addPostCondition (s2addrReg, TR::RealRegister::NoReg);
6671
+ dependencies->addPostCondition (s2idxReg, TR::RealRegister::NoReg);
6672
+ dependencies->addPostCondition (tmp1Reg, TR::RealRegister::NoReg);
6673
+ dependencies->addPostCondition (tmp2Reg, TR::RealRegister::NoReg);
6674
+ dependencies->addPostCondition (s2firstCharReg, TR::RealRegister::NoReg);
6675
+ dependencies->addPostCondition (vtmp1Reg, TR::RealRegister::NoReg);
6676
+ dependencies->addPostCondition (vtmp2Reg, TR::RealRegister::NoReg);
6677
+
6678
+ TR::LabelSymbol *startLabel = generateLabelSymbol (cg);
6679
+ TR::LabelSymbol *outerLoopLabel = generateLabelSymbol (cg);
6680
+ TR::LabelSymbol *firstCharLoopLabel = generateLabelSymbol (cg);
6681
+ TR::LabelSymbol *firstCharMatchedLabel = generateLabelSymbol (cg);
6682
+ TR::LabelSymbol *arrayCmpVectorLoopLabel = generateLabelSymbol (cg);
6683
+ TR::LabelSymbol *arrayCmpByteLoopLabel = generateLabelSymbol (cg);
6684
+ TR::LabelSymbol *unmatchedLabel = generateLabelSymbol (cg);
6685
+ TR::LabelSymbol *notFoundLabel = generateLabelSymbol (cg);
6686
+ TR::LabelSymbol *doneLabel = generateLabelSymbol (cg);
6687
+
6688
+ startLabel->setStartInternalControlFlow ();
6689
+ doneLabel->setEndInternalControlFlow ();
6690
+
6691
+ generateLabelInstruction (cg, TR::InstOpCode::label, node, startLabel);
6692
+
6693
+ const int32_t vecWidth = 16 ;
6694
+
6695
+ // Addresses of array elements
6696
+ #ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
6697
+ if (TR::Compiler->om .isOffHeapAllocationEnabled ())
6698
+ {
6699
+ uint32_t dataAddrOffset = static_cast <int32_t >(cg->comp ()->fej9 ()->getOffsetOfContiguousDataAddrField ());
6700
+ generateTrg1MemInstruction (cg, TR::InstOpCode::ldrimmx, node, s1addrReg, TR::MemoryReference::createWithDisplacement (cg, s1Reg, dataAddrOffset));
6701
+ generateTrg1MemInstruction (cg, TR::InstOpCode::ldrimmx, node, s2addrReg, TR::MemoryReference::createWithDisplacement (cg, s2Reg, dataAddrOffset));
6702
+ }
6703
+ else
6704
+ #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
6705
+ {
6706
+ uint32_t hdrSize = static_cast <uint32_t >(TR::Compiler->om .contiguousArrayHeaderSizeInBytes ());
6707
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmx, node, s1addrReg, s1Reg, hdrSize);
6708
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmx, node, s2addrReg, s2Reg, hdrSize);
6709
+ }
6710
+
6711
+ // First character of s2
6712
+ generateTrg1MemInstruction (cg, TR::InstOpCode::ldrbimm, node, tmp1Reg, TR::MemoryReference::createWithDisplacement (cg, s2addrReg, 0 ));
6713
+ generateTrg1Src1Instruction (cg, TR::InstOpCode::vdup16b, node, s2firstCharReg, tmp1Reg);
6714
+
6715
+ // Calculate max
6716
+ generateTrg1Src2Instruction (cg, TR::InstOpCode::subw, node, maxReg, s1lenReg, s2lenReg);
6717
+
6718
+ // Outer loop
6719
+ generateLabelInstruction (cg, TR::InstOpCode::label, node, outerLoopLabel);
6720
+ generateCompareInstruction (cg, node, resultReg, maxReg, /* is64bit */ false );
6721
+ generateConditionalBranchInstruction (cg, TR::InstOpCode::b_cond, node, notFoundLabel, TR::CC_GT);
6722
+
6723
+ // Search for the first character
6724
+ generateTrg1Src2Instruction (cg, TR::InstOpCode::addx, node, tmp1Reg, s1addrReg, resultReg);
6725
+ generateLogicalImmInstruction (cg, TR::InstOpCode::andimmx, node, tmp2Reg, tmp1Reg, true , 3 ); // N = true, immr:imms = 3 for immediate value 0xf
6726
+ generateCompareBranchInstruction (cg, TR::InstOpCode::cbzw, node, tmp2Reg, firstCharLoopLabel);
6727
+
6728
+ generateTrg1Src2Instruction (cg, TR::InstOpCode::subx, node, tmp1Reg, tmp1Reg, tmp2Reg); // tmp1Reg is 16-byte aligned
6729
+ generateTrg1MemInstruction (cg, TR::InstOpCode::vldrimmq, node, vtmp1Reg, TR::MemoryReference::createWithDisplacement (cg, tmp1Reg, 0 ));
6730
+ generateTrg1Src2Instruction (cg, TR::InstOpCode::vcmeq16b, node, vtmp1Reg, vtmp1Reg, s2firstCharReg);
6731
+ generateVectorShiftImmediateInstruction (cg, TR::InstOpCode::vshrn_8b, node, vtmp1Reg, vtmp1Reg, 4 ); // 8 bits x 16 -> 4 bits x 16
6732
+ generateMovVectorElementToGPRInstruction (cg, TR::InstOpCode::umovxd, node, tmp1Reg, vtmp1Reg, 0 );
6733
+ generateLogicalShiftLeftImmInstruction (cg, node, s1idxReg, tmp2Reg, 2 , /* is64bit */ false ); // s1idxReg is used for other purpose here
6734
+ generateTrg1Src2Instruction (cg, TR::InstOpCode::lsrvx, node, tmp1Reg, tmp1Reg, s1idxReg);
6735
+ generateCompareBranchInstruction (cg, TR::InstOpCode::cbnzx, node, tmp1Reg, firstCharMatchedLabel);
6736
+
6737
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmw, node, resultReg, resultReg, vecWidth);
6738
+ generateTrg1Src2Instruction (cg, TR::InstOpCode::subw, node, resultReg, resultReg, tmp2Reg);
6739
+
6740
+ generateCompareInstruction (cg, node, resultReg, maxReg, /* is64bit */ false );
6741
+ generateConditionalBranchInstruction (cg, TR::InstOpCode::b_cond, node, notFoundLabel, TR::CC_GT);
6742
+
6743
+ // (s1addrReg + resultReg) is 16-byte aligned here
6744
+ generateLabelInstruction (cg, TR::InstOpCode::label, node, firstCharLoopLabel);
6745
+ generateTrg1MemInstruction (cg, TR::InstOpCode::vldroffq, node, vtmp1Reg, TR::MemoryReference::createWithIndexReg (cg, s1addrReg, resultReg));
6746
+ generateTrg1Src2Instruction (cg, TR::InstOpCode::vcmeq16b, node, vtmp1Reg, vtmp1Reg, s2firstCharReg);
6747
+ generateVectorShiftImmediateInstruction (cg, TR::InstOpCode::vshrn_8b, node, vtmp1Reg, vtmp1Reg, 4 ); // 8 bits x 16 -> 4 bits x 16
6748
+ generateMovVectorElementToGPRInstruction (cg, TR::InstOpCode::umovxd, node, tmp1Reg, vtmp1Reg, 0 );
6749
+ generateCompareBranchInstruction (cg, TR::InstOpCode::cbnzx, node, tmp1Reg, firstCharMatchedLabel);
6750
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmw, node, resultReg, resultReg, vecWidth);
6751
+ generateCompareInstruction (cg, node, resultReg, maxReg, /* is64bit */ false );
6752
+ generateConditionalBranchInstruction (cg, TR::InstOpCode::b_cond, node, firstCharLoopLabel, TR::CC_LE);
6753
+ generateLabelInstruction (cg, TR::InstOpCode::b, node, notFoundLabel);
6754
+
6755
+ // First character matched in vector
6756
+ generateLabelInstruction (cg, TR::InstOpCode::label, node, firstCharMatchedLabel);
6757
+ generateTrg1Src1Instruction (cg, TR::InstOpCode::rbitx, node, tmp1Reg, tmp1Reg);
6758
+ generateTrg1Src1Instruction (cg, TR::InstOpCode::clzx, node, tmp1Reg, tmp1Reg);
6759
+ generateLogicalShiftRightImmInstruction (cg, node, tmp1Reg, tmp1Reg, 2 , /* is64bit */ true ); // div by 4
6760
+ generateTrg1Src2Instruction (cg, TR::InstOpCode::addx, node, resultReg, resultReg, tmp1Reg);
6761
+
6762
+ generateCompareInstruction (cg, node, resultReg, maxReg, /* is64bit */ false );
6763
+ generateConditionalBranchInstruction (cg, TR::InstOpCode::b_cond, node, notFoundLabel, TR::CC_GT);
6764
+
6765
+ // Compare the rest of s2
6766
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmw, node, s1idxReg, resultReg, 1 ); // s1idx = offset + 1
6767
+ loadConstant32 (cg, node, 1 , s2idxReg); // s2idx = 1
6768
+
6769
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::subimmw, node, tmp2Reg, s2lenReg, 1 );
6770
+ generateLogicalShiftRightImmInstruction (cg, node, tmp2Reg, tmp2Reg, 4 , /* is64bit */ false ); // div by 16
6771
+ generateCompareBranchInstruction (cg, TR::InstOpCode::cbzw, node, tmp2Reg, arrayCmpByteLoopLabel);
6772
+
6773
+ // Vector comparison
6774
+ generateLabelInstruction (cg, TR::InstOpCode::label, node, arrayCmpVectorLoopLabel);
6775
+ generateTrg1MemInstruction (cg, TR::InstOpCode::vldroffq, node, vtmp1Reg, TR::MemoryReference::createWithIndexReg (cg, s1addrReg, s1idxReg));
6776
+ generateTrg1MemInstruction (cg, TR::InstOpCode::vldroffq, node, vtmp2Reg, TR::MemoryReference::createWithIndexReg (cg, s2addrReg, s2idxReg));
6777
+ generateTrg1Src2Instruction (cg, TR::InstOpCode::vcmeq16b, node, vtmp1Reg, vtmp1Reg, vtmp2Reg);
6778
+ generateVectorShiftImmediateInstruction (cg, TR::InstOpCode::vshrn_8b, node, vtmp1Reg, vtmp1Reg, 4 ); // 8 bits x 16 -> 4 bits x 16
6779
+ generateMovVectorElementToGPRInstruction (cg, TR::InstOpCode::umovxd, node, tmp1Reg, vtmp1Reg, 0 );
6780
+ generateCompareImmInstruction (cg, node, tmp1Reg, -1 , /* is64bit */ true );
6781
+ generateConditionalBranchInstruction (cg, TR::InstOpCode::b_cond, node, unmatchedLabel, TR::CC_NE);
6782
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmw, node, s1idxReg, s1idxReg, vecWidth);
6783
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmw, node, s2idxReg, s2idxReg, vecWidth);
6784
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::subsimmw, node, tmp2Reg, tmp2Reg, 1 );
6785
+ generateConditionalBranchInstruction (cg, TR::InstOpCode::b_cond, node, arrayCmpVectorLoopLabel, TR::CC_NE);
6786
+
6787
+ // Byte comparison
6788
+ generateLabelInstruction (cg, TR::InstOpCode::label, node, arrayCmpByteLoopLabel);
6789
+ generateCompareInstruction (cg, node, s2lenReg, s2idxReg);
6790
+ generateConditionalBranchInstruction (cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_LE); // resultReg has the result
6791
+
6792
+ generateTrg1MemInstruction (cg, TR::InstOpCode::ldrbimm, node, tmp1Reg, TR::MemoryReference::createWithIndexReg (cg, s1addrReg, s1idxReg));
6793
+ generateTrg1MemInstruction (cg, TR::InstOpCode::ldrbimm, node, tmp2Reg, TR::MemoryReference::createWithIndexReg (cg, s2addrReg, s2idxReg));
6794
+ generateCompareInstruction (cg, node, tmp1Reg, tmp2Reg, /* is64bit */ false );
6795
+ generateConditionalBranchInstruction (cg, TR::InstOpCode::b_cond, node, unmatchedLabel, TR::CC_NE);
6796
+
6797
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmw, node, s1idxReg, s1idxReg, 1 );
6798
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmw, node, s2idxReg, s2idxReg, 1 );
6799
+ generateLabelInstruction (cg, TR::InstOpCode::b, node, arrayCmpByteLoopLabel);
6800
+
6801
+ // s2 unmatched
6802
+ generateLabelInstruction (cg, TR::InstOpCode::label, node, unmatchedLabel);
6803
+ generateTrg1Src1ImmInstruction (cg, TR::InstOpCode::addimmw, node, resultReg, resultReg, 1 );
6804
+ generateLabelInstruction (cg, TR::InstOpCode::b, node, outerLoopLabel);
6805
+
6806
+ // Not found
6807
+ generateLabelInstruction (cg, TR::InstOpCode::label, node, notFoundLabel);
6808
+ loadConstant32 (cg, node, -1 , resultReg);
6809
+ // fall through to doneLabel
6810
+
6811
+ generateLabelInstruction (cg, TR::InstOpCode::label, node, doneLabel, dependencies);
6812
+
6813
+ cg->stopUsingRegister (s1addrReg);
6814
+ cg->stopUsingRegister (s1idxReg);
6815
+ cg->stopUsingRegister (s2addrReg);
6816
+ cg->stopUsingRegister (s2idxReg);
6817
+ cg->stopUsingRegister (tmp1Reg);
6818
+ cg->stopUsingRegister (tmp2Reg);
6819
+ cg->stopUsingRegister (s2firstCharReg);
6820
+ cg->stopUsingRegister (vtmp1Reg);
6821
+ cg->stopUsingRegister (vtmp2Reg);
6822
+
6823
+ if (maxReg != s1lenReg)
6824
+ {
6825
+ cg->stopUsingRegister (maxReg);
6826
+ }
6827
+
6828
+ node->setRegister (resultReg);
6829
+
6830
+ if (!isStaticCall)
6831
+ {
6832
+ cg->recursivelyDecReferenceCount (node->getChild (0 ));
6833
+ }
6834
+ for (int32_t i = firstCallArgIdx; i < node->getNumChildren (); i++)
6835
+ {
6836
+ cg->decReferenceCount (node->getChild (i));
6837
+ }
6838
+
6839
+ return resultReg;
6840
+ }
6841
+
6581
6842
/* *
6582
6843
* @brief Generates inlined instructions equivalent to java/lang/StringLatin1.inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len)
6583
6844
*
@@ -6877,6 +7138,15 @@ J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
6877
7138
}
6878
7139
break ;
6879
7140
7141
+ case TR::java_lang_StringLatin1_indexOf:
7142
+ case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringLatin1:
7143
+ if (cg->getSupportsInlineStringIndexOfString ())
7144
+ {
7145
+ resultReg = inlineIntrinsicStringIndexOfString (node, cg);
7146
+ return true ;
7147
+ }
7148
+ break ;
7149
+
6880
7150
case TR::java_lang_String_hashCodeImplDecompressed:
6881
7151
if (cg->getSupportsInlineStringHashCode ())
6882
7152
{
0 commit comments