Skip to content

Commit 17f0af9

Browse files
authored
Merge pull request #20692 from dchopra001/hasNeg-0.49
(0.49) Accelerate StringCoding.hasNegatives on Z
2 parents 23b2711 + 446794b commit 17f0af9

File tree

7 files changed

+148
-0
lines changed

7 files changed

+148
-0
lines changed

runtime/compiler/codegen/J9CodeGenerator.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,16 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
461461
*/
462462
void setSupportsInlineStringHashCode() { _j9Flags.set(SupportsInlineStringHashCode); }
463463

464+
/** \brief
465+
* Determines whether the code generator supports inlining of java/lang/StringCoding.hasNegatives
466+
*/
467+
bool getSupportsInlineStringCodingHasNegatives() { return _j9Flags.testAny(SupportsInlineStringCodingHasNegatives); }
468+
469+
/** \brief
470+
* The code generator supports inlining of java/lang/StringCoding.hasNegatives
471+
*/
472+
void setSupportsInlineStringCodingHasNegatives() { _j9Flags.set(SupportsInlineStringCodingHasNegatives); }
473+
464474
/** \brief
465475
* Determines whether the code generator supports inlining of java/lang/StringLatin1.inflate
466476
*/
@@ -677,6 +687,7 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
677687
SavesNonVolatileGPRsForGC = 0x00000800,
678688
SupportsInlineVectorizedMismatch = 0x00001000,
679689
SupportsInlineVectorizedHashCode = 0x00002000,
690+
SupportsInlineStringCodingHasNegatives = 0x00004000,
680691
};
681692

682693
flags32_t _j9Flags;

runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,7 @@
12261226
java_lang_StringCoding_encode,
12271227
java_lang_StringCoding_StringDecoder_decode,
12281228
java_lang_StringCoding_StringEncoder_encode,
1229+
java_lang_StringCoding_hasNegatives,
12291230
java_lang_StringCoding_implEncodeISOArray,
12301231
java_lang_StringCoding_implEncodeAsciiArray,
12311232
java_lang_StringCoding_encode8859_1,

runtime/compiler/env/j9method.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2832,6 +2832,7 @@ void TR_ResolvedJ9Method::construct()
28322832
{
28332833
{x(TR::java_lang_StringCoding_decode, "decode", "(Ljava/nio/charset/Charset;[BII)[C")},
28342834
{x(TR::java_lang_StringCoding_encode, "encode", "(Ljava/nio/charset/Charset;[CII)[B")},
2835+
{x(TR::java_lang_StringCoding_hasNegatives, "hasNegatives", "([BII)Z")},
28352836
{x(TR::java_lang_StringCoding_implEncodeISOArray, "implEncodeISOArray", "([BI[BII)I")},
28362837
{x(TR::java_lang_StringCoding_implEncodeAsciiArray, "implEncodeAsciiArray", "([CI[BII)I")},
28372838
{x(TR::java_lang_StringCoding_encode8859_1, "encode8859_1", "(B[B)[B")},
@@ -5135,6 +5136,7 @@ TR_ResolvedJ9Method::setRecognizedMethodInfo(TR::RecognizedMethod rm)
51355136
case TR::java_lang_String_hashCodeImplCompressed:
51365137
case TR::java_lang_String_hashCodeImplDecompressed:
51375138
case TR::java_lang_StringLatin1_inflate:
5139+
case TR::java_lang_StringCoding_hasNegatives:
51385140
case TR::sun_nio_ch_NativeThread_current:
51395141
case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcDecrypt:
51405142
case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcEncrypt:

runtime/compiler/optimizer/InlinerTempForJ9.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5582,6 +5582,12 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite
55825582
return true;
55835583
}
55845584
break;
5585+
case TR::java_lang_StringCoding_hasNegatives:
5586+
if (comp->cg()->getSupportsInlineStringCodingHasNegatives())
5587+
{
5588+
return true;
5589+
}
5590+
break;
55855591
case TR::java_lang_Integer_stringSize:
55865592
case TR::java_lang_Long_stringSize:
55875593
if (comp->cg()->getSupportsIntegerStringSize())

runtime/compiler/z/codegen/J9CodeGenerator.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,13 @@ J9::Z::CodeGenerator::initialize()
120120
cg->setSupportsInlineConcurrentLinkedQueue();
121121
}
122122

123+
static bool disableInlineStringCodingHasNegatives = feGetEnv("TR_DisableInlineStringCodingHasNegatives") != NULL;
124+
if (cg->getSupportsVectorRegisters() && !disableInlineStringCodingHasNegatives &&
125+
!TR::Compiler->om.canGenerateArraylets())
126+
{
127+
cg->setSupportsInlineStringCodingHasNegatives();
128+
}
129+
123130
// Similar to AOT, array translate instructions are not supported for remote compiles because instructions such as
124131
// TRTO allocate lookup tables in persistent memory that cannot be relocated.
125132
if (comp->isOutOfProcessCompilation())
@@ -4013,6 +4020,13 @@ J9::Z::CodeGenerator::inlineDirectCall(
40134020
return resultReg != NULL;
40144021
}
40154022
break;
4023+
case TR::java_lang_StringCoding_hasNegatives:
4024+
if (cg->getSupportsInlineStringCodingHasNegatives())
4025+
{
4026+
resultReg = TR::TreeEvaluator::inlineStringCodingHasNegatives(node, cg);
4027+
return true;
4028+
}
4029+
break;
40164030
case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big:
40174031
return resultReg = comp->getOption(TR_DisableUTF16BEEncoder) ? TR::TreeEvaluator::inlineUTF16BEEncodeSIMD(node, cg)
40184032
: TR::TreeEvaluator::inlineUTF16BEEncode (node, cg);

runtime/compiler/z/codegen/J9TreeEvaluator.cpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,119 @@ J9::Z::TreeEvaluator::pdclearSetSignEvaluator(TR::Node *node, TR::CodeGenerator
858858
return TR::TreeEvaluator::pdclearEvaluator(node, cg);
859859
}
860860

861+
/*
862+
* This method inlines the Java API StringCoding.hasNegatives(byte src, int off, int len) using
863+
* SIMD instructions.
864+
* The method looks like below on Java 17:
865+
*
866+
* @IntrinsicCandidate
867+
* public static boolean hasNegatives(byte[] ba, int off, int len) {
868+
* for (int i = off; i < off + len; i++) {
869+
* if (ba[i] < 0) {
870+
* return true;
871+
* }
872+
* }
873+
* return false;
874+
* }
875+
* This routine behaves similarly on Java 11 and 21 as well and so is supported on those platforms too.
876+
*/
877+
TR::Register*
878+
J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg)
879+
{
880+
TR::Register *inputPtrReg = cg->gprClobberEvaluate(node->getChild(0));
881+
TR::Register *offsetReg = cg->evaluate(node->getChild(1));
882+
TR::Register *lengthReg = cg->evaluate(node->getChild(2));
883+
884+
TR::LabelSymbol *processMultiple16CharsStart = generateLabelSymbol(cg);
885+
TR::LabelSymbol *processMultiple16CharsEnd = generateLabelSymbol(cg);
886+
TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
887+
TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
888+
TR::LabelSymbol *processOutOfRangeChar = generateLabelSymbol(cg);
889+
890+
TR::Register *vInput = cg->allocateRegister(TR_VRF);
891+
TR::Register *vUpperLimit = cg->allocateRegister(TR_VRF);
892+
TR::Register *vComparison = cg->allocateRegister(TR_VRF);
893+
TR::Register *numCharsLeftToProcess = cg->allocateRegister(); // off + len
894+
TR::Register *outOfRangeCharIndex = cg->allocateRegister(TR_VRF);
895+
896+
TR::Register *returnReg = cg->allocateRegister();
897+
generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0);
898+
899+
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
900+
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lengthReg, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false);
901+
generateRRInstruction(cg, TR::InstOpCode::AGFR, node, inputPtrReg, offsetReg);
902+
generateRRInstruction(cg, TR::InstOpCode::LR, node, numCharsLeftToProcess, lengthReg);
903+
904+
const uint8_t upperLimit = 127;
905+
const uint8_t rangeComparison = 0x20; // > comparison
906+
907+
generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vUpperLimit, upperLimit, 0);
908+
generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vComparison, rangeComparison, 0);
909+
910+
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BNH, processMultiple16CharsEnd, false, false);
911+
912+
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsStart);
913+
processMultiple16CharsStart->setStartInternalControlFlow();
914+
915+
// Load bytes and search for out of range character
916+
generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
917+
918+
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
919+
920+
// process bad character by setting return register to true and exiting
921+
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar);
922+
923+
// Update the counters
924+
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, inputPtrReg, generateS390MemoryReference(inputPtrReg, 16, cg));
925+
generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -16);
926+
927+
// Branch back up if we still have more than 16 characters to process.
928+
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BH, processMultiple16CharsStart, false, false);
929+
930+
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsEnd);
931+
932+
// Zero out the input register to avoid invalid VSTRC result
933+
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vInput, 0, 0 /*unused*/);
934+
935+
// VLL and VSTL work on indices so we subtract 1
936+
generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -1);
937+
// Load residue bytes and check for out of range character
938+
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
939+
940+
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
941+
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar);
942+
943+
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
944+
945+
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processOutOfRangeChar);
946+
generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 1);
947+
948+
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 7, cg);
949+
dependencies->addPostConditionIfNotAlreadyInserted(vInput, TR::RealRegister::AssignAny);
950+
dependencies->addPostConditionIfNotAlreadyInserted(outOfRangeCharIndex, TR::RealRegister::AssignAny);
951+
dependencies->addPostConditionIfNotAlreadyInserted(vUpperLimit, TR::RealRegister::AssignAny);
952+
dependencies->addPostConditionIfNotAlreadyInserted(vComparison, TR::RealRegister::AssignAny);
953+
dependencies->addPostConditionIfNotAlreadyInserted(inputPtrReg, TR::RealRegister::AssignAny);
954+
dependencies->addPostConditionIfNotAlreadyInserted(numCharsLeftToProcess, TR::RealRegister::AssignAny);
955+
dependencies->addPostConditionIfNotAlreadyInserted(returnReg, TR::RealRegister::AssignAny);
956+
957+
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
958+
cFlowRegionEnd->setEndInternalControlFlow();
959+
960+
for (int i = 0; i < node->getNumChildren(); i++)
961+
{
962+
cg->decReferenceCount(node->getChild(i));
963+
}
964+
965+
cg->stopUsingRegister(vInput);
966+
cg->stopUsingRegister(outOfRangeCharIndex);
967+
cg->stopUsingRegister(vUpperLimit);
968+
cg->stopUsingRegister(vComparison);
969+
cg->stopUsingRegister(numCharsLeftToProcess);
970+
node->setRegister(returnReg);
971+
return returnReg;
972+
}
973+
861974
/* Moved from Codegen to FE */
862975
///////////////////////////////////////////////////////////////////////////////////
863976
// Generate code to perform a comparison and branch to a snippet.

runtime/compiler/z/codegen/J9TreeEvaluator.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator
7373
* Inline Java's (Java 11 onwards) StringLatin1.inflate([BI[CII)V
7474
*/
7575
static TR::Register *inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator *cg);
76+
static TR::Register *inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg);
7677
static TR::Register *VMinlineCompareAndSwap( TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj, bool isExchange = false);
7778
static TR::Register *inlineAtomicOps(TR::Node *node, TR::CodeGenerator *cg, int8_t size, TR::MethodSymbol *method, bool isArray = false);
7879
static TR::Register *inlineAtomicFieldUpdater(TR::Node *node, TR::CodeGenerator *cg, TR::MethodSymbol *method);

0 commit comments

Comments
 (0)