@@ -858,6 +858,119 @@ J9::Z::TreeEvaluator::pdclearSetSignEvaluator(TR::Node *node, TR::CodeGenerator
858
858
return TR::TreeEvaluator::pdclearEvaluator(node, cg);
859
859
}
860
860
861
+ /*
862
+ * This method inlines the Java API StringCoding.hasNegatives(byte src, int off, int len) using
863
+ * SIMD instructions.
864
+ * The method looks like below on Java 17:
865
+ *
866
+ * @IntrinsicCandidate
867
+ * public static boolean hasNegatives(byte[] ba, int off, int len) {
868
+ * for (int i = off; i < off + len; i++) {
869
+ * if (ba[i] < 0) {
870
+ * return true;
871
+ * }
872
+ * }
873
+ * return false;
874
+ * }
875
+ * This routine behaves similarly on Java 11 and 21 as well and so is supported on those platforms too.
876
+ */
877
+ TR::Register*
878
+ J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg)
879
+ {
880
+ TR::Register *inputPtrReg = cg->gprClobberEvaluate(node->getChild(0));
881
+ TR::Register *offsetReg = cg->evaluate(node->getChild(1));
882
+ TR::Register *lengthReg = cg->evaluate(node->getChild(2));
883
+
884
+ TR::LabelSymbol *processMultiple16CharsStart = generateLabelSymbol(cg);
885
+ TR::LabelSymbol *processMultiple16CharsEnd = generateLabelSymbol(cg);
886
+ TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
887
+ TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
888
+ TR::LabelSymbol *processOutOfRangeChar = generateLabelSymbol(cg);
889
+
890
+ TR::Register *vInput = cg->allocateRegister(TR_VRF);
891
+ TR::Register *vUpperLimit = cg->allocateRegister(TR_VRF);
892
+ TR::Register *vComparison = cg->allocateRegister(TR_VRF);
893
+ TR::Register *numCharsLeftToProcess = cg->allocateRegister(); // off + len
894
+ TR::Register *outOfRangeCharIndex = cg->allocateRegister(TR_VRF);
895
+
896
+ TR::Register *returnReg = cg->allocateRegister();
897
+ generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0);
898
+
899
+ generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
900
+ generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lengthReg, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false);
901
+ generateRRInstruction(cg, TR::InstOpCode::AGFR, node, inputPtrReg, offsetReg);
902
+ generateRRInstruction(cg, TR::InstOpCode::LR, node, numCharsLeftToProcess, lengthReg);
903
+
904
+ const uint8_t upperLimit = 127;
905
+ const uint8_t rangeComparison = 0x20; // > comparison
906
+
907
+ generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vUpperLimit, upperLimit, 0);
908
+ generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vComparison, rangeComparison, 0);
909
+
910
+ generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BNH, processMultiple16CharsEnd, false, false);
911
+
912
+ generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsStart);
913
+ processMultiple16CharsStart->setStartInternalControlFlow();
914
+
915
+ // Load bytes and search for out of range character
916
+ generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
917
+
918
+ generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
919
+
920
+ // process bad character by setting return register to true and exiting
921
+ generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar);
922
+
923
+ // Update the counters
924
+ generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, inputPtrReg, generateS390MemoryReference(inputPtrReg, 16, cg));
925
+ generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -16);
926
+
927
+ // Branch back up if we still have more than 16 characters to process.
928
+ generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BH, processMultiple16CharsStart, false, false);
929
+
930
+ generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsEnd);
931
+
932
+ // Zero out the input register to avoid invalid VSTRC result
933
+ generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vInput, 0, 0 /*unused*/);
934
+
935
+ // VLL and VSTL work on indices so we subtract 1
936
+ generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -1);
937
+ // Load residue bytes and check for out of range character
938
+ generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
939
+
940
+ generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
941
+ generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar);
942
+
943
+ generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
944
+
945
+ generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processOutOfRangeChar);
946
+ generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 1);
947
+
948
+ TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 7, cg);
949
+ dependencies->addPostConditionIfNotAlreadyInserted(vInput, TR::RealRegister::AssignAny);
950
+ dependencies->addPostConditionIfNotAlreadyInserted(outOfRangeCharIndex, TR::RealRegister::AssignAny);
951
+ dependencies->addPostConditionIfNotAlreadyInserted(vUpperLimit, TR::RealRegister::AssignAny);
952
+ dependencies->addPostConditionIfNotAlreadyInserted(vComparison, TR::RealRegister::AssignAny);
953
+ dependencies->addPostConditionIfNotAlreadyInserted(inputPtrReg, TR::RealRegister::AssignAny);
954
+ dependencies->addPostConditionIfNotAlreadyInserted(numCharsLeftToProcess, TR::RealRegister::AssignAny);
955
+ dependencies->addPostConditionIfNotAlreadyInserted(returnReg, TR::RealRegister::AssignAny);
956
+
957
+ generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
958
+ cFlowRegionEnd->setEndInternalControlFlow();
959
+
960
+ for (int i = 0; i < node->getNumChildren(); i++)
961
+ {
962
+ cg->decReferenceCount(node->getChild(i));
963
+ }
964
+
965
+ cg->stopUsingRegister(vInput);
966
+ cg->stopUsingRegister(outOfRangeCharIndex);
967
+ cg->stopUsingRegister(vUpperLimit);
968
+ cg->stopUsingRegister(vComparison);
969
+ cg->stopUsingRegister(numCharsLeftToProcess);
970
+ node->setRegister(returnReg);
971
+ return returnReg;
972
+ }
973
+
861
974
/* Moved from Codegen to FE */
862
975
///////////////////////////////////////////////////////////////////////////////////
863
976
// Generate code to perform a comparison and branch to a snippet.
0 commit comments