Skip to content

Commit 0b1a3eb

Browse files
authored
Merge pull request #21309 from nbhuiyan/class-arg-propagation-0.51
(0.51) Enable peeking ILGen for inlined methods related to java/util/HashMap get/put operations
2 parents d1ba02f + ee89f64 commit 0b1a3eb

File tree

5 files changed

+103
-28
lines changed

5 files changed

+103
-28
lines changed

runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,8 @@
345345
java_util_HashMap_getNode,
346346
java_util_HashMap_getNode_Object,
347347
java_util_HashMap_findNonNullKeyEntry,
348+
java_util_HashMap_hash,
349+
java_util_HashMap_put,
348350
java_util_HashMap_putImpl,
349351
java_util_HashMap_resize,
350352
java_util_HashMap_prepareArray,

runtime/compiler/env/j9method.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2183,6 +2183,8 @@ void TR_ResolvedJ9Method::construct()
21832183
{x(TR::java_util_HashMap_get, "get", "(Ljava/lang/Object;)Ljava/lang/Object;")},
21842184
{x(TR::java_util_HashMap_getNode, "getNode", "(ILjava/lang/Object;)Ljava/util/HashMap$Node;")},
21852185
{x(TR::java_util_HashMap_getNode_Object, "getNode", "(Ljava/lang/Object;)Ljava/util/HashMap$Node;")},
2186+
{x(TR::java_util_HashMap_hash, "hash", "(Ljava/lang/Object;)I")},
2187+
{x(TR::java_util_HashMap_put, "put", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;")},
21862188
{x(TR::java_util_HashMap_putImpl, "putImpl", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;")},
21872189
{x(TR::java_util_HashMap_findNonNullKeyEntry, "findNonNullKeyEntry", "(Ljava/lang/Object;II)Ljava/util/HashMap$Entry;")},
21882190
{x(TR::java_util_HashMap_resize, "resize", "()[Ljava/util/HashMap$Node;")},

runtime/compiler/optimizer/InlinerTempForJ9.cpp

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,8 @@ TR_J9InlinerPolicy::determineInliningHeuristic(TR::ResolvedMethodSymbol *callerS
267267
return;
268268
}
269269

270-
void TR_MultipleCallTargetInliner::generateNodeEstimate::operator ()(TR_CallTarget *ct, TR::Compilation *comp)
270+
void
271+
TR_MultipleCallTargetInliner::NodeEstimate::operator ()(TR_CallTarget *ct, TR::Compilation *comp)
271272
{
272273
static const char *qq1 = feGetEnv("TR_NodeEstimateNumerator");
273274
static const uint32_t userNumer = ( qq1 ) ? atoi(qq1) : 1;
@@ -3321,24 +3322,25 @@ TR_Inliner::optDetailString() const throw()
33213322
return "O^O INLINER: ";
33223323
}
33233324

3324-
template <typename FunctObj>
3325-
void TR_MultipleCallTargetInliner::recursivelyWalkCallTargetAndPerformAction(TR_CallTarget *ct, FunctObj &action)
3325+
void
3326+
TR_MultipleCallTargetInliner::recursivelyWalkCallTargetAndGenerateNodeEstimate(TR_CallTarget *ct, NodeEstimate &estimate)
33263327
{
33273328

3328-
debugTrace(tracer(),"recursivelyWalkingCallTargetAndPerformAction: Considering Target %p. node estimate before = %d maxbcindex = %d",ct,action.getNodeEstimate(),getPolicy()->getInitialBytecodeSize(ct->_calleeMethod, 0, comp()));
3329+
debugTrace(tracer(),"recursivelyWalkCallTargetAndGenerateNodeEstimate: Considering Target %p. node estimate before = %d maxbcindex = %d",ct,estimate.getNodeEstimate(),getPolicy()->getInitialBytecodeSize(ct->_calleeMethod, 0, comp()));
3330+
3331+
if (canSkipCountingNodes(ct))
3332+
return;
33293333

3330-
action(ct,comp());
3334+
estimate(ct,comp());
33313335

33323336
TR_CallSite *callsite = 0;
33333337
for(callsite = ct->_myCallees.getFirst() ; callsite ; callsite = callsite->getNext() )
33343338
{
33353339
for (int32_t i = 0 ; i < callsite->numTargets() ; i++)
33363340
{
3337-
recursivelyWalkCallTargetAndPerformAction(callsite->getTarget(i),action);
3341+
recursivelyWalkCallTargetAndGenerateNodeEstimate(callsite->getTarget(i),estimate);
33383342
}
33393343
}
3340-
3341-
33423344
}
33433345

33443346
int32_t
@@ -3936,14 +3938,14 @@ bool TR_MultipleCallTargetInliner::inlineCallTargets(TR::ResolvedMethodSymbol *c
39363938
debugTrace(tracer(), "Initially, estimatedNumberOfNodes = %d\n", estimatedNumberOfNodes);
39373939
for (calltarget = _callTargets.getFirst(); calltarget != callTargetToChop; prev = calltarget, calltarget = calltarget->getNext())
39383940
{
3939-
generateNodeEstimate myEstimate;
3940-
recursivelyWalkCallTargetAndPerformAction(calltarget, myEstimate);
3941+
NodeEstimate myEstimate;
3942+
recursivelyWalkCallTargetAndGenerateNodeEstimate(calltarget, myEstimate);
39413943
estimatedNumberOfNodes += myEstimate.getNodeEstimate();
39423944

39433945
if (comp()->trace(OMR::inlining))
3944-
traceMsg(comp(), "Estimated Number of Nodes is %d after calltarget %p", estimatedNumberOfNodes,calltarget);
3946+
traceMsg(comp(), "Estimated Number of Nodes is %d after calltarget %p\n", estimatedNumberOfNodes,calltarget);
39453947

3946-
debugTrace(tracer(),"Estimated Number of Nodes is %d after calltarget %p", estimatedNumberOfNodes,calltarget);
3948+
debugTrace(tracer(),"Estimated Number of Nodes is %d after calltarget %p\n", estimatedNumberOfNodes,calltarget);
39473949

39483950
float factor = 1.1F; // this factor was chosen based on a study of a large WAS app that showed that getMaxBytecodeindex was 92% accurate compared to nodes generated
39493951

@@ -4046,6 +4048,33 @@ bool TR_MultipleCallTargetInliner::inlineCallTargets(TR::ResolvedMethodSymbol *c
40464048
return anySuccess;
40474049
}
40484050

4051+
// Decides whether node counting can be skipped for callTarget.
// Returns true only when the callee is recognized as java/lang/Object.hashCode()
// and the call site's prex arg info shows that argument 0 has a class, that the
// class is fixed, and that a known object index is available. Returns false in
// every other case.
bool
4052+
TR_MultipleCallTargetInliner::canSkipCountingNodes(TR_CallTarget* callTarget)
4053+
{
4054+
TR::RecognizedMethod rm = callTarget->_calleeMethod->getRecognizedMethod();
4055+
// Only recognized methods listed in this switch are eligible; anything else
// falls through to the final "return false".
switch (rm)
4056+
{
4057+
case TR::java_lang_Object_hashCode:
4058+
{
4059+
// Both the call site and its prex arg info may be absent, so check before use.
if (callTarget->_myCallSite &&
4060+
callTarget->_myCallSite->_ecsPrexArgInfo)
4061+
{
4062+
// NOTE(review): argument 0 is presumably the receiver of hashCode() - confirm.
TR_PrexArgument* arg = callTarget->_myCallSite->_ecsPrexArgInfo->get(0);
4063+
// All three properties are required: a class, a fixed class, and a known object index.
if (arg && arg->getClass() && arg->classIsFixed() && arg->hasKnownObjectIndex())
4064+
{
4065+
if (comp()->trace(OMR::inlining))
4066+
traceMsg(comp(), "Skipping node counting for sub call graph of java/lang/Object.hashCode()I\n");
4067+
return true;
4068+
}
4069+
}
4070+
}
4071+
break;
4072+
default:
4073+
break;
4074+
}
4075+
return false;
4076+
}
4077+
40494078
void TR_MultipleCallTargetInliner::weighCallSite( TR_CallStack * callStack , TR_CallSite *callsite, bool currentBlockHasExceptionSuccessors, bool dontAddCalls)
40504079
{
40514080
TR_J9InlinerPolicy *j9inlinerPolicy = (TR_J9InlinerPolicy *) getPolicy();
@@ -4629,8 +4658,8 @@ void TR_MultipleCallTargetInliner::processChoppedOffCallTargets(TR_CallTarget *l
46294658
{
46304659
if (inlineSubCallGraph(calltarget))
46314660
{
4632-
generateNodeEstimate myEstimate;
4633-
recursivelyWalkCallTargetAndPerformAction(calltarget, myEstimate);
4661+
NodeEstimate myEstimate;
4662+
recursivelyWalkCallTargetAndGenerateNodeEstimate(calltarget, myEstimate);
46344663
estimatedNumberOfNodes += myEstimate.getNodeEstimate();
46354664
/*
46364665
* ForceInline targets and JSR292 methods should always be inlined regardless of budget. However, with

runtime/compiler/optimizer/J9EstimateCodeSize.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,17 @@ TR_J9EstimateCodeSize::processBytecodeAndGenerateCFG(TR_CallTarget *calltarget,
748748
auto calleeMethod = (TR_ResolvedJ9Method*)calltarget->_calleeMethod;
749749
resolvedMethod = calleeMethod->getResolvedPossiblyPrivateVirtualMethod(comp(), cpIndex, true, &isUnresolvedInCP);
750750

751+
if (resolvedMethod)
752+
{
753+
TR::RecognizedMethod rm = resolvedMethod->getRecognizedMethod();
754+
if (rm == TR::java_util_HashMap_put ||
755+
rm == TR::java_util_HashMap_get ||
756+
rm == TR::java_lang_Object_hashCode)
757+
{
758+
nph.setNeedsPeekingToTrue();
759+
heuristicTrace(tracer(), "Depth %d: invokevirtual call at bc index %d has Signature %s, enabled peeking for caller to propagate prex arg info from caller.", _recursionDepth, i, tracer()->traceSignature(resolvedMethod));
760+
}
761+
}
751762
///if (!resolvedMethod || isUnresolvedInCP || resolvedMethod->isCold(comp(), true))
752763
if ((isUnresolvedInCP && !resolvedMethod) || (resolvedMethod
753764
&& resolvedMethod->isCold(comp(), true)))
@@ -824,6 +835,13 @@ TR_J9EstimateCodeSize::processBytecodeAndGenerateCFG(TR_CallTarget *calltarget,
824835
TR::Node *parent = 0;
825836
TR::Node *callNode = 0;
826837
TR::ResolvedMethodSymbol *resolvedSymbol = 0;
838+
839+
if (resolvedMethod &&
840+
resolvedMethod->getRecognizedMethod() == TR::java_util_HashMap_hash)
841+
{
842+
nph.setNeedsPeekingToTrue();
843+
heuristicTrace(tracer(), "Depth %d: invokestatic call at bc index %d has Signature %s, enabled peeking for caller to propagate prex arg info from caller.", _recursionDepth, i, tracer()->traceSignature(resolvedMethod));
844+
}
827845
if (!resolvedMethod || isUnresolvedInCP || resolvedMethod->isCold(comp(), false))
828846
{
829847
if (unresolvedSymbolsAreCold)
@@ -846,21 +864,26 @@ TR_J9EstimateCodeSize::processBytecodeAndGenerateCFG(TR_CallTarget *calltarget,
846864
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
847865
break;
848866
case J9BCinvokeinterface:
849-
cpIndex = bci.next2Bytes();
850-
#if JAVA_SPEC_VERSION >= 21
851867
{
868+
cpIndex = bci.next2Bytes();
852869
TR::Method *meth = comp()->fej9()->createMethod(comp()->trMemory(), calltarget->_calleeMethod->containingClass(), cpIndex);
853870
if (meth)
854871
{
855872
const char * sig = meth->signature(comp()->trMemory());
856-
if (sig && (!strncmp(sig, "java/lang/foreign/MemorySegment.get", 35) || !strncmp(sig, "java/lang/foreign/MemorySegment.set", 35) ))
873+
if (sig && (!strncmp(sig, "java/util/Map.put", 17) || !strncmp(sig, "java/util/Map.get", 17)))
874+
{
875+
nph.setNeedsPeekingToTrue();
876+
heuristicTrace(tracer(), "Depth %d: invokeinterface call at bc index %d has Signature %s, enabled peeking for caller to propagate prex arg info from caller.", _recursionDepth, i, sig);
877+
}
878+
#if JAVA_SPEC_VERSION >= 21
879+
else if (sig && (!strncmp(sig, "java/lang/foreign/MemorySegment.get", 35) || !strncmp(sig, "java/lang/foreign/MemorySegment.set", 35) ))
857880
{
858881
nph.setNeedsPeekingToTrue();
859882
heuristicTrace(tracer(), "Depth %d: invokeinterface call at bc index %d has Signature %s, enabled peeking for caller to fold layout field load necessary for VarHandle operation inlining.", _recursionDepth, i, sig);
860883
}
884+
#endif // JAVA_SPEC_VERSION >= 21
861885
}
862886
}
863-
#endif // JAVA_SPEC_VERSION >= 21
864887
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
865888
break;
866889
case J9BCgetfield:

runtime/compiler/optimizer/J9Inliner.hpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,12 @@ class TR_MultipleCallTargetInliner : public TR_InlinerBase
6969
{
7070
public:
7171

72-
template <typename FunctObj>
73-
void recursivelyWalkCallTargetAndPerformAction(TR_CallTarget *ct, FunctObj &action);
74-
75-
//void generateNodeEstimate(TR_CallTarget *ct, TR::Compilation *comp);
76-
77-
class generateNodeEstimate
72+
struct NodeEstimate
7873
{
79-
public:
80-
generateNodeEstimate() : _nodeEstimate(0){ }
81-
void operator()(TR_CallTarget *ct, TR::Compilation *comp);
82-
int32_t getNodeEstimate() { return _nodeEstimate; }
74+
NodeEstimate() : _nodeEstimate(0){ }
75+
void operator()(TR_CallTarget *ct, TR::Compilation *comp);
76+
int32_t getNodeEstimate() { return _nodeEstimate; }
77+
8378
private:
8479
int32_t _nodeEstimate;
8580
};
@@ -91,6 +86,16 @@ class TR_MultipleCallTargetInliner : public TR_InlinerBase
9186

9287
TR_LinkHead<TR_CallTarget> _callTargets; // This list only contains the call targets from top most level
9388

89+
/**
90+
* \brief Recursively walks call target and estimates the number of nodes of a call graph.
91+
*
92+
* \param ct
93+
* the TR_CallTarget to evaluate
94+
* \param estimate
95+
* the NodeEstimate to keep track of the number of nodes
96+
*/
97+
void recursivelyWalkCallTargetAndGenerateNodeEstimate(TR_CallTarget *ct, NodeEstimate &estimate);
98+
9499
protected:
95100
virtual int32_t scaleSizeBasedOnBlockFrequency(int32_t bytecodeSize, int32_t frequency, int32_t borderFrequency, TR_ResolvedMethod * calleeResolvedMethod, TR::Node *callNode, int32_t coldBorderFrequency = 0);
96101
float getScalingFactor(float factor);
@@ -128,6 +133,20 @@ class TR_MultipleCallTargetInliner : public TR_InlinerBase
128133
* True if the given calltarget should be inlined
129134
*/
130135
bool inlineSubCallGraph(TR_CallTarget* calltarget);
136+
137+
/**
138+
* \brief
139+
* For some call targets and their sub call graphs, it may be possible to simplify them into simple operations in
140+
* certain situations, such as when known object info is being passed as arg. In such cases, the node count
141+
* obtained via NodeEstimate would not truly reflect the number of nodes that are actually introduced. This
142+
* function provides a mechanism for examining call targets and evaluating whether it is safe to skip counting nodes.
143+
*
144+
* \param callTarget
145+
* the call target to examine
146+
* \return
147+
* true if node counting can be skipped for callTarget, false otherwise
148+
*/
149+
bool canSkipCountingNodes(TR_CallTarget* callTarget);
131150
};
132151

133152
class TR_J9InlinerUtil: public OMR_InlinerUtil

0 commit comments

Comments
 (0)