Skip to content

Commit d71b12f

Browse files
authored
Merge pull request #21782 from mpirvu/shared-cache
JITServer profiling infra for CallGraph entries
2 parents 318023c + e77a648 commit d71b12f

16 files changed

+997
-190
lines changed

runtime/compiler/control/JITClientCompilationThread.cpp

Lines changed: 127 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
#include "env/j9methodServer.hpp"
3636
#include "env/JITServerPersistentCHTable.hpp"
3737
#include "env/JSR292Methods.h"
38+
#include "env/StackMemoryRegion.hpp"
3839
#include "env/TypeLayout.hpp"
3940
#include "env/ut_j9jit.h"
4041
#include "env/VerboseLog.hpp"
@@ -113,53 +114,69 @@ findField(J9VMThread *vmStruct, J9ConstantPool *constantPool, UDATA index, BOOLE
113114
static void
114115
handler_IProfiler_profilingSample(JITServer::ClientStream *client, TR_J9VM *fe, TR::Compilation *comp)
115116
{
116-
auto recv = client->getRecvData<TR_OpaqueMethodBlock*, uint32_t, uintptr_t>();
117+
auto recv = client->getRecvData<TR_OpaqueMethodBlock*, uint32_t, bool, bool>();
117118
auto method = std::get<0>(recv);
118119
auto bcIndex = std::get<1>(recv);
119-
auto data = std::get<2>(recv); // data==1 means 'send info for 1 bytecode'; data==0 means 'send info for entire method if possible'
120+
auto wholeMethodInfo = std::get<2>(recv); // 'send info for entire method if possible'
121+
auto sharedProfile = std::get<3>(recv);
120122

121123
JITClientIProfiler *iProfiler = (JITClientIProfiler *)fe->getIProfiler();
122124

123125
bool isCompiled = TR::CompilationInfo::isCompiled((J9Method*)method);
126+
bool isQueued = TR::CompilationInfo::getJ9MethodVMExtra((J9Method *)method) == J9_JIT_QUEUED_FOR_COMPILATION;
124127
bool isInProgress = comp->getMethodBeingCompiled()->getPersistentIdentifier() == method;
125128
bool abort = false;
126-
// Used to tell the server if a profiled entry should be stored in persistent or heap memory
127-
bool usePersistentCache = isCompiled || isInProgress;
128-
bool wholeMethodInfo = data == 0;
129+
// Used to tell the server if a profiled entry should be stored in persistent or heap memory.
130+
// Note that if the method is queued for compilation, new interpreter samples will not be collected.
131+
bool usePersistentCache = isCompiled || isInProgress || isQueued;
129132

130133
if (wholeMethodInfo)
131134
{
132135
// Serialize all the information related to this method
133-
abort = iProfiler->serializeAndSendIProfileInfoForMethod(method, comp, client, usePersistentCache, isCompiled);
136+
abort = iProfiler->serializeAndSendIProfileInfoForMethod(method, comp, client, usePersistentCache, isCompiled, sharedProfile);
137+
if (!abort)
138+
return;
134139
}
135-
if (!wholeMethodInfo || abort) // Send information just for this entry
140+
141+
// Send information just for this entry
142+
std::vector<J9Class *> uncachedClasses;
143+
std::vector<JITServerHelpers::ClassInfoTuple> classInfoTuples;
144+
auto entry = iProfiler->profilingSample(method, bcIndex, comp, 0, /*addIt=*/false);
145+
if (entry && !entry->isInvalid())
136146
{
137-
auto entry = iProfiler->profilingSample(method, bcIndex, comp, data, false);
138-
if (entry && !entry->isInvalid())
139-
{
140-
uint32_t canPersist = entry->canBeSerialized(comp->getPersistentInfo()); // This may lock the entry
141-
if (canPersist == IPBC_ENTRY_CAN_PERSIST)
142-
{
143-
uint32_t bytes = entry->getBytesFootprint();
144-
std::string entryBytes(bytes, '\0');
145-
auto storage = (TR_IPBCDataStorageHeader*)&entryBytes[0];
146-
uintptr_t methodStartAddress = (uintptr_t)TR::Compiler->mtd.bytecodeStart(method);
147-
entry->serialize(methodStartAddress, storage, comp->getPersistentInfo());
148-
client->write(JITServer::MessageType::IProfiler_profilingSample, entryBytes, false, usePersistentCache, isCompiled);
149-
}
150-
else
147+
uint32_t canPersist = entry->canBeSerialized(comp->getPersistentInfo()); // This may lock the entry
148+
if (canPersist == IPBC_ENTRY_CAN_PERSIST)
149+
{
150+
uint32_t bytes = entry->getBytesFootprint();
151+
std::string entryBytes(bytes, '\0');
152+
auto storage = (TR_IPBCDataStorageHeader*)&entryBytes[0];
153+
uintptr_t methodStartAddress = (uintptr_t)TR::Compiler->mtd.bytecodeStart(method);
154+
entry->serialize(methodStartAddress, storage, comp->getPersistentInfo());
155+
156+
// Collect info about the classes the server needs but does not yet have
157+
auto cgEntry = entry->asIPBCDataCallGraph();
158+
if (cgEntry && sharedProfile)
151159
{
152-
client->write(JITServer::MessageType::IProfiler_profilingSample, std::string(), false, usePersistentCache, isCompiled);
160+
uncachedClasses.reserve(NUM_CS_SLOTS);
161+
classInfoTuples.reserve(NUM_CS_SLOTS);
162+
iProfiler->gatherUncachedClassesUsedInCGEntry(cgEntry, comp, uncachedClasses, classInfoTuples);
153163
}
154-
// Unlock the entry
155-
if (auto callGraphEntry = entry->asIPBCDataCallGraph())
156-
if (canPersist != IPBC_ENTRY_PERSIST_LOCK && callGraphEntry->isLocked())
157-
callGraphEntry->releaseEntry();
164+
165+
uint64_t totalSamples = entry->getNumSamples();
166+
client->write(JITServer::MessageType::IProfiler_profilingSample, entryBytes, totalSamples, (size_t)1, /*wholeMethod=*/false, usePersistentCache, isCompiled, uncachedClasses, classInfoTuples);
158167
}
159-
else // No valid info for specified bytecode index
168+
else
160169
{
161-
client->write(JITServer::MessageType::IProfiler_profilingSample, std::string(), false, usePersistentCache, isCompiled);
170+
client->write(JITServer::MessageType::IProfiler_profilingSample, std::string(), (uint64_t)0, (size_t)0, /*wholeMethod=*/false, usePersistentCache, isCompiled, uncachedClasses, classInfoTuples);
162171
}
172+
// Unlock the entry
173+
if (auto callGraphEntry = entry->asIPBCDataCallGraph())
174+
if (canPersist != IPBC_ENTRY_PERSIST_LOCK && callGraphEntry->isLocked())
175+
callGraphEntry->releaseEntry();
176+
}
177+
else // No valid info for specified bytecode index
178+
{
179+
client->write(JITServer::MessageType::IProfiler_profilingSample, std::string(), (uint64_t)0, (size_t)0, /*wholeMethod=*/false, usePersistentCache, isCompiled, uncachedClasses, classInfoTuples);
163180
}
164181
}
165182

@@ -3002,20 +3019,97 @@ handleServerMessage(JITServer::ClientStream *client, TR_J9VM *fe, JITServer::Mes
30023019
break;
30033020
case MessageType::AOTCache_getROMClassBatch:
30043021
{
3022+
// The server has asked about N classes but we may send ClassInfos for N+M classes with
3023+
// M being the number of base component classes that the server does not yet have.
3024+
// The classes for those extra elements are passed back in extraClasses.
3025+
std::vector<J9Class *> extraClasses;
30053026
auto recv = client->getRecvData<std::vector<J9Class *>>();
3006-
auto &ramClasses = std::get<0>(recv);
3007-
std::vector<JITServerHelpers::ClassInfoTuple> classInfos;
3027+
auto &ramClasses = std::get<0>(recv); // The classes for which the server requests information
3028+
3029+
std::vector<JITServerHelpers::ClassInfoTuple> classInfos; // This will be filled and returned to server
30083030
classInfos.reserve(ramClasses.size());
30093031

3032+
TR::StackMemoryRegion region(*trMemory);
3033+
Vector<J9Class *> baseComponentClasses(region); // Temporary storage for base classes of arrays
3034+
baseComponentClasses.reserve(ramClasses.size());
3035+
Vector<J9Class *> uncachedBaseComponentClasses(region); // Filtered set of base component classes
3036+
30103037
for (J9Class *ramClass : ramClasses)
3011-
classInfos.push_back(JITServerHelpers::packRemoteROMClassInfo(ramClass, fe->vmThread(), trMemory, true));
3038+
{
3039+
classInfos.push_back(JITServerHelpers::packRemoteROMClassInfo(ramClass, vmThread, trMemory, true/*serializeClass*/));
3040+
3041+
// If this is an array class, remember its base component class for later
3042+
int32_t numDimensions = 0;
3043+
J9Class *baseComponent = (J9Class *)TR_J9VMBase::staticGetBaseComponentClass((TR_OpaqueClassBlock *)ramClass, numDimensions);
3044+
if (numDimensions)
3045+
baseComponentClasses.push_back(baseComponent);
3046+
}
30123047

3048+
// Determine which baseComponent classes the server does not yet have
3049+
uncachedBaseComponentClasses.reserve(baseComponentClasses.size());
3050+
if (!baseComponentClasses.empty())
30133051
{
30143052
OMR::CriticalSection cs(compInfo->getclassesCachedAtServerMonitor());
3015-
compInfo->getclassesCachedAtServer().insert(ramClasses.begin(), ramClasses.end());
3053+
const auto &classesCachedAtServer = compInfo->getclassesCachedAtServer();
3054+
for (J9Class *baseComponent : baseComponentClasses)
3055+
{
3056+
if (classesCachedAtServer.find(baseComponent) == classesCachedAtServer.end()) // server doesn't have it
3057+
{
3058+
uncachedBaseComponentClasses.push_back(baseComponent);
3059+
}
3060+
}
30163061
}
30173062

3018-
client->write(response, classInfos);
3063+
// Add the uncached baseComponent classes to the classInfos vector
3064+
extraClasses.reserve(uncachedBaseComponentClasses.size());
3065+
for (J9Class *baseComponent : uncachedBaseComponentClasses)
3066+
{
3067+
classInfos.push_back(JITServerHelpers::packRemoteROMClassInfo(baseComponent, vmThread, trMemory, true/*serializeClass*/));
3068+
extraClasses.push_back(baseComponent);
3069+
}
3070+
3071+
// Send the information to the server.
3072+
client->write(response, classInfos, extraClasses);
3073+
3074+
// Finally, update client's view of classes cached by the server.
3075+
{
3076+
OMR::CriticalSection cs(compInfo->getclassesCachedAtServerMonitor());
3077+
compInfo->getclassesCachedAtServer().insert(ramClasses.begin(), ramClasses.end());
3078+
compInfo->getclassesCachedAtServer().insert(uncachedBaseComponentClasses.begin(), uncachedBaseComponentClasses.end());
3079+
} // end critical section
3080+
}
3081+
break;
3082+
3083+
case MessageType::AOTCache_getRAMClassFromClassRecordBatch:
3084+
{
3085+
// Convert several AOT cache class IDs to this client's RAMClasses
3086+
auto recv = client->getRecvData<std::vector<uintptr_t>, std::string>();
3087+
auto &classIds = std::get<0>(recv); // vector of classIDs that need to be converted into j9classes
3088+
auto &recordsStr = std::get<1>(recv); // packed serialization records that the client is missing
3089+
3090+
std::vector<J9Class *> ramClasses;
3091+
ramClasses.reserve(classIds.size());
3092+
3093+
if (auto deserializer = compInfo->getJITServerAOTDeserializer())
3094+
{
3095+
bool wasReset = false;
3096+
deserializer->cacheRecords((const uint8_t *)recordsStr.data(), recordsStr.size(), comp,
3097+
/*ignoreFailures=*/true, wasReset);
3098+
if (!wasReset)
3099+
{
3100+
for (uintptr_t id : classIds)
3101+
{
3102+
J9Class *ramClass = deserializer->getRAMClass(id, comp, wasReset);
3103+
if (wasReset)
3104+
{
3105+
ramClasses.clear();
3106+
break;
3107+
}
3108+
ramClasses.push_back(ramClass); // possibly NULL
3109+
}
3110+
}
3111+
}
3112+
client->write(response, ramClasses);
30193113
}
30203114
break;
30213115
default:

runtime/compiler/net/CommunicationStream.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,7 @@ class CommunicationStream
129129
// likely to lose an increment when merging/rebasing/etc.
130130
//
131131
static const uint8_t MAJOR_NUMBER = 1;
132-
static const uint16_t MINOR_NUMBER = 79; // ID: Su+UK1Q5oJlgUkWIBA6f
132+
static const uint16_t MINOR_NUMBER = 80; // ID: snCDuoE2+InCmAiOg6YU
133133
static const uint8_t PATCH_NUMBER = 0;
134134
static uint32_t CONFIGURATION_FLAGS;
135135

runtime/compiler/net/MessageTypes.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -265,6 +265,7 @@ const char *messageNames[] =
265265
"KnownObjectTable_addFieldAddressFromBaseIndex",
266266
"KnownObjectTable_getFieldAddressData",
267267
"AOTCache_getROMClassBatch",
268+
"AOTCache_getRAMClassFromClassRecordBatch",
268269
"AOTCacheMap_request",
269270
"AOTCacheMap_reply"
270271
};

runtime/compiler/net/MessageTypes.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -294,6 +294,7 @@ enum MessageType : uint16_t
294294
KnownObjectTable_getFieldAddressData,
295295

296296
AOTCache_getROMClassBatch,
297+
AOTCache_getRAMClassFromClassRecordBatch,
297298

298299
AOTCacheMap_request,
299300
AOTCacheMap_reply,

runtime/compiler/runtime/IProfiler.cpp

Lines changed: 29 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -976,6 +976,7 @@ TR_IProfiler::addSampleData(TR_IPBytecodeHashTableEntry *entry, uintptr_t data,
976976
// Overflow detected; divide both counters by 2
977977
existingData >>= 1;
978978
existingData &= 0x7FFF7FFF;
979+
entry->setOverflow();
979980
}
980981

981982
entry->setData(existingData + (1<<16));
@@ -988,6 +989,7 @@ TR_IProfiler::addSampleData(TR_IPBytecodeHashTableEntry *entry, uintptr_t data,
988989
// Overflow detected; divide both counters by 2
989990
existingData >>= 1;
990991
existingData &= 0x7FFF7FFF;
992+
entry->setOverflow();
991993
}
992994

993995
entry->setData(existingData + 1);
@@ -2759,7 +2761,10 @@ TR_IPBCDataCallGraph::setData(uintptr_t v, uint32_t freq)
27592761
uint16_t oldWeight = _csInfo._weight[i];
27602762
uint16_t newWeight = oldWeight + freq;
27612763
if (newWeight < oldWeight)
2762-
newWeight = 0xFFFF;
2764+
{
2765+
newWeight = 0xFFFF; // capped to 0xFFFF
2766+
setOverflow();
2767+
}
27632768
_csInfo._weight[i] = newWeight;
27642769
returnCount = newWeight;
27652770
found = true;
@@ -2783,8 +2788,11 @@ TR_IPBCDataCallGraph::setData(uintptr_t v, uint32_t freq)
27832788
// Must update the `residue` bucket
27842789
uint16_t oldResidueWeight = _csInfo._residueWeight;
27852790
uint16_t newResidueWeight = oldResidueWeight + freq;
2786-
if (newResidueWeight > 0x7FFF)
2791+
if (newResidueWeight > 0x7FFF) // _residueWeight is kept on 15 bits
2792+
{
27872793
newResidueWeight = 0x7FFF;
2794+
setOverflow();
2795+
}
27882796
_csInfo._residueWeight = newResidueWeight;
27892797
returnCount = newResidueWeight;
27902798

@@ -2803,6 +2811,7 @@ TR_IPBCDataCallGraph::setData(uintptr_t v, uint32_t freq)
28032811
_csInfo.setClazz(0, v);
28042812
_csInfo._residueWeight = 0;
28052813
returnCount = freq;
2814+
setCountersWereReset();
28062815
releaseEntry();
28072816
}
28082817
}
@@ -3001,7 +3010,7 @@ TR_IPBCDataCallGraph::canBeSerialized(TR::PersistentInfo *info)
30013010

30023011

30033012
/**
3004-
* API used by JITClient to serialize IP data of a method
3013+
* @brief API used by JITClient to serialize IP data of a method
30053014
*
30063015
* @param methodStartAddress Start address of the bytecodes for the method
30073016
* @param storage Storage area where we serialize entries
@@ -3018,7 +3027,7 @@ TR_IPBCDataCallGraph::serialize(uintptr_t methodStartAddress, TR_IPBCDataStorage
30183027
storage->ID = TR_IPBCD_CALL_GRAPH;
30193028
storage->left = 0;
30203029
storage->right = 0;
3021-
for (int32_t i=0; i < NUM_CS_SLOTS;i++)
3030+
for (int32_t i = 0; i < NUM_CS_SLOTS; i++)
30223031
{
30233032
J9Class *clazz = (J9Class *) _csInfo.getClazz(i);
30243033
if (clazz)
@@ -4554,30 +4563,6 @@ TR_IPHashedCallSite::operator new (size_t size) throw()
45544563
}
45554564

45564565

4557-
uintptr_t CallSiteProfileInfo::getClazz(int index)
4558-
{
4559-
if (TR::Compiler->om.compressObjectReferences())
4560-
//support for convert code, when it is implemented, "uncompress"
4561-
return (uintptr_t)TR::Compiler->cls.convertClassOffsetToClassPtr((TR_OpaqueClassBlock *)(uintptr_t)_clazz[index]);
4562-
else
4563-
return (uintptr_t)_clazz[index]; //things are just stored as regular pointers otherwise
4564-
}
4565-
4566-
4567-
void CallSiteProfileInfo::setClazz(int index, uintptr_t clazzPointer)
4568-
{
4569-
if (TR::Compiler->om.compressObjectReferences())
4570-
{
4571-
//support for convert code, when it is implemented, do compression
4572-
TR_OpaqueClassBlock * compressedOffset = J9JitMemory::convertClassPtrToClassOffset((J9Class *)clazzPointer); //compressed 32bit pointer
4573-
//if we end up with something in the top 32bits, our compression is no good...
4574-
TR_ASSERT((!(0xFFFFFFFF00000000 & (uintptr_t)compressedOffset)), "Class pointer contains bits in the top word. Pointer given: %p Compressed: %p", clazzPointer, compressedOffset);
4575-
_clazz[index] = (uint32_t)((uintptr_t)compressedOffset); //ditch the top zeros
4576-
}
4577-
else
4578-
_clazz[index] = (uintptr_t)clazzPointer;
4579-
}
4580-
45814566
uintptr_t
45824567
CallSiteProfileInfo::getDominantClass(int32_t &sumW, int32_t &maxW)
45834568
{
@@ -4603,6 +4588,22 @@ CallSiteProfileInfo::getDominantClass(int32_t &sumW, int32_t &maxW)
46034588
return data;
46044589
}
46054590

4591+
// Finds the call-site slot with the largest recorded weight.
//
// Returns the index (0 .. NUM_CS_SLOTS-1) of the first slot whose weight is
// strictly greater than both the residue weight and the weight of every
// earlier slot. Returns NUM_CS_SLOTS when no slot's weight strictly exceeds
// _residueWeight, so callers must treat NUM_CS_SLOTS as "no dominant slot".
uint32_t
4592+
CallSiteProfileInfo::getDominantSlot() const
4593+
{
4594+
// Start from the residue bucket: a slot only wins if it beats this weight.
uint32_t maxWeight = _residueWeight;
4595+
// NUM_CS_SLOTS is the out-of-range sentinel meaning "no slot selected yet".
uint32_t maxIndex = NUM_CS_SLOTS;
4596+
for (uint32_t i = 0; i < NUM_CS_SLOTS; i++)
4597+
{
4598+
// Strict comparison: on a tie the earlier winner (or the residue) is kept.
if (_weight[i] > maxWeight)
4599+
{
4600+
maxWeight = _weight[i];
4601+
maxIndex = i;
4602+
}
4603+
}
4604+
return maxIndex;
4605+
}
4606+
46064607
// Supporting code for dumping IProfiler data to stderr to track possible
46074608
// performance issues due to insufficient or wrong IProfiler info
46084609
// Code is currently inactive. To actually use one must issue

0 commit comments

Comments
 (0)