
Commit 1d01403

Merge branch 'profiling' into profiling_stripped_warming_queries

2 parents 59e6bc5 + 780f3cf

File tree: 4 files changed, +413 −13 lines

src/main/java/com/yelp/nrtsearch/server/grpc/LuceneServer.java

Lines changed: 2 additions & 0 deletions
```diff
@@ -248,6 +248,8 @@ private void registerMetrics(GlobalState globalState) {
     new ProcStatCollector().register(collectorRegistry);
     new MergeSchedulerCollector(globalState).register(collectorRegistry);
     new SearchResponseCollector(globalState).register(collectorRegistry);
+
+    CustomIndexingMetrics.register(collectorRegistry);
   }
 
   /** Main launches the server from the command line. */
```

src/main/java/com/yelp/nrtsearch/server/luceneserver/AddDocumentHandler.java

Lines changed: 140 additions & 13 deletions
```diff
@@ -17,23 +17,33 @@
 
 import com.google.protobuf.ProtocolStringList;
 import com.yelp.nrtsearch.server.grpc.AddDocumentRequest;
+import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField;
 import com.yelp.nrtsearch.server.grpc.DeadlineUtils;
 import com.yelp.nrtsearch.server.grpc.FacetHierarchyPath;
+import com.yelp.nrtsearch.server.luceneserver.Handler.HandlerException;
 import com.yelp.nrtsearch.server.luceneserver.field.FieldDef;
 import com.yelp.nrtsearch.server.luceneserver.field.IdFieldDef;
 import com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef;
+import com.yelp.nrtsearch.server.monitoring.CustomIndexingMetrics;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Queue;
+import java.util.Set;
 import java.util.concurrent.Callable;
 import java.util.concurrent.LinkedBlockingDeque;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
```

```diff
@@ -45,6 +55,13 @@ public class AddDocumentHandler {
    * context for the AddDocumentRequest including root document and optional child documents if
    * schema contains nested objects
    */
+  /*
+   constants matching elasticpipe, only needed for the POC; to be deleted.
+  */
+  private static final String PARTIAL_UPDATE_KEY = "_is_partial_update";
+
+  private static final String PARTIAL_UPDATE_FIELDS = "_partial_update_fields";
+
   public static class DocumentsContext {
     private final Document rootDocument;
     private final Map<String, List<Document>> childDocuments;
@@ -77,8 +94,12 @@ public static DocumentsContext getDocumentsContext(
       AddDocumentRequest addDocumentRequest, IndexState indexState)
       throws AddDocumentHandlerException {
     DocumentsContext documentsContext = new DocumentsContext();
-    Map<String, AddDocumentRequest.MultiValuedField> fields = addDocumentRequest.getFieldsMap();
-    for (Map.Entry<String, AddDocumentRequest.MultiValuedField> entry : fields.entrySet()) {
+    Map<String, MultiValuedField> fields = addDocumentRequest.getFieldsMap();
+    for (Entry<String, MultiValuedField> entry : fields.entrySet()) {
+      if (entry.getKey().equals(PARTIAL_UPDATE_KEY)
+          || entry.getKey().equals(PARTIAL_UPDATE_FIELDS)) {
+        continue;
+      }
       parseOneField(entry.getKey(), entry.getValue(), documentsContext, indexState);
     }
 
@@ -116,7 +137,7 @@ private static void extractFieldNamesForDocument(Document document) {
   /** Parses a field's value, which is a MultiValuedField in all cases */
   private static void parseOneField(
       String fieldName,
-      AddDocumentRequest.MultiValuedField value,
+      MultiValuedField value,
       DocumentsContext documentsContext,
       IndexState indexState)
       throws AddDocumentHandlerException {
@@ -125,9 +146,7 @@ private static void parseOneField(
 
   /** Parse MultiValuedField for a single field, which is always a List<String>. */
   private static void parseMultiValueField(
-      FieldDef field,
-      AddDocumentRequest.MultiValuedField value,
-      DocumentsContext documentsContext)
+      FieldDef field, MultiValuedField value, DocumentsContext documentsContext)
       throws AddDocumentHandlerException {
     ProtocolStringList fieldValues = value.getValueList();
     List<FacetHierarchyPath> facetHierarchyPaths = value.getFaceHierarchyPathsList();
@@ -153,7 +172,7 @@ private static void parseMultiValueField(
     }
   }
 
-  public static class AddDocumentHandlerException extends Handler.HandlerException {
+  public static class AddDocumentHandlerException extends HandlerException {
     public AddDocumentHandlerException(String errorMessage) {
       super(errorMessage);
     }
```
```diff
@@ -181,6 +200,40 @@ public DocumentIndexer(
       this.indexName = indexName;
     }
 
+    private static boolean isPartialUpdate(AddDocumentRequest addDocumentRequest) {
+      return addDocumentRequest.getFieldsMap().containsKey(PARTIAL_UPDATE_KEY)
+          && Boolean.parseBoolean(
+              addDocumentRequest.getFieldsMap().get(PARTIAL_UPDATE_KEY).getValue(0));
+    }
+
+    private static Set<String> getPartialUpdateFields(AddDocumentRequest addDocumentRequest) {
+      Set<String> partialUpdateFields = new HashSet<>();
+      MultiValuedField field = addDocumentRequest.getFieldsMap().get(PARTIAL_UPDATE_FIELDS);
+      if (field != null) {
+        // For some reason, a set passed from Elasticpipe such as [inactive] arrives as the
+        // literal string "[inactive]": the leading '[' and trailing ']' are part of the value
+        // rather than list delimiters. So we strip the first and last characters, then split
+        // the remainder on commas.
+        List<String> cleansedValues =
+            field.getValueList().stream()
+                .map(value -> value.substring(1, value.length() - 1)) // Remove enclosing brackets
+                .flatMap(
+                    value -> {
+                      if (value.contains(",")) {
+                        return Arrays.stream(value.split(","));
+                      } else {
+                        return Stream.of(value);
+                      }
+                    })
+                .map(String::trim) // Trim each element
+                .collect(Collectors.toList());
+        partialUpdateFields.addAll(cleansedValues);
+      }
+      return partialUpdateFields;
+    }
+
     public long runIndexingJob() throws Exception {
       DeadlineUtils.checkDeadline("DocumentIndexer: runIndexingJob", "INDEXING");
 
```
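For reference, here is a sketch of the request shape these helpers consume. The control fields `_is_partial_update`, `_partial_update_fields`, and `ad_bid_id` come from this commit; the `inactive`/`budget` fields and all values are hypothetical. Note `_partial_update_fields` arriving as a single literal bracketed string, the quirk `getPartialUpdateFields` works around:

```java
import com.yelp.nrtsearch.server.grpc.AddDocumentRequest;
import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField;

public class PartialUpdateRequestSketch {
  public static void main(String[] args) {
    // All field values below are made up for illustration.
    AddDocumentRequest request =
        AddDocumentRequest.newBuilder()
            .putFields(
                "_is_partial_update", MultiValuedField.newBuilder().addValue("true").build())
            .putFields(
                "_partial_update_fields",
                // arrives as one literal string, brackets included
                MultiValuedField.newBuilder().addValue("[inactive, budget]").build())
            .putFields("ad_bid_id", MultiValuedField.newBuilder().addValue("12345").build())
            .putFields("inactive", MultiValuedField.newBuilder().addValue("true").build())
            .putFields("budget", MultiValuedField.newBuilder().addValue("100").build())
            .build();

    // isPartialUpdate(request)        -> true
    // getPartialUpdateFields(request) -> {"inactive", "budget"}
  }
}
```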
```diff
@@ -192,16 +245,44 @@ public long runIndexingJob() throws Exception {
       IndexState indexState;
       ShardState shardState;
       IdFieldDef idFieldDef;
-
+      String ad_bid_id = "";
       try {
         indexState = globalState.getIndex(this.indexName);
         shardState = indexState.getShard(0);
         idFieldDef = indexState.getIdFieldDef().orElse(null);
         for (AddDocumentRequest addDocumentRequest : addDocumentRequestList) {
+          boolean partialUpdate = isPartialUpdate(addDocumentRequest);
+          final Set<String> partialUpdateFields;
+          if (partialUpdate) {
+            // removing all fields except rtb fields for the POC; in the actual implementation
+            // we will only receive the fields that need to be updated
+            partialUpdateFields = getPartialUpdateFields(addDocumentRequest);
+            Map<String, MultiValuedField> docValueFields =
+                getDocValueFieldsForUpdateCall(addDocumentRequest, partialUpdateFields);
+            ad_bid_id = addDocumentRequest.getFieldsMap().get("ad_bid_id").getValue(0);
+            addDocumentRequest =
+                AddDocumentRequest.newBuilder().putAllFields(docValueFields).build();
+          } else {
+            partialUpdateFields = new HashSet<>();
+          }
+
           DocumentsContext documentsContext =
-              AddDocumentHandler.LuceneDocumentBuilder.getDocumentsContext(
-                  addDocumentRequest, indexState);
+              LuceneDocumentBuilder.getDocumentsContext(addDocumentRequest, indexState);
+
+          /*
+           If this is a partial update request, we only need the partial-update docValue
+           fields from the DocumentsContext.
+          */
+          List<IndexableField> partialUpdateDocValueFields = new ArrayList<>();
+          if (partialUpdate) {
+            partialUpdateDocValueFields =
+                documentsContext.getRootDocument().getFields().stream()
+                    .filter(f -> partialUpdateFields.contains(f.name()))
+                    .toList();
+          }
+
           if (documentsContext.hasNested()) {
+            logger.info("Indexing nested documents for ad_bid_id: {}", ad_bid_id);
             try {
               if (idFieldDef != null) {
                 // update documents in the queue to keep order
@@ -222,7 +303,24 @@ public long runIndexingJob() throws Exception {
               throw new IOException(e);
             }
           } else {
-            documents.add(documentsContext.getRootDocument());
+            if (partialUpdate) {
+              CustomIndexingMetrics.updateDocValuesRequestsReceived.labels(indexName).inc();
+              Term term = new Term(idFieldDef.getName(), ad_bid_id);
+              // executing the partial update
+              logger.debug(
+                  "running a partial update for the ad_bid_id: {} and fields {} in the thread {}",
+                  ad_bid_id,
+                  partialUpdateDocValueFields,
+                  Thread.currentThread().getName() + Thread.currentThread().threadId());
+              long nanoTime = System.nanoTime();
+              shardState.writer.updateDocValues(
+                  term, partialUpdateDocValueFields.toArray(new Field[0]));
+              CustomIndexingMetrics.updateDocValuesLatency
+                  .labels(indexName)
+                  .set((System.nanoTime() - nanoTime));
+            } else {
+              documents.add(documentsContext.getRootDocument());
+            }
           }
         }
       } catch (Exception e) {
```
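The partial-update branch above goes through Lucene's `IndexWriter.updateDocValues(Term, Field...)`, which rewrites only the doc-values for documents matching the term instead of re-indexing the whole document; the supplied fields must be doc-values types such as `NumericDocValuesField` or `BinaryDocValuesField`. A minimal standalone sketch of that API, separate from this commit, with illustrative field names:

```java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;

public class UpdateDocValuesSketch {
  public static void main(String[] args) throws Exception {
    try (ByteBuffersDirectory dir = new ByteBuffersDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      Document doc = new Document();
      doc.add(new StringField("id", "doc1", Field.Store.NO));
      doc.add(new NumericDocValuesField("bid", 100L));
      writer.addDocument(doc);

      // In-place doc-values update: only the "bid" doc values of documents matching
      // the term are rewritten; the inverted index and stored fields are untouched.
      writer.updateDocValues(new Term("id", "doc1"), new NumericDocValuesField("bid", 120L));
      writer.commit();
    }
  }
}
```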
```diff
@@ -252,6 +350,15 @@ public long runIndexingJob() throws Exception {
       return shardState.writer.getMaxCompletedSequenceNumber();
     }
 
+    private static Map<String, MultiValuedField> getDocValueFieldsForUpdateCall(
+        AddDocumentRequest addDocumentRequest, Set<String> partialUpdateFields) {
+      Map<String, MultiValuedField> docValueFields =
+          addDocumentRequest.getFieldsMap().entrySet().stream()
+              .filter(e -> partialUpdateFields.contains(e.getKey()))
+              .collect(Collectors.toMap(Entry::getKey, Entry::getValue));
+      return docValueFields;
+    }
+
     /**
      * update documents with nested objects
      *
@@ -267,7 +374,7 @@ private void updateNestedDocuments(
         ShardState shardState)
         throws IOException {
       List<Document> documents = new ArrayList<>();
-      for (Map.Entry<String, List<Document>> e : documentsContext.getChildDocuments().entrySet()) {
+      for (Entry<String, List<Document>> e : documentsContext.getChildDocuments().entrySet()) {
         documents.addAll(
             e.getValue().stream()
                 .map(v -> handleFacets(indexState, shardState, v))
@@ -282,7 +389,12 @@ private void updateNestedDocuments(
       }
 
       documents.add(rootDoc);
+      CustomIndexingMetrics.addDocumentRequestsReceived.labels(indexName).inc();
+      long nanoTime = System.nanoTime();
       shardState.writer.updateDocuments(idFieldDef.getTerm(rootDoc), documents);
+      CustomIndexingMetrics.addDocumentLatency
+          .labels(indexName)
+          .set((System.nanoTime() - nanoTime));
     }
 
     /**
@@ -296,15 +408,20 @@ private void addNestedDocuments(
         DocumentsContext documentsContext, IndexState indexState, ShardState shardState)
         throws IOException {
       List<Document> documents = new ArrayList<>();
-      for (Map.Entry<String, List<Document>> e : documentsContext.getChildDocuments().entrySet()) {
+      for (Entry<String, List<Document>> e : documentsContext.getChildDocuments().entrySet()) {
         documents.addAll(
             e.getValue().stream()
                 .map(v -> handleFacets(indexState, shardState, v))
                 .collect(Collectors.toList()));
       }
       Document rootDoc = handleFacets(indexState, shardState, documentsContext.getRootDocument());
       documents.add(rootDoc);
+      CustomIndexingMetrics.addDocumentRequestsReceived.labels(indexName).inc();
+      long nanoTime = System.nanoTime();
       shardState.writer.addDocuments(documents);
+      CustomIndexingMetrics.addDocumentLatency
+          .labels(indexName)
+          .set((System.nanoTime() - nanoTime));
     }
 
     private void updateDocuments(
@@ -314,8 +431,13 @@ private void updateDocuments(
         ShardState shardState)
         throws IOException {
       for (Document nextDoc : documents) {
+        CustomIndexingMetrics.addDocumentRequestsReceived.labels(indexName).inc();
+        long nanoTime = System.nanoTime();
        nextDoc = handleFacets(indexState, shardState, nextDoc);
        shardState.writer.updateDocument(idFieldDef.getTerm(nextDoc), nextDoc);
+        CustomIndexingMetrics.addDocumentLatency
+            .labels(indexName)
+            .set((System.nanoTime() - nanoTime));
       }
     }
 
@@ -326,6 +448,8 @@ private void addDocuments(
         throw new IllegalStateException(
             "Adding documents to an index on a replica node is not supported");
       }
+      CustomIndexingMetrics.addDocumentRequestsReceived.labels(indexName).inc(documents.size());
+      long nanoTime = System.nanoTime();
       shardState.writer.addDocuments(
           (Iterable<Document>)
               () ->
@@ -349,6 +473,9 @@ public Document next() {
                 return nextDoc;
               }
             });
+      CustomIndexingMetrics.addDocumentLatency
+          .labels(indexName)
+          .set((System.nanoTime() - nanoTime) / documents.size());
     }
 
     private Document handleFacets(IndexState indexState, ShardState shardState, Document nextDoc) {
```
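A few notes on the instrumentation pattern repeated above: the latency gauges store raw `System.nanoTime()` deltas, so the recorded values are in nanoseconds, and the bulk `addDocuments` path divides the elapsed time by `documents.size()` to approximate a per-document average. Since a Prometheus Gauge keeps only the most recent `set()` value, samples from concurrent indexing threads overwrite one another; a Summary or Histogram would be the usual choice if a latency distribution is wanted rather than a last-observed value.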
src/main/java/com/yelp/nrtsearch/server/monitoring/CustomIndexingMetrics.java (new file)

Lines changed: 59 additions & 0 deletions
```java
/*
 * Copyright 2025 Yelp Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.yelp.nrtsearch.server.monitoring;

import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.Counter;
import io.prometheus.client.Gauge;

public class CustomIndexingMetrics {
  public static final Counter updateDocValuesRequestsReceived =
      Counter.build()
          .name("update_doc_values_requests_received")
          .help("Number of requests received for the update doc values API")
          .labelNames("index")
          .create();

  // counter for addDocument requests received for the index with the index name as the label value
  public static final Counter addDocumentRequestsReceived =
      Counter.build()
          .name("add_document_requests_received")
          .help("Number of requests received for the add document API")
          .labelNames("index")
          .create();

  public static final Gauge updateDocValuesLatency =
      Gauge.build()
          .name("update_doc_values_latency")
          .help("Latency of the update doc values API")
          .labelNames("index")
          .create();

  // gauge for the latency of the addDocument API with the index name as the label value
  public static final Gauge addDocumentLatency =
      Gauge.build()
          .name("add_document_latency")
          .help("Latency of the add document API")
          .labelNames("index")
          .create();

  public static void register(CollectorRegistry registry) {
    registry.register(updateDocValuesRequestsReceived);
    registry.register(addDocumentRequestsReceived);
    registry.register(updateDocValuesLatency);
    registry.register(addDocumentLatency);
  }
}
```
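Because each collector here is built with `.create()` rather than `.register()`, it is not attached to any registry until `CustomIndexingMetrics.register(...)` runs, which is what the LuceneServer change at the top of this commit does at startup. A minimal usage sketch, with a hypothetical index label value:

```java
import io.prometheus.client.CollectorRegistry;

public class CustomIndexingMetricsDemo {
  public static void main(String[] args) throws Exception {
    CollectorRegistry registry = new CollectorRegistry();
    CustomIndexingMetrics.register(registry);

    // Count one addDocument request for a hypothetical index and record its
    // latency in nanoseconds, mirroring the pattern in AddDocumentHandler.
    CustomIndexingMetrics.addDocumentRequestsReceived.labels("test_index").inc();
    long start = System.nanoTime();
    Thread.sleep(5); // stand-in for the actual index write
    CustomIndexingMetrics.addDocumentLatency.labels("test_index").set(System.nanoTime() - start);

    System.out.println(
        "requests: "
            + CustomIndexingMetrics.addDocumentRequestsReceived.labels("test_index").get()
            + ", last latency (ns): "
            + CustomIndexingMetrics.addDocumentLatency.labels("test_index").get());
  }
}
```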
