Skip to content

Commit 874f461

Browse files
Implementing document updates with updateDocValues API (#847)
1. This PR implements updating documents with the updateDocValues API. 2. It adds the metrics we used to compare the POC performance: updateDocValues QPS, updateDocValues latency, individual addDocument QPS, and individual addDocument latency.
1 parent 1230af2 commit 874f461

File tree

7 files changed

+672
-1
lines changed

7 files changed

+672
-1
lines changed

clientlib/src/main/proto/yelp/nrtsearch/luceneserver.proto

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,8 @@ message AddDocumentRequest {
843843
}
844844
// Map of field name to a list of string values
845845
map<string, MultiValuedField> fields = 3;
846+
// request type
847+
IndexingRequestType requestType = 4;
846848
}
847849

848850
// Path for hierarchical facets
@@ -1510,3 +1512,10 @@ message CustomRequest {
15101512
message CustomResponse {
15111513
map<string, string> response = 1; // Custom response sent by the plugin
15121514
}
1515+
1516+
// Selects how the indexing handler processes an AddDocumentRequest (see its requestType field).
enum IndexingRequestType {
1517+
// Request to add a document.
// NOTE(review): as the zero value, this is presumably what a request that never sets
// requestType resolves to - confirm against this file's proto syntax version.
1518+
ADD_DOCUMENT = 0;
1519+
// Request to update doc values of an existing document instead of adding a new one
1520+
UPDATE_DOC_VALUES = 1;
1521+
}

src/main/java/com/yelp/nrtsearch/server/field/NumberFieldDef.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import com.yelp.nrtsearch.server.doc.LoadedDocValues;
2121
import com.yelp.nrtsearch.server.field.properties.Bindable;
22+
import com.yelp.nrtsearch.server.field.properties.DocValueUpdatable;
2223
import com.yelp.nrtsearch.server.field.properties.RangeQueryable;
2324
import com.yelp.nrtsearch.server.field.properties.Sortable;
2425
import com.yelp.nrtsearch.server.field.properties.TermQueryable;
@@ -49,7 +50,7 @@
4950
* @param <T> doc value object type
5051
*/
5152
public abstract class NumberFieldDef<T> extends IndexableFieldDef<T>
52-
implements Bindable, Sortable, RangeQueryable, TermQueryable {
53+
implements Bindable, Sortable, RangeQueryable, TermQueryable, DocValueUpdatable {
5354
public static final Function<String, Number> INT_PARSER = Integer::valueOf;
5455
public static final Function<String, Number> LONG_PARSER = Long::valueOf;
5556
public static final Function<String, Number> FLOAT_PARSER = Float::valueOf;
@@ -274,4 +275,21 @@ public SortField getSortField(SortType type) {
274275
sortField.setMissingValue(getSortMissingValue(missingLast));
275276
return sortField;
276277
}
278+
279+
@Override
280+
public boolean isUpdatable() {
281+
if (isSearchable() || isMultiValue() || !hasDocValues()) {
282+
return false;
283+
}
284+
return true;
285+
}
286+
287+
@Override
288+
public org.apache.lucene.document.Field getUpdatableDocValueField(List<String> val) {
289+
if (val.size() > 1) {
290+
throw new IllegalArgumentException(
291+
"Cannot update multiple value field with docValueUpdate API");
292+
}
293+
return getDocValueField(parseNumberString(val.get(0)));
294+
}
277295
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Copyright 2025 Yelp Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.yelp.nrtsearch.server.field.properties;
17+
18+
import java.util.List;
19+
20+
/**
21+
* Interface for {@link com.yelp.nrtsearch.server.field.FieldDef} types that can have their doc
22+
* values updated. For now, Lucene supports updating doc values for numeric and binary fields.
23+
*/
24+
public interface DocValueUpdatable {
25+
26+
/**
27+
* Determines whether this {@link com.yelp.nrtsearch.server.field.FieldDef} can be updated. This
28+
* depends on other properties of the field, such as whether it is searchable or multivalued.
29+
*
30+
* @return {@code true} if the field can be updated
31+
*/
32+
boolean isUpdatable();
33+
34+
/**
* Builds the Lucene field carrying the new doc value for this {@link
* com.yelp.nrtsearch.server.field.FieldDef}.
*
35+
* @param value raw string value(s) to write as the field's new doc value
36+
* @return the Lucene doc value field to apply in the update
37+
*/
38+
org.apache.lucene.document.Field getUpdatableDocValueField(List<String> value);
39+
}

src/main/java/com/yelp/nrtsearch/server/grpc/NrtsearchServer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
import com.yelp.nrtsearch.server.monitoring.DeadlineMetrics;
8787
import com.yelp.nrtsearch.server.monitoring.DirSizeCollector;
8888
import com.yelp.nrtsearch.server.monitoring.IndexMetrics;
89+
import com.yelp.nrtsearch.server.monitoring.IndexingMetrics;
8990
import com.yelp.nrtsearch.server.monitoring.MergeSchedulerCollector;
9091
import com.yelp.nrtsearch.server.monitoring.NrtMetrics;
9192
import com.yelp.nrtsearch.server.monitoring.NrtsearchMonitoringServerInterceptor;
@@ -264,6 +265,8 @@ private void registerMetrics(GlobalState globalState) {
264265
prometheusRegistry.register(new ProcStatCollector());
265266
prometheusRegistry.register(new MergeSchedulerCollector(globalState));
266267
prometheusRegistry.register(new SearchResponseCollector(globalState));
268+
// register Indexing metrics such as individual addDocument, updateDocValue latencies and qps
269+
IndexingMetrics.register(prometheusRegistry);
267270
}
268271

269272
/** Main launches the server from the command line. */

src/main/java/com/yelp/nrtsearch/server/handler/AddDocumentHandler.java

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,16 @@
2121
import com.yelp.nrtsearch.server.field.FieldDef;
2222
import com.yelp.nrtsearch.server.field.IdFieldDef;
2323
import com.yelp.nrtsearch.server.field.IndexableFieldDef;
24+
import com.yelp.nrtsearch.server.field.properties.DocValueUpdatable;
2425
import com.yelp.nrtsearch.server.grpc.AddDocumentRequest;
26+
import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField;
2527
import com.yelp.nrtsearch.server.grpc.AddDocumentResponse;
2628
import com.yelp.nrtsearch.server.grpc.DeadlineUtils;
2729
import com.yelp.nrtsearch.server.grpc.FacetHierarchyPath;
30+
import com.yelp.nrtsearch.server.grpc.IndexingRequestType;
2831
import com.yelp.nrtsearch.server.index.IndexState;
2932
import com.yelp.nrtsearch.server.index.ShardState;
33+
import com.yelp.nrtsearch.server.monitoring.IndexingMetrics;
3034
import com.yelp.nrtsearch.server.state.GlobalState;
3135
import io.grpc.Context;
3236
import io.grpc.Status;
@@ -46,7 +50,9 @@
4650
import java.util.concurrent.RejectedExecutionException;
4751
import java.util.stream.Collectors;
4852
import org.apache.lucene.document.Document;
53+
import org.apache.lucene.document.Field;
4954
import org.apache.lucene.index.IndexableField;
55+
import org.apache.lucene.index.Term;
5056
import org.slf4j.Logger;
5157
import org.slf4j.LoggerFactory;
5258

@@ -376,6 +382,8 @@ public AddDocumentHandlerException(Throwable err) {
376382
}
377383

378384
public static class DocumentIndexer implements Callable<Long> {
385+
386+
public static final double ONE_MILLION = 1000000.0;
379387
private final GlobalState globalState;
380388
private final List<AddDocumentRequest> addDocumentRequestList;
381389
private final String indexName;
@@ -406,6 +414,10 @@ public long runIndexingJob() throws Exception {
406414
shardState = indexState.getShard(0);
407415
idFieldDef = indexState.getIdFieldDef().orElse(null);
408416
for (AddDocumentRequest addDocumentRequest : addDocumentRequestList) {
417+
if (addDocumentRequest.getRequestType().equals(IndexingRequestType.UPDATE_DOC_VALUES)) {
418+
executeDocValueUpdateRequest(indexState, shardState, addDocumentRequest);
419+
continue;
420+
}
409421
DocumentsContext documentsContext =
410422
AddDocumentHandler.LuceneDocumentBuilder.getDocumentsContext(
411423
addDocumentRequest, indexState);
@@ -460,6 +472,67 @@ public long runIndexingJob() throws Exception {
460472
return shardState.writer.getMaxCompletedSequenceNumber();
461473
}
462474

475+
private void executeDocValueUpdateRequest(
476+
IndexState indexState, ShardState shardState, AddDocumentRequest addDocumentRequest) {
477+
try {
478+
IndexingMetrics.updateDocValuesRequestsReceived.labelValues(indexName).inc();
479+
Term term = buildTermForDocValueUpdate(indexState, addDocumentRequest);
480+
List<Field> updatableDocValueFields = new ArrayList<>();
481+
for (Map.Entry<String, MultiValuedField> entry :
482+
addDocumentRequest.getFieldsMap().entrySet()) {
483+
FieldDef field = indexState.getField(entry.getKey());
484+
if (field == null) {
485+
throw new IllegalArgumentException(
486+
String.format("Field: %s is not registered", entry.getKey()));
487+
}
488+
if (field.getName().equals(indexState.getIdFieldDef().get().getName())) continue;
489+
490+
if (!(field instanceof DocValueUpdatable updatable) || !(updatable.isUpdatable())) {
491+
throw new IllegalArgumentException(
492+
String.format("Field: %s is not updatable", field.getName()));
493+
}
494+
if (entry.getValue().getValueCount() > 0) {
495+
updatableDocValueFields.add(
496+
((DocValueUpdatable) field)
497+
.getUpdatableDocValueField(entry.getValue().getValueList()));
498+
}
499+
}
500+
501+
if (updatableDocValueFields.size() > 0) {
502+
long ns_start = System.nanoTime();
503+
shardState.writer.updateDocValues(term, updatableDocValueFields.toArray(new Field[0]));
504+
IndexingMetrics.updateDocValuesLatency
505+
.labelValues(indexName)
506+
.observe((System.nanoTime() - ns_start) / ONE_MILLION);
507+
}
508+
} catch (Throwable t) {
509+
logger.warn(
510+
String.format(
511+
"ThreadId: %s, IndexWriter.updateDocValues failed",
512+
Thread.currentThread().getName() + Thread.currentThread().threadId()));
513+
throw new RuntimeException("Error occurred when updating docValues ", t);
514+
}
515+
}
516+
517+
private static Term buildTermForDocValueUpdate(
518+
IndexState indexState, AddDocumentRequest addDocumentRequest) {
519+
if (indexState.getIdFieldDef().isEmpty()) {
520+
throw new RuntimeException(
521+
" Index needs to have an ID field to execute update DocValue request");
522+
}
523+
String idFieldName = indexState.getIdFieldDef().get().getName();
524+
if (addDocumentRequest.getFieldsMap().get(idFieldName).getValueCount() == 0) {
525+
throw new IllegalArgumentException(
526+
String.format("the _ID should have a value set to execute update DocValue"));
527+
}
528+
String idFieldValue = addDocumentRequest.getFieldsMap().get(idFieldName).getValue(0);
529+
530+
if (idFieldValue == null || idFieldValue.isEmpty()) {
531+
throw new IllegalArgumentException(String.format("the value of _ID field cannot be emtpy"));
532+
}
533+
return new Term(idFieldName, idFieldValue);
534+
}
535+
463536
/**
464537
* update documents with nested objects
465538
*
@@ -488,7 +561,12 @@ private void updateNestedDocuments(
488561
}
489562

490563
documents.add(rootDoc);
564+
IndexingMetrics.addDocumentRequestsReceived.labelValues(indexName).inc();
565+
long ns_start = System.nanoTime();
491566
shardState.writer.updateDocuments(idFieldDef.getTerm(rootDoc), documents);
567+
IndexingMetrics.addDocumentLatency
568+
.labelValues(indexName)
569+
.observe((System.nanoTime() - ns_start) / ONE_MILLION);
492570
}
493571

494572
/**
@@ -508,7 +586,12 @@ private void addNestedDocuments(
508586
}
509587
Document rootDoc = handleFacets(indexState, shardState, documentsContext.getRootDocument());
510588
documents.add(rootDoc);
589+
IndexingMetrics.addDocumentRequestsReceived.labelValues(indexName).inc();
590+
long ns_start = System.nanoTime();
511591
shardState.writer.addDocuments(documents);
592+
IndexingMetrics.addDocumentLatency
593+
.labelValues(indexName)
594+
.observe((System.nanoTime() - ns_start) / ONE_MILLION);
512595
}
513596

514597
private void updateDocuments(
@@ -523,7 +606,12 @@ private void updateDocuments(
523606
}
524607
for (Document nextDoc : documents) {
525608
nextDoc = handleFacets(indexState, shardState, nextDoc);
609+
IndexingMetrics.addDocumentRequestsReceived.labelValues(indexName).inc();
610+
long ns_start = System.nanoTime();
526611
shardState.writer.updateDocument(idFieldDef.getTerm(nextDoc), nextDoc);
612+
IndexingMetrics.addDocumentLatency
613+
.labelValues(indexName)
614+
.observe((System.nanoTime() - ns_start) / ONE_MILLION);
527615
}
528616
}
529617

@@ -534,6 +622,9 @@ private void addDocuments(
534622
throw new IllegalStateException(
535623
"Adding documents to an index on a replica node is not supported");
536624
}
625+
if (documents != null)
626+
IndexingMetrics.addDocumentRequestsReceived.labelValues(indexName).inc(documents.size());
627+
long ns_start = System.nanoTime();
537628
shardState.writer.addDocuments(
538629
(Iterable<Document>)
539630
() ->
@@ -557,6 +648,11 @@ public Document next() {
557648
return nextDoc;
558649
}
559650
});
651+
if (documents != null && documents.size() >= 1) {
652+
IndexingMetrics.addDocumentLatency
653+
.labelValues(indexName)
654+
.observe((System.nanoTime() - ns_start) / ONE_MILLION / documents.size());
655+
}
560656
}
561657

562658
private Document handleFacets(IndexState indexState, ShardState shardState, Document nextDoc) {
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright 2025 Yelp Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.yelp.nrtsearch.server.monitoring;
17+
18+
import io.prometheus.metrics.core.metrics.Counter;
19+
import io.prometheus.metrics.core.metrics.Summary;
20+
import io.prometheus.metrics.model.registry.PrometheusRegistry;
21+
22+
public class IndexingMetrics {
23+
24+
public static final String UPDATE_DOC_VALUES_REQUESTS_RECEIVED =
25+
"nrt_update_doc_values_requests_received";
26+
public static final String ADD_DOCUMENT_REQUESTS_RECEIVED = "nrt_add_document_requests_received";
27+
public static final String UPDATE_DOC_VALUES_LATENCY = "nrt_update_doc_values_latency_ms";
28+
public static final String ADD_DOCUMENT_LATENCY = "nrt_add_document_latency_ms";
29+
30+
public static final Counter updateDocValuesRequestsReceived =
31+
Counter.builder()
32+
.name(UPDATE_DOC_VALUES_REQUESTS_RECEIVED)
33+
.help("Number of requests received for the update doc values API ")
34+
.labelNames("index")
35+
.build();
36+
37+
// counter for addDocument requests received for the index with the index name as the label value
38+
public static final Counter addDocumentRequestsReceived =
39+
Counter.builder()
40+
.name(ADD_DOCUMENT_REQUESTS_RECEIVED)
41+
.help("Number of requests received for the add document API ")
42+
.labelNames("index")
43+
.build();
44+
45+
public static final Summary updateDocValuesLatency =
46+
Summary.builder()
47+
.name(UPDATE_DOC_VALUES_LATENCY)
48+
.help("Latency of the update doc values API")
49+
.labelNames("index")
50+
.quantile(0.25, 0.01)
51+
.quantile(0.5, 0.01)
52+
.quantile(0.95, 0.01)
53+
.quantile(0.99, 0.01)
54+
.build();
55+
56+
// gauge for the latency of the addDocument API with the index name as the label value
57+
public static final Summary addDocumentLatency =
58+
Summary.builder()
59+
.name(ADD_DOCUMENT_LATENCY)
60+
.help("Latency of the add document API")
61+
.labelNames("index")
62+
.quantile(0.25, 0.01)
63+
.quantile(0.5, 0.01)
64+
.quantile(0.95, 0.01)
65+
.quantile(0.99, 0.01)
66+
.build();
67+
68+
public static void register(PrometheusRegistry registry) {
69+
registry.register(updateDocValuesRequestsReceived);
70+
registry.register(addDocumentRequestsReceived);
71+
registry.register(updateDocValuesLatency);
72+
registry.register(addDocumentLatency);
73+
}
74+
}

0 commit comments

Comments
 (0)