Skip to content

Commit 6c1efe4

Browse files
authored
dhruva_RP-13223-Parent_field_access_from_nested_query (#868)
* Enabling parent fields to be retrieved from nested queries. * Removing redundant comments. * gradlew spotlessApply * Addressing PR comments, adding parameter to the tryGetFromParentDocument * Addressing PR comments; Added support to explicitly retrieve a parent field using "_PARENT." notation * Removed fallback mechanism, updated test cases to only handle the "_PARENT." notation. * Updated IndexState to be more precise, implementation still uses _PARENT. prefix but now recursively calls the get function of SegmentDocLookup to handle multiple levels of nested objects * Removed a test file, commented another method of initializing the value of the offset field * Removed offset calculation in ObjectFieldDef and moved it to the AddDocumentHandler * Removed the _PARENT. prefix. Enhanced SegmentDocLookup and SearchRequestProcessor in response to use nestedPath to determine if the field is on the parent level or the child level. * Reverting SegmentDocLookup to only handle 1 layer of nesting * Reverting design to use _PARENT notation * Fixed bug where new parentLookup object was created for every document, getParentDocId now throws exceptions instead of returning -1 * Removed redundant try catch block, and type cast the return of getFieldDef directly
1 parent f1313ea commit 6c1efe4

File tree

7 files changed

+805
-8
lines changed

7 files changed

+805
-8
lines changed

src/main/java/com/yelp/nrtsearch/server/doc/SegmentDocLookup.java

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import com.yelp.nrtsearch.server.field.FieldDef;
1919
import com.yelp.nrtsearch.server.field.IndexableFieldDef;
20+
import com.yelp.nrtsearch.server.index.IndexState;
2021
import java.io.IOException;
2122
import java.util.Collection;
2223
import java.util.HashMap;
@@ -35,11 +36,14 @@
3536
*/
3637
public class SegmentDocLookup implements Map<String, LoadedDocValues<?>> {
3738

39+
private static final String PARENT_FIELD_PREFIX = "_PARENT.";
3840
private final Function<String, FieldDef> fieldDefLookup;
3941
private final LeafReaderContext context;
4042
private final Map<String, LoadedDocValues<?>> loaderCache = new HashMap<>();
4143

4244
private int docId = -1;
45+
private int parentDocId = -1;
46+
private SegmentDocLookup parentLookup = null;
4347

4448
public SegmentDocLookup(Function<String, FieldDef> fieldDefLookup, LeafReaderContext context) {
4549
this.fieldDefLookup = fieldDefLookup;
@@ -54,6 +58,8 @@ public SegmentDocLookup(Function<String, FieldDef> fieldDefLookup, LeafReaderCon
5458
*/
5559
public void setDocId(int docId) {
5660
this.docId = docId;
61+
this.parentDocId = -1;
62+
// Don't reset parentLookup to null - reuse the existing instance
5763
}
5864

5965
@Override
@@ -68,7 +74,7 @@ public boolean isEmpty() {
6874

6975
/**
7076
* Check if a given field name is capable of having doc values. This does not mean there is data
71-
* present, just that there can be.
77+
* present, just that there can be. Handles "_PARENT." prefix for parent field access.
7278
*
7379
* @param key field name
7480
* @return if this field may have stored doc values
@@ -79,6 +85,11 @@ public boolean containsKey(Object key) {
7985
return false;
8086
}
8187
String fieldName = key.toString();
88+
89+
if (fieldName.startsWith(PARENT_FIELD_PREFIX)) {
90+
fieldName = fieldName.substring(PARENT_FIELD_PREFIX.length());
91+
}
92+
8293
try {
8394
FieldDef field = fieldDefLookup.apply(fieldName);
8495
return field instanceof IndexableFieldDef && ((IndexableFieldDef<?>) field).hasDocValues();
@@ -96,6 +107,9 @@ public boolean containsValue(Object value) {
96107
* Get the {@link LoadedDocValues} for a given field. Creates a new instance or uses one from the
97108
* cache. The data is loaded for the current set document id.
98109
*
110+
* <p>The system automatically determines if a field requires parent document access based on the
111+
* "_PARENT." prefix in the field name. Fields with this prefix will access the parent document.
112+
*
99113
* @param key field name
100114
* @return {@link LoadedDocValues} implementation for the given field
101115
* @throws IllegalArgumentException if the field does not support doc values, if there is a
@@ -106,6 +120,18 @@ public boolean containsValue(Object value) {
106120
public LoadedDocValues<?> get(Object key) {
107121
Objects.requireNonNull(key);
108122
String fieldName = key.toString();
123+
124+
if (fieldName.startsWith(PARENT_FIELD_PREFIX)) {
125+
String actualFieldName = fieldName.substring(PARENT_FIELD_PREFIX.length());
126+
try {
127+
SegmentDocLookup parentLookup = getParentLookup();
128+
return parentLookup.get(actualFieldName);
129+
} catch (IllegalArgumentException e) {
130+
throw new IllegalArgumentException(
131+
"Could not access parent field: " + fieldName + " - " + e.getMessage(), e);
132+
}
133+
}
134+
109135
LoadedDocValues<?> docValues = loaderCache.get(fieldName);
110136
if (docValues == null) {
111137
FieldDef fieldDef = fieldDefLookup.apply(fieldName);
@@ -128,9 +154,58 @@ public LoadedDocValues<?> get(Object key) {
128154
throw new IllegalArgumentException(
129155
"Could not set doc: " + docId + ", field: " + fieldName, e);
130156
}
157+
131158
return docValues;
132159
}
133160

161+
/**
162+
* Lazily initializes and returns the parent document lookup.
163+
*
164+
* @return SegmentDocLookup for parent document
165+
* @throws IllegalArgumentException if this document is not nested or parent document cannot be
166+
* accessed
167+
*/
168+
private SegmentDocLookup getParentLookup() {
169+
if (parentDocId == -1) {
170+
parentDocId = getParentDocId();
171+
}
172+
173+
if (parentLookup == null) {
174+
parentLookup = new SegmentDocLookup(fieldDefLookup, context);
175+
}
176+
177+
parentLookup.setDocId(parentDocId);
178+
return parentLookup;
179+
}
180+
181+
/**
182+
* Calculates the parent document ID using NESTED_DOCUMENT_OFFSET.
183+
*
184+
* @return parent document ID
185+
* @throws IllegalArgumentException if a parent docId cannot be found or does not exist
186+
*/
187+
private int getParentDocId() {
188+
IndexableFieldDef<?> offsetFieldDef =
189+
(IndexableFieldDef<?>) IndexState.getMetaField(IndexState.NESTED_DOCUMENT_OFFSET);
190+
LoadedDocValues<?> offsetDocValues;
191+
try {
192+
offsetDocValues = offsetFieldDef.getDocValues(context);
193+
offsetDocValues.setDocId(docId);
194+
} catch (IOException e) {
195+
throw new IllegalArgumentException("Could not load nested document offset values", e);
196+
}
197+
198+
if (offsetDocValues.isEmpty()) {
199+
throw new IllegalArgumentException(
200+
"Document has no nested document offset - not a nested document");
201+
}
202+
203+
Object offsetValue = offsetDocValues.getFirst();
204+
int offset = ((Number) offsetValue).intValue();
205+
// The offset represents the exact number of documents to jump forward to reach the parent
206+
return docId + offset;
207+
}
208+
134209
@Override
135210
public LoadedDocValues<?> put(String key, LoadedDocValues<?> value) {
136211
throw new UnsupportedOperationException();

src/main/java/com/yelp/nrtsearch/server/field/ObjectFieldDef.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import java.util.ArrayList;
3232
import java.util.List;
3333
import java.util.Map;
34-
import java.util.stream.Collectors;
3534
import org.apache.lucene.document.BinaryDocValuesField;
3635
import org.apache.lucene.document.Document;
3736
import org.apache.lucene.document.StoredField;
@@ -74,19 +73,22 @@ public void parseFieldWithChildren(
7473
List<Map<String, Object>> fieldValueMaps = new ArrayList<>();
7574
fieldValues.stream().map(e -> gson.fromJson(e, Map.class)).forEach(fieldValueMaps::add);
7675

77-
List<Document> childDocuments =
78-
fieldValueMaps.stream()
79-
.map(e -> createChildDocument(e, facetHierarchyPaths))
80-
.collect(Collectors.toList());
76+
int totalDocs = fieldValueMaps.size();
77+
List<Document> childDocuments = new ArrayList<>(totalDocs);
78+
79+
for (Map<String, Object> fieldValueMap : fieldValueMaps) {
80+
childDocuments.add(createChildDocument(fieldValueMap, facetHierarchyPaths));
81+
}
82+
8183
documentsContext.addChildDocuments(this.getName(), childDocuments);
8284
}
8385
}
8486

8587
/**
8688
* create a new lucene document for each nested object
8789
*
88-
* @param fieldValue
89-
* @param facetHierarchyPaths
90+
* @param fieldValue the field value to include in the document
91+
* @param facetHierarchyPaths facet hierarchy paths
9092
* @return lucene document
9193
*/
9294
private Document createChildDocument(
@@ -95,6 +97,7 @@ private Document createChildDocument(
9597
parseFieldWithChildrenObject(document, List.of(fieldValue), facetHierarchyPaths);
9698
((IndexableFieldDef<?>) (IndexState.getMetaField(IndexState.NESTED_PATH)))
9799
.parseDocumentField(document, List.of(this.getName()), List.of());
100+
98101
return document;
99102
}
100103

src/main/java/com/yelp/nrtsearch/server/handler/AddDocumentHandler.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,9 @@ private void updateNestedDocuments(
575575
documents.addAll(
576576
e.getValue().stream().map(v -> handleFacets(indexState, shardState, v)).toList());
577577
}
578+
579+
addGlobalNestedDocumentOffsets(documents);
580+
578581
Document rootDoc = handleFacets(indexState, shardState, documentsContext.getRootDocument());
579582

580583
for (Document doc : documents) {
@@ -607,6 +610,9 @@ private void addNestedDocuments(
607610
documents.addAll(
608611
e.getValue().stream().map(v -> handleFacets(indexState, shardState, v)).toList());
609612
}
613+
614+
addGlobalNestedDocumentOffsets(documents);
615+
610616
Document rootDoc = handleFacets(indexState, shardState, documentsContext.getRootDocument());
611617
documents.add(rootDoc);
612618
IndexingMetrics.addDocumentRequestsReceived.labelValues(indexName).inc();
@@ -691,6 +697,27 @@ private Document handleFacets(IndexState indexState, ShardState shardState, Docu
691697
return nextDoc;
692698
}
693699

700+
/**
701+
* Adds global offset values to nested documents for proper ordering and retrieval.
702+
*
703+
* <p>This method calculates and assigns a global offset to each nested document within a parent
704+
* document. The offset calculation uses reverse ordering (totalNestedDocs - currentIndex)
705+
*
706+
* @param nestedDocuments the list of nested documents to process; must not be null or empty
707+
* @throws IllegalArgumentException if nestedDocuments is null
708+
*/
709+
private void addGlobalNestedDocumentOffsets(List<Document> nestedDocuments) {
710+
int totalNestedDocs = nestedDocuments.size();
711+
for (int i = 0; i < totalNestedDocs; i++) {
712+
int globalOffset = totalNestedDocs - i;
713+
nestedDocuments
714+
.get(i)
715+
.add(
716+
new org.apache.lucene.document.NumericDocValuesField(
717+
IndexState.NESTED_DOCUMENT_OFFSET, globalOffset));
718+
}
719+
}
720+
694721
@Override
695722
public Long call() throws Exception {
696723
return runIndexingJob();

src/main/java/com/yelp/nrtsearch/server/index/IndexState.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ public abstract class IndexState implements Closeable {
8383
public static final String NESTED_PATH = "_nested_path";
8484
public static final String ROOT = "_root";
8585
public static final String FIELD_NAMES = "_field_names";
86+
public static final String NESTED_DOCUMENT_OFFSET = "_parent_offset";
8687

8788
private static final Logger logger = LoggerFactory.getLogger(IndexState.class);
8889
private final GlobalState globalState;
@@ -564,6 +565,16 @@ private static Map<String, FieldDef> getPredefinedMetaFields(GlobalState globalS
564565
.setSearch(true)
565566
.setMultiValued(true)
566567
.build(),
568+
FieldDefCreator.createContext(globalState)),
569+
NESTED_DOCUMENT_OFFSET,
570+
FieldDefCreator.getInstance()
571+
.createFieldDef(
572+
NESTED_DOCUMENT_OFFSET,
573+
Field.newBuilder()
574+
.setName(NESTED_DOCUMENT_OFFSET)
575+
.setType(FieldType.INT)
576+
.setStoreDocValues(true)
577+
.build(),
567578
FieldDefCreator.createContext(globalState)));
568579
}
569580
}

0 commit comments

Comments
 (0)