@@ -17,6 +17,7 @@

import com.yelp.nrtsearch.server.field.FieldDef;
import com.yelp.nrtsearch.server.field.IndexableFieldDef;
import com.yelp.nrtsearch.server.index.IndexState;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
@@ -35,11 +36,14 @@
*/
public class SegmentDocLookup implements Map<String, LoadedDocValues<?>> {

private static final String PARENT_FIELD_PREFIX = "_PARENT.";
private final Function<String, FieldDef> fieldDefLookup;
private final LeafReaderContext context;
private final Map<String, LoadedDocValues<?>> loaderCache = new HashMap<>();

private int docId = -1;
private int parentDocId = -1;
private SegmentDocLookup parentLookup = null;

public SegmentDocLookup(Function<String, FieldDef> fieldDefLookup, LeafReaderContext context) {
this.fieldDefLookup = fieldDefLookup;
@@ -54,6 +58,8 @@ public SegmentDocLookup(Function<String, FieldDef> fieldDefLookup, LeafReaderCon
*/
public void setDocId(int docId) {
this.docId = docId;
this.parentDocId = -1;
// Don't reset parentLookup to null - reuse the existing instance
}

@Override
@@ -68,7 +74,7 @@ public boolean isEmpty() {

/**
* Check if a given field name is capable of having doc values. This does not mean there is data
* present, just that there can be.
present, just that there can be. Handles the "_PARENT." prefix for parent field access.
*
* @param key field name
* @return if this field may have stored doc values
@@ -79,6 +85,11 @@ public boolean containsKey(Object key) {
return false;
}
String fieldName = key.toString();

if (fieldName.startsWith(PARENT_FIELD_PREFIX)) {
fieldName = fieldName.substring(PARENT_FIELD_PREFIX.length());
}

try {
FieldDef field = fieldDefLookup.apply(fieldName);
return field instanceof IndexableFieldDef && ((IndexableFieldDef<?>) field).hasDocValues();
@@ -96,6 +107,9 @@ public boolean containsValue(Object value) {
* Get the {@link LoadedDocValues} for a given field. Creates a new instance or uses one from the
* cache. The data is loaded for the current set document id.
*
* <p>The system automatically determines if a field requires parent document access based on the
* "_PARENT." prefix in the field name. Fields with this prefix will access the parent document.
*
* @param key field name
* @return {@link LoadedDocValues} implementation for the given field
* @throws IllegalArgumentException if the field does not support doc values, if there is a
@@ -106,6 +120,18 @@ public boolean containsValue(Object value) {
public LoadedDocValues<?> get(Object key) {
Objects.requireNonNull(key);
String fieldName = key.toString();

if (fieldName.startsWith(PARENT_FIELD_PREFIX)) {
String actualFieldName = fieldName.substring(PARENT_FIELD_PREFIX.length());
try {
SegmentDocLookup parentLookup = getParentLookup();
return parentLookup.get(actualFieldName);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
"Could not access parent field: " + fieldName + " - " + e.getMessage(), e);
}
}

LoadedDocValues<?> docValues = loaderCache.get(fieldName);
if (docValues == null) {
FieldDef fieldDef = fieldDefLookup.apply(fieldName);
@@ -128,9 +154,68 @@ public LoadedDocValues<?> get(Object key) {
throw new IllegalArgumentException(
"Could not set doc: " + docId + ", field: " + fieldName, e);
}

return docValues;
}

/**
* Lazily initializes and returns the parent document lookup.
*
* @return SegmentDocLookup for parent document
* @throws IllegalArgumentException if this document is not nested or parent document cannot be
* accessed
*/
private SegmentDocLookup getParentLookup() {
if (parentDocId == -1) {
parentDocId = getParentDocId();
}

if (parentLookup == null) {
parentLookup = new SegmentDocLookup(fieldDefLookup, context);
}

parentLookup.setDocId(parentDocId);
return parentLookup;
}

/**
* Calculates the parent document ID using NESTED_DOCUMENT_OFFSET.
*
* @return parent document ID
* @throws IllegalArgumentException if a parent docId cannot be found or does not exist
*/
private int getParentDocId() {
FieldDef offsetFieldDef;
try {
offsetFieldDef = IndexState.getMetaField(IndexState.NESTED_DOCUMENT_OFFSET);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(
"Document is not a nested document - no parent document available", e);
}

if (!(offsetFieldDef instanceof IndexableFieldDef<?> offsetIndexableFieldDef)) {
throw new IllegalArgumentException("NESTED_DOCUMENT_OFFSET field is not indexable");
}

LoadedDocValues<?> offsetDocValues;
try {
offsetDocValues = offsetIndexableFieldDef.getDocValues(context);
offsetDocValues.setDocId(docId);
} catch (IOException e) {
throw new IllegalArgumentException("Could not load nested document offset values", e);
}

if (offsetDocValues.isEmpty()) {
throw new IllegalArgumentException(
"Document has no nested document offset - not a nested document");
}

Object offsetValue = offsetDocValues.getFirst();
int offset = ((Number) offsetValue).intValue();
// The offset represents the exact number of documents to jump forward to reach the parent
return docId + offset;
}

@Override
public LoadedDocValues<?> put(String key, LoadedDocValues<?> value) {
throw new UnsupportedOperationException();
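To make the new lookup path concrete, here is a small usage sketch. It is illustrative only and not part of this diff: the helper name readParentField, the "category" field, and the fieldDefLookup/context/childDocId arguments are hypothetical stand-ins; only SegmentDocLookup, setDocId, get, and getFirst come from the class above.

// Illustrative sketch, not part of this change.
private static Object readParentField(
    Function<String, FieldDef> fieldDefLookup, LeafReaderContext context, int childDocId) {
  SegmentDocLookup lookup = new SegmentDocLookup(fieldDefLookup, context);
  // Position the lookup on the child (nested) document.
  lookup.setDocId(childDocId);
  // The "_PARENT." prefix routes this access through getParentLookup(): the parent docId is
  // resolved as childDocId + _parent_offset, and "category" is then loaded from that document.
  return lookup.get("_PARENT.category").getFirst();
}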
17 changes: 10 additions & 7 deletions src/main/java/com/yelp/nrtsearch/server/field/ObjectFieldDef.java
@@ -31,7 +31,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
@@ -74,19 +73,22 @@ public void parseFieldWithChildren(
List<Map<String, Object>> fieldValueMaps = new ArrayList<>();
fieldValues.stream().map(e -> gson.fromJson(e, Map.class)).forEach(fieldValueMaps::add);

List<Document> childDocuments =
fieldValueMaps.stream()
.map(e -> createChildDocument(e, facetHierarchyPaths))
.collect(Collectors.toList());
int totalDocs = fieldValueMaps.size();
List<Document> childDocuments = new ArrayList<>(totalDocs);

for (Map<String, Object> fieldValueMap : fieldValueMaps) {
childDocuments.add(createChildDocument(fieldValueMap, facetHierarchyPaths));
}

documentsContext.addChildDocuments(this.getName(), childDocuments);
}
}

/**
* create a new lucene document for each nested object
*
* @param fieldValue
* @param facetHierarchyPaths
* @param fieldValue the field value to include in the document
* @param facetHierarchyPaths facet hierarchy paths
* @return lucene document
*/
private Document createChildDocument(
Expand All @@ -95,6 +97,7 @@ private Document createChildDocument(
parseFieldWithChildrenObject(document, List.of(fieldValue), facetHierarchyPaths);
((IndexableFieldDef<?>) (IndexState.getMetaField(IndexState.NESTED_PATH)))
.parseDocumentField(document, List.of(this.getName()), List.of());

return document;
}

@@ -575,6 +575,9 @@ private void updateNestedDocuments(
documents.addAll(
e.getValue().stream().map(v -> handleFacets(indexState, shardState, v)).toList());
}

addGlobalNestedDocumentOffsets(documents);

Document rootDoc = handleFacets(indexState, shardState, documentsContext.getRootDocument());

for (Document doc : documents) {
@@ -607,6 +610,9 @@ private void addNestedDocuments(
documents.addAll(
e.getValue().stream().map(v -> handleFacets(indexState, shardState, v)).toList());
}

addGlobalNestedDocumentOffsets(documents);

Document rootDoc = handleFacets(indexState, shardState, documentsContext.getRootDocument());
documents.add(rootDoc);
IndexingMetrics.addDocumentRequestsReceived.labelValues(indexName).inc();
@@ -691,6 +697,27 @@ private Document handleFacets(IndexState indexState, ShardState shardState, Docu
return nextDoc;
}

/**
* Adds a global offset doc value to each nested document so the child can locate its parent at
* search time.
*
* <p>The offset assigned to the i-th nested document is (totalNestedDocs - i). Since the parent
* document is indexed immediately after its nested documents, a child's docId plus its offset
* equals the parent's docId.
*
* @param nestedDocuments the list of nested documents to process
*/
private void addGlobalNestedDocumentOffsets(List<Document> nestedDocuments) {
int totalNestedDocs = nestedDocuments.size();
for (int i = 0; i < totalNestedDocs; i++) {
int globalOffset = totalNestedDocs - i;
nestedDocuments
.get(i)
.add(
new org.apache.lucene.document.NumericDocValuesField(
IndexState.NESTED_DOCUMENT_OFFSET, globalOffset));
}
}

@Override
public Long call() throws Exception {
return runIndexingJob();
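A worked example of the offset arithmetic may help (illustrative only, not part of this diff; the docId N and the count of three children are hypothetical):

// Three nested documents are indexed as a block, followed immediately by their parent:
//   children at docIds N, N+1, N+2; parent at N+3.
// addGlobalNestedDocumentOffsets assigns offset = totalNestedDocs - i:
//   i=0, docId N   -> offset 3 -> N   + 3 = N+3 (parent)
//   i=1, docId N+1 -> offset 2 -> N+1 + 2 = N+3 (parent)
//   i=2, docId N+2 -> offset 1 -> N+2 + 1 = N+3 (parent)
// At search time, SegmentDocLookup.getParentDocId() reads the offset back from the
// _parent_offset doc values and returns docId + offset.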
11 changes: 11 additions & 0 deletions src/main/java/com/yelp/nrtsearch/server/index/IndexState.java
@@ -83,6 +83,7 @@ public abstract class IndexState implements Closeable {
public static final String NESTED_PATH = "_nested_path";
public static final String ROOT = "_root";
public static final String FIELD_NAMES = "_field_names";
public static final String NESTED_DOCUMENT_OFFSET = "_parent_offset";

private static final Logger logger = LoggerFactory.getLogger(IndexState.class);
private final GlobalState globalState;
@@ -564,6 +565,16 @@ private static Map<String, FieldDef> getPredefinedMetaFields(GlobalState globalS
.setSearch(true)
.setMultiValued(true)
.build(),
FieldDefCreator.createContext(globalState)),
NESTED_DOCUMENT_OFFSET,
FieldDefCreator.getInstance()
.createFieldDef(
NESTED_DOCUMENT_OFFSET,
Field.newBuilder()
.setName(NESTED_DOCUMENT_OFFSET)
.setType(FieldType.INT)
.setStoreDocValues(true)
.build(),
FieldDefCreator.createContext(globalState)));
}
}
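For reference, a minimal sketch of reading the stored offset back with plain Lucene doc-values APIs (illustrative only, not part of this diff; leafReader and childDocId are assumed to be supplied by the caller). It relies on the fact that the indexing handler writes the offset as a NumericDocValuesField:

// Illustrative sketch, not part of this change.
private static int parentDocIdOrMinusOne(
    org.apache.lucene.index.LeafReader leafReader, int childDocId) throws java.io.IOException {
  org.apache.lucene.index.NumericDocValues offsets =
      leafReader.getNumericDocValues(IndexState.NESTED_DOCUMENT_OFFSET);
  // Root documents carry no _parent_offset value, so advanceExact() returns false for them.
  if (offsets == null || !offsets.advanceExact(childDocId)) {
    return -1;
  }
  return childDocId + (int) offsets.longValue();
}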