Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/main/java/com/yelp/nrtsearch/server/doc/DocLookup.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,15 @@
*/
public class DocLookup {
private final Function<String, FieldDef> fieldDefLookup;
private final String queryNestedPath;

/**
 * Creates a lookup with no query nested path.
 *
 * <p>Delegates to the two-argument constructor, passing {@code null} as the nested path.
 *
 * @param fieldDefLookup function applied to a field name to obtain its {@link FieldDef}
 */
public DocLookup(Function<String, FieldDef> fieldDefLookup) {
this(fieldDefLookup, null);
}

/**
 * Creates a lookup that hands the given nested path to every segment lookup it produces.
 *
 * @param fieldDefLookup function applied to a field name to obtain its {@link FieldDef}
 * @param queryNestedPath nested path of the query; may be {@code null} (the single-argument
 *     constructor passes {@code null})
 */
public DocLookup(Function<String, FieldDef> fieldDefLookup, String queryNestedPath) {
  // The two assignments are independent; both fields are final and set exactly once.
  this.queryNestedPath = queryNestedPath;
  this.fieldDefLookup = fieldDefLookup;
}

/**
Expand All @@ -37,7 +43,7 @@ public DocLookup(Function<String, FieldDef> fieldDefLookup) {
* @return lookup accessor for given segment context
*/
public SegmentDocLookup getSegmentLookup(LeafReaderContext context) {
  // Forward the query nested path so the segment lookup can decide whether a requested field
  // has to be read from a parent document. The former two-argument call dropped that path and
  // left a second, unreachable return statement behind.
  return new SegmentDocLookup(fieldDefLookup, context, queryNestedPath);
}

/**
Expand Down
136 changes: 136 additions & 0 deletions src/main/java/com/yelp/nrtsearch/server/doc/SegmentDocLookup.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import com.yelp.nrtsearch.server.field.FieldDef;
import com.yelp.nrtsearch.server.field.IndexableFieldDef;
import com.yelp.nrtsearch.server.index.IndexState;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
Expand All @@ -38,12 +39,22 @@ public class SegmentDocLookup implements Map<String, LoadedDocValues<?>> {
private final Function<String, FieldDef> fieldDefLookup;
private final LeafReaderContext context;
private final Map<String, LoadedDocValues<?>> loaderCache = new HashMap<>();
private final String queryNestedPath;

private int docId = -1;
private SegmentDocLookup parentLookup = null; // Lazy initialized

/**
 * Creates a segment lookup with no query nested path.
 *
 * <p>Delegates to the three-argument constructor, passing {@code null} as the nested path.
 *
 * @param fieldDefLookup function applied to a field name to obtain its {@link FieldDef}
 * @param context segment context passed to the field definitions when loading doc values
 */
public SegmentDocLookup(Function<String, FieldDef> fieldDefLookup, LeafReaderContext context) {
this(fieldDefLookup, context, null);
}

/**
 * Creates a segment lookup bound to one segment and, optionally, to a query nested path.
 *
 * @param fieldDefLookup function applied to a field name to obtain its {@link FieldDef}
 * @param context segment context passed to the field definitions when loading doc values
 * @param queryNestedPath nested path of the query, used when resolving fields that live in a
 *     parent document; may be {@code null}
 */
public SegmentDocLookup(
    Function<String, FieldDef> fieldDefLookup, LeafReaderContext context, String queryNestedPath) {
  // Plain field initialisation; the assignments are independent of each other.
  this.queryNestedPath = queryNestedPath;
  this.context = context;
  this.fieldDefLookup = fieldDefLookup;
}

/**
Expand All @@ -54,6 +65,8 @@ public SegmentDocLookup(Function<String, FieldDef> fieldDefLookup, LeafReaderCon
*/
public void setDocId(int docId) {
  // A cached parent lookup was positioned for the previous document, so drop it here;
  // it is rebuilt lazily the next time a parent field is requested.
  parentLookup = null;
  this.docId = docId;
}

@Override
Expand All @@ -79,6 +92,7 @@ public boolean containsKey(Object key) {
return false;
}
String fieldName = key.toString();

try {
FieldDef field = fieldDefLookup.apply(fieldName);
return field instanceof IndexableFieldDef && ((IndexableFieldDef<?>) field).hasDocValues();
Expand All @@ -96,6 +110,9 @@ public boolean containsValue(Object value) {
* Get the {@link LoadedDocValues} for a given field. Creates a new instance or uses one from the
* cache. The data is loaded for the current set document id.
*
* <p>The system automatically determines if a field requires parent document access based on the
* current nested path. Fields are resolved automatically without requiring explicit notation.
*
* @param key field name
* @return {@link LoadedDocValues} implementation for the given field
* @throws IllegalArgumentException if the field does not support doc values, if there is a
Expand All @@ -106,6 +123,30 @@ public boolean containsValue(Object value) {
public LoadedDocValues<?> get(Object key) {
Objects.requireNonNull(key);
String fieldName = key.toString();

if (queryNestedPath != null && !queryNestedPath.isEmpty() && !"_root".equals(queryNestedPath)) {
int parentLevels = resolveParentLevels(queryNestedPath, fieldName);
if (parentLevels > 0) {
SegmentDocLookup currentLookup = getParentLookup();
if (currentLookup == null) {
throw new IllegalArgumentException(
"Could not access parent field: "
+ fieldName
+ " (document may not be nested or parent field may not exist)");
}

for (int i = 1; i < parentLevels; i++) {
currentLookup = currentLookup.getParentLookup();
if (currentLookup == null) {
throw new IllegalArgumentException(
"Could not access field: " + fieldName + " (required parent level not accessible)");
}
}

return currentLookup.get(fieldName);
}
}

LoadedDocValues<?> docValues = loaderCache.get(fieldName);
if (docValues == null) {
FieldDef fieldDef = fieldDefLookup.apply(fieldName);
Expand All @@ -128,9 +169,104 @@ public LoadedDocValues<?> get(Object key) {
throw new IllegalArgumentException(
"Could not set doc: " + docId + ", field: " + fieldName, e);
}

return docValues;
}

/**
 * Returns the lookup positioned on the parent of the current document, creating and caching it
 * on first use.
 *
 * <p>The parent lookup is created without a query nested path, so it reads fields directly from
 * the parent document.
 *
 * @return lookup for the parent document, or {@code null} when no parent document id can be
 *     determined
 */
private SegmentDocLookup getParentLookup() {
  if (parentLookup != null) {
    return parentLookup;
  }

  int parentId = getParentDocId();
  if (parentId == -1) {
    // Nothing is cached in this case, so a later call tries to resolve the parent again.
    return null;
  }

  SegmentDocLookup created = new SegmentDocLookup(fieldDefLookup, context);
  parentLookup = created;
  created.setDocId(parentId);
  return created;
}

/**
 * Derives the parent document id from the {@code NESTED_DOCUMENT_OFFSET} doc value stored on the
 * current document.
 *
 * @return {@code docId + offset}, or -1 when the offset meta field cannot be obtained, is not an
 *     {@link IndexableFieldDef}, its doc values cannot be loaded, or the current document has no
 *     offset value
 */
private int getParentDocId() {
  FieldDef metaField;
  try {
    metaField = IndexState.getMetaField(IndexState.NESTED_DOCUMENT_OFFSET);
  } catch (IllegalArgumentException e) {
    // The meta field lookup rejected the name; treat as "no parent".
    return -1;
  }

  if (metaField instanceof IndexableFieldDef<?> offsetField) {
    LoadedDocValues<?> offsets;
    try {
      offsets = offsetField.getDocValues(context);
      offsets.setDocId(docId);
    } catch (IOException e) {
      // Doc values could not be read for this segment/document; treat as "no parent".
      return -1;
    }

    if (offsets.isEmpty()) {
      return -1;
    }

    // The stored value is the number of documents to move forward to reach the parent.
    Number distance = (Number) offsets.getFirst();
    return docId + distance.intValue();
  }

  return -1;
}

/**
 * Determines how many nested levels lie between the current nested path and the level that owns
 * the target field.
 *
 * <p>Both paths are split on {@code '.'}; the result is the number of segments of the current
 * path that are not part of the shared leading segments.
 *
 * @param currentNestedPath current nested document path (e.g., "reviews.generation")
 * @param targetFieldPath absolute field path (e.g., "biz_name" or "reviews.rating")
 * @return number of parent levels to traverse, or -1 if the current path is null, empty or
 *     "_root", or if the field is at the current level or below it
 */
private static int resolveParentLevels(String currentNestedPath, String targetFieldPath) {
  boolean atRoot =
      currentNestedPath == null
          || currentNestedPath.isEmpty()
          || "_root".equals(currentNestedPath);
  if (atRoot) {
    return -1;
  }

  boolean insideCurrentPath =
      targetFieldPath.equals(currentNestedPath)
          || targetFieldPath.startsWith(currentNestedPath + ".");
  if (insideCurrentPath) {
    return -1;
  }

  String[] nestedSegments = currentNestedPath.split("\\.");
  String[] targetSegments = targetFieldPath.split("\\.");

  // Count the leading segments both paths share.
  int limit = Math.min(nestedSegments.length, targetSegments.length);
  int shared = 0;
  while (shared < limit && nestedSegments[shared].equals(targetSegments[shared])) {
    shared++;
  }

  // Every unshared segment of the current path is one level to climb.
  int levelsUp = nestedSegments.length - shared;
  if (levelsUp > 0) {
    return levelsUp;
  }
  return -1;
}

@Override
public LoadedDocValues<?> put(String key, LoadedDocValues<?> value) {
throw new UnsupportedOperationException();
Expand Down
17 changes: 10 additions & 7 deletions src/main/java/com/yelp/nrtsearch/server/field/ObjectFieldDef.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
Expand Down Expand Up @@ -74,19 +73,22 @@ public void parseFieldWithChildren(
List<Map<String, Object>> fieldValueMaps = new ArrayList<>();
fieldValues.stream().map(e -> gson.fromJson(e, Map.class)).forEach(fieldValueMaps::add);

List<Document> childDocuments =
fieldValueMaps.stream()
.map(e -> createChildDocument(e, facetHierarchyPaths))
.collect(Collectors.toList());
int totalDocs = fieldValueMaps.size();
List<Document> childDocuments = new ArrayList<>(totalDocs);

for (Map<String, Object> fieldValueMap : fieldValueMaps) {
childDocuments.add(createChildDocument(fieldValueMap, facetHierarchyPaths));
}

documentsContext.addChildDocuments(this.getName(), childDocuments);
}
}

/**
* Creates a new Lucene document for a single nested object.
*
* @param fieldValue
* @param facetHierarchyPaths
* @param fieldValue the field value to include in the document
* @param facetHierarchyPaths facet hierarchy paths
* @return lucene document
*/
private Document createChildDocument(
Expand All @@ -95,6 +97,7 @@ private Document createChildDocument(
parseFieldWithChildrenObject(document, List.of(fieldValue), facetHierarchyPaths);
((IndexableFieldDef<?>) (IndexState.getMetaField(IndexState.NESTED_PATH)))
.parseDocumentField(document, List.of(this.getName()), List.of());

return document;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,9 @@ private void updateNestedDocuments(
documents.addAll(
e.getValue().stream().map(v -> handleFacets(indexState, shardState, v)).toList());
}

addGlobalNestedDocumentOffsets(documents);

Document rootDoc = handleFacets(indexState, shardState, documentsContext.getRootDocument());

for (Document doc : documents) {
Expand Down Expand Up @@ -607,6 +610,9 @@ private void addNestedDocuments(
documents.addAll(
e.getValue().stream().map(v -> handleFacets(indexState, shardState, v)).toList());
}

addGlobalNestedDocumentOffsets(documents);

Document rootDoc = handleFacets(indexState, shardState, documentsContext.getRootDocument());
documents.add(rootDoc);
IndexingMetrics.addDocumentRequestsReceived.labelValues(indexName).inc();
Expand Down Expand Up @@ -691,6 +697,27 @@ private Document handleFacets(IndexState indexState, ShardState shardState, Docu
return nextDoc;
}

/**
 * Adds a {@code NESTED_DOCUMENT_OFFSET} doc value to every nested document in the list.
 *
 * <p>The document at index {@code i} of a list of size {@code n} receives the value {@code n -
 * i}, i.e. the number of positions from that document to the slot directly after the end of the
 * list. NOTE(review): this equals the distance to the parent only if the parent document is
 * added to the block immediately after these documents — confirm for every caller.
 *
 * <p>An empty list is a no-op.
 *
 * @param nestedDocuments nested documents to annotate in place; must not be null
 * @throws IllegalArgumentException if nestedDocuments is null
 */
private void addGlobalNestedDocumentOffsets(List<Document> nestedDocuments) {
  // Enforce the documented contract explicitly; without this check a null list would surface as
  // a NullPointerException from size() instead of the IllegalArgumentException promised above.
  if (nestedDocuments == null) {
    throw new IllegalArgumentException("nestedDocuments must not be null");
  }

  int totalNestedDocs = nestedDocuments.size();
  int index = 0;
  for (Document nestedDocument : nestedDocuments) {
    int globalOffset = totalNestedDocs - index;
    nestedDocument.add(
        new org.apache.lucene.document.NumericDocValuesField(
            IndexState.NESTED_DOCUMENT_OFFSET, globalOffset));
    index++;
  }
}

@Override
public Long call() throws Exception {
return runIndexingJob();
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/com/yelp/nrtsearch/server/index/IndexState.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ public abstract class IndexState implements Closeable {
public static final String NESTED_PATH = "_nested_path";
public static final String ROOT = "_root";
public static final String FIELD_NAMES = "_field_names";
public static final String NESTED_DOCUMENT_OFFSET = "_parent_offset";

private static final Logger logger = LoggerFactory.getLogger(IndexState.class);
private final GlobalState globalState;
Expand Down Expand Up @@ -564,6 +565,16 @@ private static Map<String, FieldDef> getPredefinedMetaFields(GlobalState globalS
.setSearch(true)
.setMultiValued(true)
.build(),
FieldDefCreator.createContext(globalState)),
NESTED_DOCUMENT_OFFSET,
FieldDefCreator.getInstance()
.createFieldDef(
NESTED_DOCUMENT_OFFSET,
Field.newBuilder()
.setName(NESTED_DOCUMENT_OFFSET)
.setType(FieldType.INT)
.setStoreDocValues(true)
.build(),
FieldDefCreator.createContext(globalState)));
}
}
Loading
Loading