Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions src/main/java/com/yelp/nrtsearch/server/doc/SegmentDocLookup.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import com.yelp.nrtsearch.server.field.FieldDef;
import com.yelp.nrtsearch.server.field.IndexableFieldDef;
import com.yelp.nrtsearch.server.index.IndexState;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
Expand Down Expand Up @@ -70,6 +71,8 @@ public boolean isEmpty() {
* Check if a given field name is capable of having doc values. This does not mean there is data
* present, just that there can be.
*
* <p>For "_PARENT." notation, this checks if the underlying parent field can have doc values.
*
* @param key field name
* @return if this field may have stored doc values
*/
Expand All @@ -79,6 +82,11 @@ public boolean containsKey(Object key) {
return false;
}
String fieldName = key.toString();

if (fieldName.startsWith("_PARENT.")) {
fieldName = fieldName.substring("_PARENT.".length());
}

try {
FieldDef field = fieldDefLookup.apply(fieldName);
return field instanceof IndexableFieldDef && ((IndexableFieldDef<?>) field).hasDocValues();
Expand All @@ -96,6 +104,10 @@ public boolean containsValue(Object value) {
* Get the {@link LoadedDocValues} for a given field. Creates a new instance or uses one from the
* cache. The data is loaded for the current set document id.
*
* <p>Clients can explicitly access parent fields using the "_PARENT." notation. For example,
* "_PARENT.biz_feature_a" will directly access the "biz_feature_a" field from the parent
* document.
*
* @param key field name
* @return {@link LoadedDocValues} implementation for the given field
* @throws IllegalArgumentException if the field does not support doc values, if there is a
Expand All @@ -106,6 +118,24 @@ public boolean containsValue(Object value) {
public LoadedDocValues<?> get(Object key) {
Objects.requireNonNull(key);
String fieldName = key.toString();

if (fieldName.startsWith("_PARENT.")) {
String parentFieldName = fieldName.substring("_PARENT.".length());
FieldDef parentFieldDef = fieldDefLookup.apply(parentFieldName);
if (parentFieldDef == null) {
throw new IllegalArgumentException("Parent field does not exist: " + parentFieldName);
}
LoadedDocValues<?> parentDocValues =
tryGetFromParentDocument(parentFieldName, parentFieldDef);
if (parentDocValues == null) {
throw new IllegalArgumentException(
"Could not access parent field: "
+ parentFieldName
+ " (document may not be nested or parent field may not exist)");
}
return parentDocValues;
}

LoadedDocValues<?> docValues = loaderCache.get(fieldName);
if (docValues == null) {
FieldDef fieldDef = fieldDefLookup.apply(fieldName);
Expand All @@ -128,9 +158,84 @@ public LoadedDocValues<?> get(Object key) {
throw new IllegalArgumentException(
"Could not set doc: " + docId + ", field: " + fieldName, e);
}

return docValues;
}

/**
 * Attempt to load the doc values of a field from the parent of the current (nested) document.
 *
 * <p>The parent is located by reading the {@link IndexState#NESTED_DOCUMENT_OFFSET} meta field of
 * the current document and adding that value to the current doc id.
 *
 * @param fieldName the name of the field to retrieve, used in error messages
 * @param fieldDef the definition of the field to retrieve
 * @return {@link LoadedDocValues} positioned on the parent document, or {@code null} if the
 *     offset meta field is not registered, the current document has no offset value (it is not a
 *     nested document), or the parent document has no value for the field
 * @throws IllegalArgumentException if the offset field or the requested field cannot provide doc
 *     values, if the stored offset is not a positive number, or if doc values cannot be loaded
 *     or positioned on the target document
 */
private LoadedDocValues<?> tryGetFromParentDocument(String fieldName, FieldDef fieldDef) {
  FieldDef offsetFieldDef;
  try {
    offsetFieldDef = IndexState.getMetaField(IndexState.NESTED_DOCUMENT_OFFSET);
  } catch (IllegalArgumentException ignored) {
    // The offset meta field is not registered, so the current document cannot be resolved to a
    // parent; report "not nested" to the caller instead of failing here.
    return null;
  }

  if (!(offsetFieldDef instanceof IndexableFieldDef<?> offsetIndexableFieldDef)) {
    throw new IllegalArgumentException("NESTED_DOCUMENT_OFFSET field cannot have doc values");
  }

  LoadedDocValues<?> offsetDocValues;
  try {
    offsetDocValues = offsetIndexableFieldDef.getDocValues(context);
  } catch (IOException e) {
    throw new IllegalArgumentException(
        "Could not get doc values for NESTED_DOCUMENT_OFFSET field", e);
  }

  try {
    offsetDocValues.setDocId(docId);
  } catch (IOException e) {
    throw new IllegalArgumentException(
        "Could not set doc: " + docId + " for NESTED_DOCUMENT_OFFSET field", e);
  }

  // If there's no offset value, this is not a nested document and therefore we should terminate
  if (offsetDocValues.isEmpty()) {
    return null;
  }

  // Validate the stored value instead of casting blindly, so that unexpected data surfaces as the
  // documented IllegalArgumentException rather than a ClassCastException.
  Object offsetValue = offsetDocValues.getFirst();
  if (!(offsetValue instanceof Number offsetNumber)) {
    throw new IllegalArgumentException(
        "Unexpected NESTED_DOCUMENT_OFFSET value: " + offsetValue + " for doc: " + docId);
  }
  int offset = offsetNumber.intValue();

  // The offset is a forward jump to the parent; zero or a negative value would resolve to the
  // current document itself or to an earlier document, neither of which is the parent.
  if (offset <= 0) {
    throw new IllegalArgumentException(
        "Invalid NESTED_DOCUMENT_OFFSET value: " + offset + " for doc: " + docId);
  }

  // The offset represents the exact number of documents to jump forward to reach the parent
  int parentDocId = docId + offset;
  if (!(fieldDef instanceof IndexableFieldDef<?> indexableFieldDef)) {
    throw new IllegalArgumentException("Field cannot have doc values: " + fieldName);
  }

  // A separate doc values instance is obtained here because it is positioned on the parent doc
  // id rather than on the current doc id.
  LoadedDocValues<?> parentDocValues;
  try {
    parentDocValues = indexableFieldDef.getDocValues(context);
  } catch (IOException e) {
    throw new IllegalArgumentException(
        "Could not get doc values for parent field: " + fieldName, e);
  }

  try {
    parentDocValues.setDocId(parentDocId);
  } catch (IOException e) {
    throw new IllegalArgumentException(
        "Could not set parent doc: " + parentDocId + ", field: " + fieldName, e);
  }

  // An empty result is reported as null so the caller can treat it as "not available".
  return parentDocValues.isEmpty() ? null : parentDocValues;
}

@Override
public LoadedDocValues<?> put(String key, LoadedDocValues<?> value) {
throw new UnsupportedOperationException();
Expand Down
24 changes: 16 additions & 8 deletions src/main/java/com/yelp/nrtsearch/server/field/ObjectFieldDef.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
Expand Down Expand Up @@ -74,27 +73,36 @@ public void parseFieldWithChildren(
List<Map<String, Object>> fieldValueMaps = new ArrayList<>();
fieldValues.stream().map(e -> gson.fromJson(e, Map.class)).forEach(fieldValueMaps::add);

List<Document> childDocuments =
fieldValueMaps.stream()
.map(e -> createChildDocument(e, facetHierarchyPaths))
.collect(Collectors.toList());
int totalDocs = fieldValueMaps.size();
List<Document> childDocuments = new ArrayList<>(totalDocs);

for (int i = 0; i < totalDocs; i++) {
// Offset is n - i (total child docs minus current index): the number of documents to jump
// forward from this child to reach its parent document
int offset = totalDocs - i;
childDocuments.add(createChildDocument(fieldValueMaps.get(i), facetHierarchyPaths, offset));
}

documentsContext.addChildDocuments(this.getName(), childDocuments);
}
}

/**
* create a new lucene document for each nested object
*
* @param fieldValue
* @param facetHierarchyPaths
* @param fieldValue the field value to include in the document
* @param facetHierarchyPaths facet hierarchy paths
* @param offset the offset value to set for this document (n-i)
* @return lucene document
*/
private Document createChildDocument(
Map<String, Object> fieldValue, List<List<String>> facetHierarchyPaths) {
Map<String, Object> fieldValue, List<List<String>> facetHierarchyPaths, int offset) {
Document document = new Document();
parseFieldWithChildrenObject(document, List.of(fieldValue), facetHierarchyPaths);
((IndexableFieldDef<?>) (IndexState.getMetaField(IndexState.NESTED_PATH)))
.parseDocumentField(document, List.of(this.getName()), List.of());

((IndexableFieldDef<?>) (IndexState.getMetaField(IndexState.NESTED_DOCUMENT_OFFSET)))
.parseDocumentField(document, List.of(String.valueOf(offset)), List.of());
return document;
}

Expand Down
12 changes: 12 additions & 0 deletions src/main/java/com/yelp/nrtsearch/server/index/IndexState.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ public abstract class IndexState implements Closeable {
public static final String NESTED_PATH = "_nested_path";
public static final String ROOT = "_root";
public static final String FIELD_NAMES = "_field_names";
public static final String NESTED_DOCUMENT_OFFSET = "_offset";

private static final Logger logger = LoggerFactory.getLogger(IndexState.class);
private final GlobalState globalState;
Expand Down Expand Up @@ -564,6 +565,17 @@ private static Map<String, FieldDef> getPredefinedMetaFields(GlobalState globalS
.setSearch(true)
.setMultiValued(true)
.build(),
FieldDefCreator.createContext(globalState)),
NESTED_DOCUMENT_OFFSET,
FieldDefCreator.getInstance()
.createFieldDef(
NESTED_DOCUMENT_OFFSET,
Field.newBuilder()
.setName(NESTED_DOCUMENT_OFFSET)
.setType(FieldType.INT)
.setSearch(true)
.setStoreDocValues(true)
.build(),
FieldDefCreator.createContext(globalState)));
}
}
Loading
Loading