Skip to content

Commit ac4945d

Browse files
committed
Add usage of configurable property to remove types when processing unfielded terms
1 parent e17a2ea commit ac4945d

File tree

7 files changed

+84
-5
lines changed

7 files changed

+84
-5
lines changed

warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,8 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
409409

410410
private List<String> contentFieldNames = Collections.emptyList();
411411

412+
private List<Type<?>> excludeUnfieldedTypes = Collections.emptyList();
413+
412414
/**
413415
* The source to use as the active query log name for all query iterators in scans generated for the shard query logic. If the value
414416
* {@value #TABLE_NAME_SOURCE} is supplied, the shard table name will be used. If {@value #QUERY_LOGIC_NAME_SOURCE} is supplied, the name of the shard query
@@ -736,6 +738,7 @@ public void copyFrom(ShardQueryConfiguration other) {
736738
this.setIvaratorCacheBufferSize(other.getIvaratorCacheBufferSize());
737739
this.setIvaratorCacheScanPersistThreshold(other.getIvaratorCacheScanPersistThreshold());
738740
this.setIvaratorCacheScanTimeout(other.getIvaratorCacheScanTimeout());
741+
this.setExcludeUnfieldedTypes(other.getExcludeUnfieldedTypes());
739742
this.setMaxFieldIndexRangeSplit(other.getMaxFieldIndexRangeSplit());
740743
this.setIvaratorMaxOpenFiles(other.getIvaratorMaxOpenFiles());
741744
this.setIvaratorNumRetries(other.getIvaratorNumRetries());
@@ -1623,6 +1626,14 @@ public void setIvaratorCacheScanTimeout(long ivaratorCacheScanTimeout) {
16231626
this.ivaratorCacheScanTimeout = ivaratorCacheScanTimeout;
16241627
}
16251628

1629+
public List<Type<?>> getExcludeUnfieldedTypes() {
1630+
return excludeUnfieldedTypes;
1631+
}
1632+
1633+
public void setExcludeUnfieldedTypes(List<Type<?>> excludeUnfieldedTypes) {
1634+
this.excludeUnfieldedTypes = excludeUnfieldedTypes;
1635+
}
1636+
16261637
public int getMaxFieldIndexRangeSplit() {
16271638
return maxFieldIndexRangeSplit;
16281639
}
@@ -3021,6 +3032,7 @@ public boolean equals(Object o) {
30213032
getIvaratorCacheBufferSize() == that.getIvaratorCacheBufferSize() &&
30223033
getIvaratorCacheScanPersistThreshold() == that.getIvaratorCacheScanPersistThreshold() &&
30233034
getIvaratorCacheScanTimeout() == that.getIvaratorCacheScanTimeout() &&
3035+
// Compare list contents (null-safe) rather than references: '==' on two Lists only tests identity
java.util.Objects.equals(getExcludeUnfieldedTypes(), that.getExcludeUnfieldedTypes()) &&
30243036
getMaxFieldIndexRangeSplit() == that.getMaxFieldIndexRangeSplit() &&
30253037
getIvaratorMaxOpenFiles() == that.getIvaratorMaxOpenFiles() &&
30263038
getIvaratorNumRetries() == that.getIvaratorNumRetries() &&
@@ -3291,6 +3303,7 @@ public int hashCode() {
32913303
getIvaratorCacheBufferSize(),
32923304
getIvaratorCacheScanPersistThreshold(),
32933305
getIvaratorCacheScanTimeout(),
3306+
getExcludeUnfieldedTypes(),
32943307
getMaxFieldIndexRangeSplit(),
32953308
getIvaratorMaxOpenFiles(),
32963309
getIvaratorNumRetries(),

warehouse/query-core/src/main/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTerms.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,10 @@ protected JexlNode expandNodeForNormalizers(JexlNode node, Object data) {
332332
if (fieldName.equals(Constants.ANY_FIELD)) {
333333
try {
334334
dataTypes.addAll(helper.getAllDatatypes());
335+
for (Type<?> type : config.getExcludeUnfieldedTypes()) {
336+
dataTypes.removeIf(dataType -> dataType.getClass().equals(type.getClass()));
337+
}
338+
335339
} catch (InstantiationException | IllegalAccessException | TableNotFoundException e) {
336340
log.error("Could not fetch all DataTypes while expanding unfielded term");
337341
throw new RuntimeException(e);

warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2412,6 +2412,14 @@ public void setIvaratorCacheScanTimeoutMinutes(long hdfsCacheScanTimeoutMinutes)
24122412
getConfig().setIvaratorCacheScanTimeout(hdfsCacheScanTimeoutMinutes * 1000 * 60);
24132413
}
24142414

2415+
public List<Type<?>> getExcludeUnfieldedTypes() {
2416+
return getConfig().getExcludeUnfieldedTypes();
2417+
}
2418+
2419+
public void setExcludeUnfieldedTypes(List<Type<?>> excludeUnfieldedTypes) {
2420+
getConfig().setExcludeUnfieldedTypes(excludeUnfieldedTypes);
2421+
}
2422+
24152423
public String getHdfsSiteConfigURLs() {
24162424
return getConfig().getHdfsSiteConfigURLs();
24172425
}

warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import datawave.data.type.GeometryType;
3535
import datawave.data.type.LcNoDiacriticsType;
3636
import datawave.data.type.NoOpType;
37+
import datawave.data.type.NumberType;
3738
import datawave.data.type.Type;
3839
import datawave.microservice.query.Query;
3940
import datawave.microservice.query.QueryImpl;
@@ -411,6 +412,8 @@ public void setUp() throws Exception {
411412
updatedValues.put("ivaratorCacheScanPersistThreshold", 1040L);
412413
defaultValues.put("ivaratorCacheScanTimeout", 3600000L);
413414
updatedValues.put("ivaratorCacheScanTimeout", 3600L);
415+
defaultValues.put("excludeUnfieldedTypes", Collections.emptyList());
416+
updatedValues.put("excludeUnfieldedTypes", Lists.newArrayList(new NumberType()));
414417
defaultValues.put("maxFieldIndexRangeSplit", 11);
415418
updatedValues.put("maxFieldIndexRangeSplit", 20);
416419
defaultValues.put("ivaratorMaxOpenFiles", 100);

warehouse/query-core/src/test/java/datawave/query/jexl/visitors/ExecutableExpansionVisitorTest.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import java.text.DateFormat;
77
import java.text.ParseException;
88
import java.text.SimpleDateFormat;
9+
import java.util.ArrayList;
910
import java.util.Arrays;
1011
import java.util.Collections;
1112
import java.util.Date;
@@ -439,14 +440,14 @@ public void testAnyfieldNumericExpansion() throws Exception {
439440
log.debug("testAnyfieldNumericExpansion");
440441
}
441442
String[] queryStrings = {"_ANYFIELD_ =~'12340.*?'"};
442-
@SuppressWarnings("unchecked")
443-
// SOPRANO is the only one with a 0 after the 1234
444-
List<String>[] expectedLists = new List[] {Arrays.asList("SOPRANO")};
443+
// ANYFIELD numeric regex normalization is no longer used, so expect no results for this query
444+
List<String> expectedLists = new ArrayList<>();
445445
for (int i = 0; i < queryStrings.length; i++) {
446-
runTestQuery(expectedLists[i], queryStrings[i], format.parse("20091231"), format.parse("20150101"), extraParameters);
446+
runTestQuery(expectedLists, queryStrings[i], format.parse("20091231"), format.parse("20150101"), extraParameters);
447447
}
448448

449-
String expectedQueryStr = "(BAIL == '+eE1.2345' || BAIL == '+fE1.23401') && ((_Eval_ = true) && (_ANYFIELD_ =~ '12340.*?'))";
449+
// No longer expect normalized numeric regexes for ANYFIELD
450+
String expectedQueryStr = "_NOFIELD_ =~ '12340.*?'";
450451
String plan = JexlFormattedStringBuildingVisitor.buildQuery(logic.getConfig().getQueryTree());
451452
Assert.assertTrue("Expected equality: " + expectedQueryStr + " vs " + plan,
452453
TreeEqualityVisitor.isEqual(JexlASTHelper.parseJexlQuery(expectedQueryStr), logic.getConfig().getQueryTree()));

warehouse/query-core/src/test/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTermsTest.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.LENIENT;
1010
import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.STRICT;
1111

12+
import java.util.ArrayList;
1213
import java.util.Arrays;
1314
import java.util.Collections;
1415
import java.util.Date;
@@ -746,6 +747,48 @@ public void testAnyFieldTerms() throws ParseException {
746747
expandTerms("_ANYFIELD_ >= '123'", "_ANYFIELD_ >= '+cE1.23' || _ANYFIELD_ >= '123'");
747748
}
748749

750+
/**
751+
* Test type exclusion for each node type
752+
*
753+
* @throws ParseException
754+
* if the query fails to parse
755+
*/
756+
@Test
757+
public void testAnyFieldTermsTypeExclusion() throws ParseException {
758+
759+
Multimap<String,Type<?>> dataTypes = HashMultimap.create();
760+
dataTypes.putAll("FOO", Sets.newHashSet(new LcNoDiacriticsType(), new LcType(), new NumberType(), new NoOpType()));
761+
helper.setDataTypes(dataTypes);
762+
763+
List<Type<?>> excludeUnfieldedTypes = new ArrayList<>();
764+
excludeUnfieldedTypes.add(new NumberType());
765+
config.setExcludeUnfieldedTypes(excludeUnfieldedTypes);
766+
767+
// EQ
768+
expandTerms("_ANYFIELD_ == '123'", "_ANYFIELD_ == '123'");
769+
770+
// NE
771+
expandTerms("_ANYFIELD_ != '123'", "_ANYFIELD_ != '123'");
772+
773+
// ER
774+
expandTerms("_ANYFIELD_ =~ '123'", "_ANYFIELD_ =~ '123'");
775+
776+
// NR
777+
expandTerms("_ANYFIELD_ !~ '123'", "_ANYFIELD_ !~ '123'");
778+
779+
// LT
780+
expandTerms("_ANYFIELD_ < '123'", "_ANYFIELD_ < '123'");
781+
782+
// LE
783+
expandTerms("_ANYFIELD_ <= '123'", "_ANYFIELD_ <= '123'");
784+
785+
// GT
786+
expandTerms("_ANYFIELD_ > '123'", "_ANYFIELD_ > '123'");
787+
788+
// GE
789+
expandTerms("_ANYFIELD_ >= '123'", "_ANYFIELD_ >= '123'");
790+
}
791+
749792
private void expandTerms(String original, String expected) throws ParseException {
750793
ASTJexlScript script = JexlASTHelper.parseJexlQuery(original);
751794
ASTJexlScript expanded = ExpandMultiNormalizedTerms.expandTerms(config, helper, script);

warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,13 @@
299299
<value>datawave.query.enrich.DatawaveTermFrequencyEnricher</value>
300300
</list>
301301
</property>
302+
303+
<!-- Types removed from the candidate data types when an unfielded (_ANYFIELD_) term is expanded; only NumberType is excluded here. -->
<property name="excludeUnfieldedTypes">
304+
<list value-type="datawave.data.type.Type">
305+
<bean class="datawave.data.type.NumberType" />
306+
</list>
307+
</property>
308+
302309
<property name="useFilters" value="false" />
303310
<property name="filterClassNames" value=""/>
304311

0 commit comments

Comments
 (0)