You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by cw...@apache.org on 2023/05/17 11:30:36 UTC
[druid] branch 26.0.0 updated: fix issues with handling arrays with all null elements and arrays of booleans in strict mode (#14297) (#14300)
This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch 26.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/26.0.0 by this push:
new f4b3ebfd9f fix issues with handling arrays with all null elements and arrays of booleans in strict mode (#14297) (#14300)
f4b3ebfd9f is described below
commit f4b3ebfd9f8d51ae923769c6ad19eeda79f0670b
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Wed May 17 04:30:28 2023 -0700
fix issues with handling arrays with all null elements and arrays of booleans in strict mode (#14297) (#14300)
---
.../java/org/apache/druid/math/expr/ExprEval.java | 13 +-
.../druid/segment/AutoTypeColumnIndexer.java | 4 +
.../apache/druid/segment/AutoTypeColumnMerger.java | 2 +-
.../incremental/IncrementalIndexAdapter.java | 6 +-
.../java/org/apache/druid/math/expr/EvalTest.java | 2 +
.../apache/druid/query/NestedDataTestUtils.java | 2 +
.../druid/query/scan/NestedDataScanQueryTest.java | 139 ++++++++++++++++++++-
.../test/resources/nested-array-test-data-2.json | 4 +
8 files changed, 161 insertions(+), 11 deletions(-)
diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java
index b2856c6c5e..1786632cd3 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java
@@ -235,6 +235,14 @@ public abstract class ExprEval<T>
private static Class convertType(@Nullable Class existing, Class next)
{
if (Number.class.isAssignableFrom(next) || next == String.class || next == Boolean.class) {
+ // coerce booleans
+ if (next == Boolean.class) {
+ if (ExpressionProcessing.useStrictBooleans()) {
+ next = Long.class;
+ } else {
+ next = String.class;
+ }
+ }
if (existing == null) {
return next;
}
@@ -245,6 +253,7 @@ public abstract class ExprEval<T>
if (next == String.class) {
return next;
}
+
// all numbers win over Integer
if (existing == Integer.class) {
return next;
@@ -807,7 +816,7 @@ public abstract class ExprEval<T>
if (value == null) {
return NullHandling.defaultDoubleValue();
}
- return value;
+ return value.doubleValue();
}
@Override
@@ -886,7 +895,7 @@ public abstract class ExprEval<T>
if (value == null) {
return NullHandling.defaultLongValue();
}
- return value;
+ return value.longValue();
}
@Override
diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
index f541b33540..c0112bd4b9 100644
--- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
@@ -164,6 +164,10 @@ public class AutoTypeColumnIndexer implements DimensionIndexer<StructuredData, S
fields.put(entry.getKey(), entry.getValue().getTypes());
}
}
+ // special handling for when column only has arrays with null elements, treat it as a string array
+ if (fields.isEmpty() && fieldIndexers.size() == 1) {
+ fields.put(fieldIndexers.firstKey(), new FieldTypeInfo.MutableTypeSet().add(ColumnType.STRING_ARRAY));
+ }
return fields;
}
diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java
index fafb1bc173..34e13959e0 100644
--- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java
+++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java
@@ -136,7 +136,7 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
}
// no data, we don't need to write this column
- if (mergedFields.size() == 0) {
+ if (numMergeIndex == 0 && mergedFields.size() == 0) {
hasOnlyNulls = true;
return;
}
diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java
index 06a2408d60..352147b377 100644
--- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java
+++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java
@@ -159,10 +159,10 @@ public class IncrementalIndexAdapter implements IndexableAdapter
);
}
if (indexer instanceof AutoTypeColumnIndexer) {
- AutoTypeColumnIndexer standardIndexer = (AutoTypeColumnIndexer) indexer;
+ AutoTypeColumnIndexer autoIndexer = (AutoTypeColumnIndexer) indexer;
return new NestedColumnMergable(
- standardIndexer.getSortedValueLookups(),
- standardIndexer.getFieldTypeInfo()
+ autoIndexer.getSortedValueLookups(),
+ autoIndexer.getFieldTypeInfo()
);
}
return null;
diff --git a/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java b/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java
index 61799c45af..433d32c245 100644
--- a/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java
+++ b/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java
@@ -1321,6 +1321,7 @@ public class EvalTest extends InitializedNullHandlingTest
// by default, booleans are handled as strings
assertBestEffortOf(true, ExpressionType.STRING, "true");
+ assertBestEffortOf(Arrays.asList(true, false), ExpressionType.STRING_ARRAY, new Object[]{"true", "false"});
assertBestEffortOf(
new byte[]{1, 2, 3, 4},
@@ -1336,6 +1337,7 @@ public class EvalTest extends InitializedNullHandlingTest
// in strict boolean mode, they are longs
ExpressionProcessing.initializeForStrictBooleansTests(true);
assertBestEffortOf(true, ExpressionType.LONG, 1L);
+ assertBestEffortOf(Arrays.asList(true, false), ExpressionType.LONG_ARRAY, new Object[]{1L, 0L});
}
finally {
// reset
diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java
index 07ccfbd853..730cac72e7 100644
--- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java
+++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java
@@ -76,6 +76,8 @@ public class NestedDataTestUtils
public static final String TYPES_DATA_FILE = "nested-types-test-data.json";
public static final String ARRAY_TYPES_DATA_FILE = "nested-array-test-data.json";
+ public static final String ARRAY_TYPES_DATA_FILE_2 = "nested-array-test-data-2.json";
+
public static final String INCREMENTAL_SEGMENTS_NAME = "incremental";
public static final String DEFAULT_SEGMENTS_NAME = "segments";
public static final String FRONT_CODED_SEGMENTS_NAME = "segments-frontcoded";
diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
index e1d1a79f72..de8c3036cc 100644
--- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
+++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
@@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.Druids;
import org.apache.druid.query.NestedDataTestUtils;
import org.apache.druid.query.Query;
@@ -555,11 +556,7 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(100)
.context(ImmutableMap.of());
- if (NullHandling.replaceWithDefault()) {
- // null elements are replaced with default values if druid.generic.useDefaultValueForNull=true
- // ... but not until after they are persisted, so realtime query results don't match of course...
- builder.columns("arrayString", "arrayLong", "arrayDouble");
- }
+
Query<ScanResultValue> scanQuery = builder.build();
final AggregatorFactory[] aggs = new AggregatorFactory[]{new CountAggregatorFactory("count")};
List<Segment> realtimeSegs = ImmutableList.of(
@@ -602,6 +599,138 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest
Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString());
}
+ @Test
+ public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryMoreArrayTypes() throws Exception
+ {
+ Druids.ScanQueryBuilder builder = Druids.newScanQueryBuilder()
+ .dataSource("test_datasource")
+ .intervals(
+ new MultipleIntervalSegmentSpec(
+ Collections.singletonList(Intervals.ETERNITY)
+ )
+ )
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .limit(100)
+ .context(ImmutableMap.of());
+ Query<ScanResultValue> scanQuery = builder.build();
+ final AggregatorFactory[] aggs = new AggregatorFactory[]{new CountAggregatorFactory("count")};
+ List<Segment> realtimeSegs = ImmutableList.of(
+ NestedDataTestUtils.createIncrementalIndex(
+ tempFolder,
+ NestedDataTestUtils.ARRAY_TYPES_DATA_FILE_2,
+ NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+ NestedDataTestUtils.TIMESTAMP_SPEC,
+ NestedDataTestUtils.AUTO_DISCOVERY,
+ TransformSpec.NONE,
+ aggs,
+ Granularities.NONE,
+ true
+ )
+ );
+ List<Segment> segs = NestedDataTestUtils.createSegments(
+ tempFolder,
+ closer,
+ NestedDataTestUtils.ARRAY_TYPES_DATA_FILE_2,
+ NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+ NestedDataTestUtils.TIMESTAMP_SPEC,
+ NestedDataTestUtils.AUTO_DISCOVERY,
+ TransformSpec.NONE,
+ aggs,
+ Granularities.NONE,
+ true,
+ IndexSpec.DEFAULT
+ );
+
+
+ final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
+ final Sequence<ScanResultValue> seq2 = helper.runQueryOnSegmentsObjs(segs, scanQuery);
+
+ List<ScanResultValue> resultsRealtime = seq.toList();
+ List<ScanResultValue> resultsSegments = seq2.toList();
+ logResults(resultsSegments);
+ logResults(resultsRealtime);
+ Assert.assertEquals(1, resultsRealtime.size());
+ Assert.assertEquals(resultsRealtime.size(), resultsSegments.size());
+ Assert.assertEquals(
+ "["
+ + "[978652800000, [A, A], [null, null], [1, 1], [0.1, 0.1], [true, true], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+ + "[978739200000, [A, A], [null, null], [1, 1], [0.1, 0.1], [true, true], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+ + "[978825600000, [A, A], [null, null], [1, 1], [0.1, 0.1], [true, true], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+ + "[978912000000, [A, A], [null, null], [1, 1], [0.1, 0.1], [true, true], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1]]",
+ resultsSegments.get(0).getEvents().toString()
+ );
+ Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString());
+ }
+
+ @Test
+ public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryMoreArrayTypesStrictBooleans() throws Exception
+ {
+ try {
+ ExpressionProcessing.initializeForStrictBooleansTests(true);
+ Druids.ScanQueryBuilder builder = Druids.newScanQueryBuilder()
+ .dataSource("test_datasource")
+ .intervals(
+ new MultipleIntervalSegmentSpec(
+ Collections.singletonList(Intervals.ETERNITY)
+ )
+ )
+ .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .limit(100)
+ .context(ImmutableMap.of());
+ Query<ScanResultValue> scanQuery = builder.build();
+ final AggregatorFactory[] aggs = new AggregatorFactory[]{new CountAggregatorFactory("count")};
+ List<Segment> realtimeSegs = ImmutableList.of(
+ NestedDataTestUtils.createIncrementalIndex(
+ tempFolder,
+ NestedDataTestUtils.ARRAY_TYPES_DATA_FILE_2,
+ NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+ NestedDataTestUtils.TIMESTAMP_SPEC,
+ NestedDataTestUtils.AUTO_DISCOVERY,
+ TransformSpec.NONE,
+ aggs,
+ Granularities.NONE,
+ true
+ )
+ );
+ List<Segment> segs = NestedDataTestUtils.createSegments(
+ tempFolder,
+ closer,
+ NestedDataTestUtils.ARRAY_TYPES_DATA_FILE_2,
+ NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+ NestedDataTestUtils.TIMESTAMP_SPEC,
+ NestedDataTestUtils.AUTO_DISCOVERY,
+ TransformSpec.NONE,
+ aggs,
+ Granularities.NONE,
+ true,
+ IndexSpec.DEFAULT
+ );
+
+
+ final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
+ final Sequence<ScanResultValue> seq2 = helper.runQueryOnSegmentsObjs(segs, scanQuery);
+
+ List<ScanResultValue> resultsRealtime = seq.toList();
+ List<ScanResultValue> resultsSegments = seq2.toList();
+ logResults(resultsSegments);
+ logResults(resultsRealtime);
+ Assert.assertEquals(1, resultsRealtime.size());
+ Assert.assertEquals(resultsRealtime.size(), resultsSegments.size());
+ Assert.assertEquals(
+ "["
+ + "[978652800000, [A, A], [null, null], [1, 1], [0.1, 0.1], [1, 1], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+ + "[978739200000, [A, A], [null, null], [1, 1], [0.1, 0.1], [1, 1], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+ + "[978825600000, [A, A], [null, null], [1, 1], [0.1, 0.1], [1, 1], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+ + "[978912000000, [A, A], [null, null], [1, 1], [0.1, 0.1], [1, 1], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1]]",
+ resultsSegments.get(0).getEvents().toString()
+ );
+ Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString());
+ }
+ finally {
+ ExpressionProcessing.initializeForTests();
+ }
+ }
+
private static void logResults(List<ScanResultValue> results)
{
StringBuilder bob = new StringBuilder();
diff --git a/processing/src/test/resources/nested-array-test-data-2.json b/processing/src/test/resources/nested-array-test-data-2.json
new file mode 100644
index 0000000000..071ca07f54
--- /dev/null
+++ b/processing/src/test/resources/nested-array-test-data-2.json
@@ -0,0 +1,4 @@
+{"timestamp":"2001-01-05T00:00:00","s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null],"c1":{"s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null]}}
+{"timestamp":"2001-01-06T00:00:00","s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null],"c1":{"s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null]}}
+{"timestamp":"2001-01-07T00:00:00","s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null],"c1":{"s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null]}}
+{"timestamp":"2001-01-08T00:00:00","s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null],"c1":{"s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null]}}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org