Posted to commits@druid.apache.org by cw...@apache.org on 2023/05/17 11:30:36 UTC

[druid] branch 26.0.0 updated: fix issues with handling arrays with all null elements and arrays of booleans in strict mode (#14297) (#14300)

This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch 26.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/26.0.0 by this push:
     new f4b3ebfd9f fix issues with handling arrays with all null elements and arrays of booleans in strict mode (#14297) (#14300)
f4b3ebfd9f is described below

commit f4b3ebfd9f8d51ae923769c6ad19eeda79f0670b
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Wed May 17 04:30:28 2023 -0700

    fix issues with handling arrays with all null elements and arrays of booleans in strict mode (#14297) (#14300)
---
 .../java/org/apache/druid/math/expr/ExprEval.java  |  13 +-
 .../druid/segment/AutoTypeColumnIndexer.java       |   4 +
 .../apache/druid/segment/AutoTypeColumnMerger.java |   2 +-
 .../incremental/IncrementalIndexAdapter.java       |   6 +-
 .../java/org/apache/druid/math/expr/EvalTest.java  |   2 +
 .../apache/druid/query/NestedDataTestUtils.java    |   2 +
 .../druid/query/scan/NestedDataScanQueryTest.java  | 139 ++++++++++++++++++++-
 .../test/resources/nested-array-test-data-2.json   |   4 +
 8 files changed, 161 insertions(+), 11 deletions(-)

diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java
index b2856c6c5e..1786632cd3 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java
@@ -235,6 +235,14 @@ public abstract class ExprEval<T>
   private static Class convertType(@Nullable Class existing, Class next)
   {
     if (Number.class.isAssignableFrom(next) || next == String.class || next == Boolean.class) {
+      // coerce booleans
+      if (next == Boolean.class) {
+        if (ExpressionProcessing.useStrictBooleans()) {
+          next = Long.class;
+        } else {
+          next = String.class;
+        }
+      }
       if (existing == null) {
         return next;
       }
@@ -245,6 +253,7 @@ public abstract class ExprEval<T>
       if (next == String.class) {
         return next;
       }
+
       // all numbers win over Integer
       if (existing == Integer.class) {
         return next;
@@ -807,7 +816,7 @@ public abstract class ExprEval<T>
       if (value == null) {
         return NullHandling.defaultDoubleValue();
       }
-      return value;
+      return value.doubleValue();
     }
 
     @Override
@@ -886,7 +895,7 @@ public abstract class ExprEval<T>
       if (value == null) {
         return NullHandling.defaultLongValue();
       }
-      return value;
+      return value.longValue();
     }
 
     @Override
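
As an aside on the convertType change above: Boolean element types are now coerced before the usual widening rules run, so best-effort inference over an array of booleans resolves to a concrete array type. Below is a minimal standalone sketch of the resulting behavior (not part of this commit), mirroring the EvalTest assertions added later in this diff and assuming the test-style initialization helpers shown there:

    import java.util.Arrays;

    import org.apache.druid.common.config.NullHandling;
    import org.apache.druid.math.expr.ExprEval;
    import org.apache.druid.math.expr.ExpressionProcessing;

    public class BooleanArrayCoercionSketch
    {
      public static void main(String[] args)
      {
        // Test-style setup so the ExprEval statics can be used outside the Druid runtime.
        NullHandling.initializeForTests();
        ExpressionProcessing.initializeForTests(); // default: non-strict booleans

        // By default, booleans are handled as strings, so a boolean array is
        // inferred as a string array with "true"/"false" elements.
        ExprEval<?> eval = ExprEval.bestEffortOf(Arrays.asList(true, false));
        System.out.println(eval.type());                     // expect ExpressionType.STRING_ARRAY
        System.out.println(Arrays.toString(eval.asArray())); // [true, false]

        // After ExpressionProcessing.initializeForStrictBooleansTests(true), the same
        // input is inferred as ExpressionType.LONG_ARRAY with elements 1 and 0.
      }
    }

The scan query tests added further down show the same split end to end: the s_bool arrays come back as [true, true] by default and as [1, 1] in strict boolean mode.
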
diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
index f541b33540..c0112bd4b9 100644
--- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
@@ -164,6 +164,10 @@ public class AutoTypeColumnIndexer implements DimensionIndexer<StructuredData, S
         fields.put(entry.getKey(), entry.getValue().getTypes());
       }
     }
+    // special handling for when column only has arrays with null elements, treat it as a string array
+    if (fields.isEmpty() && fieldIndexers.size() == 1) {
+      fields.put(fieldIndexers.firstKey(), new FieldTypeInfo.MutableTypeSet().add(ColumnType.STRING_ARRAY));
+    }
     return fields;
   }
 
diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java
index fafb1bc173..34e13959e0 100644
--- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java
+++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java
@@ -136,7 +136,7 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
       }
 
       // no data, we don't need to write this column
-      if (mergedFields.size() == 0) {
+      if (numMergeIndex == 0 && mergedFields.size() == 0) {
         hasOnlyNulls = true;
         return;
       }
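
Taken together with the AutoTypeColumnIndexer change above, this guard means a nested column whose arrays contained only null elements is still written out (defaulting to ARRAY<STRING>) rather than being treated as an all-null column and skipped. A minimal sketch of that fallback in isolation (not part of this commit; the helper name is hypothetical, and the imports assume the FieldTypeInfo and ColumnType classes referenced in the hunks above):

    import java.util.SortedMap;

    import org.apache.druid.segment.column.ColumnType;
    import org.apache.druid.segment.nested.FieldTypeInfo;

    public class NullOnlyArrayFallbackSketch
    {
      static SortedMap<String, FieldTypeInfo.MutableTypeSet> defaultNullOnlyArrayField(
          SortedMap<String, FieldTypeInfo.MutableTypeSet> fields,
          SortedMap<String, ?> fieldIndexers
      )
      {
        // A field whose observed elements were all null never records a concrete type,
        // so 'fields' stays empty even though an indexer exists for it. Default the lone
        // field to a string array so the merger still writes the column.
        if (fields.isEmpty() && fieldIndexers.size() == 1) {
          fields.put(fieldIndexers.firstKey(), new FieldTypeInfo.MutableTypeSet().add(ColumnType.STRING_ARRAY));
        }
        return fields;
      }
    }

The new nested-array-test-data-2.json fixture below exercises this path with its "s_null":[null,null] field, which the added scan tests expect back as [null, null] from both realtime and persisted segments.
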
diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java
index 06a2408d60..352147b377 100644
--- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java
+++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java
@@ -159,10 +159,10 @@ public class IncrementalIndexAdapter implements IndexableAdapter
       );
     }
     if (indexer instanceof AutoTypeColumnIndexer) {
-      AutoTypeColumnIndexer standardIndexer = (AutoTypeColumnIndexer) indexer;
+      AutoTypeColumnIndexer autoIndexer = (AutoTypeColumnIndexer) indexer;
       return new NestedColumnMergable(
-          standardIndexer.getSortedValueLookups(),
-          standardIndexer.getFieldTypeInfo()
+          autoIndexer.getSortedValueLookups(),
+          autoIndexer.getFieldTypeInfo()
       );
     }
     return null;
diff --git a/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java b/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java
index 61799c45af..433d32c245 100644
--- a/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java
+++ b/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java
@@ -1321,6 +1321,7 @@ public class EvalTest extends InitializedNullHandlingTest
 
     // by default, booleans are handled as strings
     assertBestEffortOf(true, ExpressionType.STRING, "true");
+    assertBestEffortOf(Arrays.asList(true, false), ExpressionType.STRING_ARRAY, new Object[]{"true", "false"});
 
     assertBestEffortOf(
         new byte[]{1, 2, 3, 4},
@@ -1336,6 +1337,7 @@ public class EvalTest extends InitializedNullHandlingTest
       // in strict boolean mode, they are longs
       ExpressionProcessing.initializeForStrictBooleansTests(true);
       assertBestEffortOf(true, ExpressionType.LONG, 1L);
+      assertBestEffortOf(Arrays.asList(true, false), ExpressionType.LONG_ARRAY, new Object[]{1L, 0L});
     }
     finally {
       // reset
diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java
index 07ccfbd853..730cac72e7 100644
--- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java
+++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java
@@ -76,6 +76,8 @@ public class NestedDataTestUtils
   public static final String TYPES_DATA_FILE = "nested-types-test-data.json";
   public static final String ARRAY_TYPES_DATA_FILE = "nested-array-test-data.json";
 
+  public static final String ARRAY_TYPES_DATA_FILE_2 = "nested-array-test-data-2.json";
+
   public static final String INCREMENTAL_SEGMENTS_NAME = "incremental";
   public static final String DEFAULT_SEGMENTS_NAME = "segments";
   public static final String FRONT_CODED_SEGMENTS_NAME = "segments-frontcoded";
diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
index e1d1a79f72..de8c3036cc 100644
--- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
+++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
@@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.granularity.Granularities;
 import org.apache.druid.java.util.common.guava.Sequence;
 import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.math.expr.ExpressionProcessing;
 import org.apache.druid.query.Druids;
 import org.apache.druid.query.NestedDataTestUtils;
 import org.apache.druid.query.Query;
@@ -555,11 +556,7 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest
                                             .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                                             .limit(100)
                                             .context(ImmutableMap.of());
-    if (NullHandling.replaceWithDefault()) {
-      // null elements are replaced with default values if druid.generic.useDefaultValueForNull=true
-      // ... but not until after they are persisted, so realtime query results don't match of course...
-      builder.columns("arrayString", "arrayLong", "arrayDouble");
-    }
+
     Query<ScanResultValue> scanQuery = builder.build();
     final AggregatorFactory[] aggs = new AggregatorFactory[]{new CountAggregatorFactory("count")};
     List<Segment> realtimeSegs = ImmutableList.of(
@@ -602,6 +599,138 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest
     Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString());
   }
 
+  @Test
+  public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryMoreArrayTypes() throws Exception
+  {
+    Druids.ScanQueryBuilder builder = Druids.newScanQueryBuilder()
+                                            .dataSource("test_datasource")
+                                            .intervals(
+                                                new MultipleIntervalSegmentSpec(
+                                                    Collections.singletonList(Intervals.ETERNITY)
+                                                )
+                                            )
+                                            .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                                            .limit(100)
+                                            .context(ImmutableMap.of());
+    Query<ScanResultValue> scanQuery = builder.build();
+    final AggregatorFactory[] aggs = new AggregatorFactory[]{new CountAggregatorFactory("count")};
+    List<Segment> realtimeSegs = ImmutableList.of(
+        NestedDataTestUtils.createIncrementalIndex(
+            tempFolder,
+            NestedDataTestUtils.ARRAY_TYPES_DATA_FILE_2,
+            NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+            NestedDataTestUtils.TIMESTAMP_SPEC,
+            NestedDataTestUtils.AUTO_DISCOVERY,
+            TransformSpec.NONE,
+            aggs,
+            Granularities.NONE,
+            true
+        )
+    );
+    List<Segment> segs = NestedDataTestUtils.createSegments(
+        tempFolder,
+        closer,
+        NestedDataTestUtils.ARRAY_TYPES_DATA_FILE_2,
+        NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+        NestedDataTestUtils.TIMESTAMP_SPEC,
+        NestedDataTestUtils.AUTO_DISCOVERY,
+        TransformSpec.NONE,
+        aggs,
+        Granularities.NONE,
+        true,
+        IndexSpec.DEFAULT
+    );
+
+
+    final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
+    final Sequence<ScanResultValue> seq2 = helper.runQueryOnSegmentsObjs(segs, scanQuery);
+
+    List<ScanResultValue> resultsRealtime = seq.toList();
+    List<ScanResultValue> resultsSegments = seq2.toList();
+    logResults(resultsSegments);
+    logResults(resultsRealtime);
+    Assert.assertEquals(1, resultsRealtime.size());
+    Assert.assertEquals(resultsRealtime.size(), resultsSegments.size());
+    Assert.assertEquals(
+        "["
+        + "[978652800000, [A, A], [null, null], [1, 1], [0.1, 0.1], [true, true], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+        + "[978739200000, [A, A], [null, null], [1, 1], [0.1, 0.1], [true, true], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+        + "[978825600000, [A, A], [null, null], [1, 1], [0.1, 0.1], [true, true], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+        + "[978912000000, [A, A], [null, null], [1, 1], [0.1, 0.1], [true, true], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1]]",
+        resultsSegments.get(0).getEvents().toString()
+    );
+    Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString());
+  }
+
+  @Test
+  public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryMoreArrayTypesStrictBooleans() throws Exception
+  {
+    try {
+      ExpressionProcessing.initializeForStrictBooleansTests(true);
+      Druids.ScanQueryBuilder builder = Druids.newScanQueryBuilder()
+                                              .dataSource("test_datasource")
+                                              .intervals(
+                                                  new MultipleIntervalSegmentSpec(
+                                                      Collections.singletonList(Intervals.ETERNITY)
+                                                  )
+                                              )
+                                              .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                                              .limit(100)
+                                              .context(ImmutableMap.of());
+      Query<ScanResultValue> scanQuery = builder.build();
+      final AggregatorFactory[] aggs = new AggregatorFactory[]{new CountAggregatorFactory("count")};
+      List<Segment> realtimeSegs = ImmutableList.of(
+          NestedDataTestUtils.createIncrementalIndex(
+              tempFolder,
+              NestedDataTestUtils.ARRAY_TYPES_DATA_FILE_2,
+              NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+              NestedDataTestUtils.TIMESTAMP_SPEC,
+              NestedDataTestUtils.AUTO_DISCOVERY,
+              TransformSpec.NONE,
+              aggs,
+              Granularities.NONE,
+              true
+          )
+      );
+      List<Segment> segs = NestedDataTestUtils.createSegments(
+          tempFolder,
+          closer,
+          NestedDataTestUtils.ARRAY_TYPES_DATA_FILE_2,
+          NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+          NestedDataTestUtils.TIMESTAMP_SPEC,
+          NestedDataTestUtils.AUTO_DISCOVERY,
+          TransformSpec.NONE,
+          aggs,
+          Granularities.NONE,
+          true,
+          IndexSpec.DEFAULT
+      );
+
+
+      final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
+      final Sequence<ScanResultValue> seq2 = helper.runQueryOnSegmentsObjs(segs, scanQuery);
+
+      List<ScanResultValue> resultsRealtime = seq.toList();
+      List<ScanResultValue> resultsSegments = seq2.toList();
+      logResults(resultsSegments);
+      logResults(resultsRealtime);
+      Assert.assertEquals(1, resultsRealtime.size());
+      Assert.assertEquals(resultsRealtime.size(), resultsSegments.size());
+      Assert.assertEquals(
+          "["
+          + "[978652800000, [A, A], [null, null], [1, 1], [0.1, 0.1], [1, 1], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+          + "[978739200000, [A, A], [null, null], [1, 1], [0.1, 0.1], [1, 1], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+          + "[978825600000, [A, A], [null, null], [1, 1], [0.1, 0.1], [1, 1], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1], "
+          + "[978912000000, [A, A], [null, null], [1, 1], [0.1, 0.1], [1, 1], [null, null], {s_str1=[A, A], s_str2=[null, null], s_num_int=[1, 1], s_num_float=[0.1, 0.1], s_bool=[true, true], s_null=[null, null]}, 1]]",
+          resultsSegments.get(0).getEvents().toString()
+      );
+      Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString());
+    }
+    finally {
+      ExpressionProcessing.initializeForTests();
+    }
+  }
+
   private static void logResults(List<ScanResultValue> results)
   {
     StringBuilder bob = new StringBuilder();
diff --git a/processing/src/test/resources/nested-array-test-data-2.json b/processing/src/test/resources/nested-array-test-data-2.json
new file mode 100644
index 0000000000..071ca07f54
--- /dev/null
+++ b/processing/src/test/resources/nested-array-test-data-2.json
@@ -0,0 +1,4 @@
+{"timestamp":"2001-01-05T00:00:00","s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null],"c1":{"s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null]}}
+{"timestamp":"2001-01-06T00:00:00","s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null],"c1":{"s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null]}}
+{"timestamp":"2001-01-07T00:00:00","s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null],"c1":{"s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null]}}
+{"timestamp":"2001-01-08T00:00:00","s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null],"c1":{"s_str1":["A","A"],"s_str2":["null","null"],"s_num_int":[1,1],"s_num_float":[0.1,0.1],"s_bool":[true,true],"s_null":[null,null]}}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org