You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by cw...@apache.org on 2023/05/11 10:14:49 UTC

[druid] branch 26.0.0 updated: fix npe regression in json_value when filtering non-existent paths (#14250) (#14254)

This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch 26.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/26.0.0 by this push:
     new c3faf7467f fix npe regression in json_value when filtering non-existent paths (#14250) (#14254)
c3faf7467f is described below

commit c3faf7467f7249ddb957d2a24cadc3faae14002a
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Thu May 11 03:14:34 2023 -0700

    fix npe regression in json_value when filtering non-existent paths (#14250) (#14254)
    
    * fix npe regression in json_value when filtering non-existent paths
    
    * more coverage
---
 .../segment/virtual/NestedFieldVirtualColumn.java  |   2 +-
 .../apache/druid/query/NestedDataTestUtils.java    |  18 ++-
 .../query/groupby/NestedDataGroupByQueryTest.java  | 167 +++++++++++----------
 3 files changed, 104 insertions(+), 83 deletions(-)

diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
index c7ef30e883..486448c2e9 100644
--- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
+++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
@@ -1120,7 +1120,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn
         final Set<ColumnType> types = nestedColumn.getColumnTypes(parts);
         // if the expected output type is numeric but not all of the input types are numeric, we might have additional
         // null values than what the null value bitmap is tracking, wrap it
-        if (expectedType.isNumeric() && types.stream().anyMatch(t -> !t.isNumeric())) {
+        if (expectedType.isNumeric() && (types == null || types.stream().anyMatch(t -> !t.isNumeric()))) {
           return NoIndexesColumnIndexSupplier.getInstance();
         }
       }
diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java
index 1d8b9a0e65..07ccfbd853 100644
--- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java
+++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java
@@ -76,6 +76,11 @@ public class NestedDataTestUtils
   public static final String TYPES_DATA_FILE = "nested-types-test-data.json";
   public static final String ARRAY_TYPES_DATA_FILE = "nested-array-test-data.json";
 
+  public static final String INCREMENTAL_SEGMENTS_NAME = "incremental";
+  public static final String DEFAULT_SEGMENTS_NAME = "segments";
+  public static final String FRONT_CODED_SEGMENTS_NAME = "segments-frontcoded";
+  public static final String MIX_SEGMENTS_NAME = "mixed";
+
   public static final ObjectMapper JSON_MAPPER;
 
   public static final TimestampSpec TIMESTAMP_SPEC = new TimestampSpec("timestamp", null, null);
@@ -519,7 +524,7 @@ public class NestedDataTestUtils
       @Override
       public String toString()
       {
-        return "mixed";
+        return MIX_SEGMENTS_NAME;
       }
     });
     segmentsGenerators.add(new BiFunction<TemporaryFolder, Closer, List<Segment>>()
@@ -541,7 +546,7 @@ public class NestedDataTestUtils
       @Override
       public String toString()
       {
-        return "incremental";
+        return INCREMENTAL_SEGMENTS_NAME;
       }
     });
     segmentsGenerators.add(new BiFunction<TemporaryFolder, Closer, List<Segment>>()
@@ -577,7 +582,7 @@ public class NestedDataTestUtils
       @Override
       public String toString()
       {
-        return "segments";
+        return DEFAULT_SEGMENTS_NAME;
       }
     });
     segmentsGenerators.add(new BiFunction<TemporaryFolder, Closer, List<Segment>>()
@@ -621,9 +626,14 @@ public class NestedDataTestUtils
       @Override
       public String toString()
       {
-        return "segments-frontcoded";
+        return FRONT_CODED_SEGMENTS_NAME;
       }
     });
     return segmentsGenerators;
   }
+
+  public static boolean expectSegmentGeneratorCanVectorize(String name)
+  {
+    return DEFAULT_SEGMENTS_NAME.equals(name) || FRONT_CODED_SEGMENTS_NAME.equals(name);
+  }
 }
diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java
index c3a1552770..dae2363fdd 100644
--- a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java
+++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java
@@ -34,6 +34,7 @@ import org.apache.druid.query.aggregation.CountAggregatorFactory;
 import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
 import org.apache.druid.query.dimension.DefaultDimensionSpec;
 import org.apache.druid.query.expression.TestExprMacroTable;
+import org.apache.druid.query.filter.Filter;
 import org.apache.druid.query.filter.InDimFilter;
 import org.apache.druid.query.filter.SelectorDimFilter;
 import org.apache.druid.query.groupby.strategy.GroupByStrategySelector;
@@ -56,6 +57,7 @@ import org.junit.runners.Parameterized;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.function.BiFunction;
@@ -70,7 +72,6 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
   public final TemporaryFolder tempFolder = new TemporaryFolder();
 
   private final Closer closer;
-  private final GroupByQueryConfig config;
   private final QueryContexts.Vectorize vectorize;
   private final AggregationTestHelper helper;
   private final BiFunction<TemporaryFolder, Closer, List<Segment>> segmentsGenerator;
@@ -83,7 +84,6 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
   )
   {
     NestedDataModule.registerHandlersAndSerde();
-    this.config = config;
     this.vectorize = QueryContexts.Vectorize.fromString(vectorize);
     this.helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
         NestedDataModule.getJacksonModulesList(),
@@ -111,9 +111,11 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
         NestedDataTestUtils.getSegmentGenerators(NestedDataTestUtils.SIMPLE_DATA_FILE);
 
     for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) {
-      for (BiFunction<TemporaryFolder, Closer, List<Segment>> generatorFn : segmentsGenerators) {
-        for (String vectorize : new String[]{"false", "true", "force"}) {
-          constructors.add(new Object[]{config, generatorFn, vectorize});
+      if (!GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) {
+        for (BiFunction<TemporaryFolder, Closer, List<Segment>> generatorFn : segmentsGenerators) {
+          for (String vectorize : new String[]{"false", "true", "force"}) {
+            constructors.add(new Object[]{config, generatorFn, vectorize});
+          }
         }
       }
     }
@@ -195,9 +197,7 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
             new Object[]{"100", null, 100L, 1L, "1", null, 2L},
             new Object[]{"hello", null, null, 1L, "1", null, 12L},
             new Object[]{"world", null, null, 1L, "1", null, 2L}
-        ),
-        "incremental".equals(segmentsName),
-        true
+        )
     );
   }
 
@@ -307,12 +307,48 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
         groupQuery,
         NullHandling.sqlCompatible()
         ? ImmutableList.of(new Object[]{null, 16L})
-        : ImmutableList.of(new Object[]{"foo", 16L}),
-        true,
-        false
+        : ImmutableList.of(new Object[]{"foo", 16L})
     );
   }
 
+  @Test
+  public void testGroupByNonExistentFilterAsString()
+  {
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(DefaultDimensionSpec.of("v0"))
+                                          .setVirtualColumns(
+                                              new NestedFieldVirtualColumn("nest", "$.fake", "v0", ColumnType.STRING)
+                                          )
+                                          .setDimFilter(new SelectorDimFilter("v0", "1", null))
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setContext(getContext())
+                                          .build();
+
+    runResults(groupQuery, Collections.emptyList());
+  }
+
+  @Test
+  public void testGroupByNonExistentFilterAsNumeric()
+  {
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(DefaultDimensionSpec.of("v0"))
+                                          .setVirtualColumns(
+                                              new NestedFieldVirtualColumn("nest", "$.fake", "v0", ColumnType.LONG)
+                                          )
+                                          .setDimFilter(new SelectorDimFilter("v0", "1", null))
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setContext(getContext())
+                                          .build();
+
+    runResults(groupQuery, Collections.emptyList());
+  }
+
   @Test
   public void testGroupBySomeFieldOnStringColumn()
   {
@@ -390,12 +426,28 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
         groupQuery,
         ImmutableList.of(
             new Object[]{100L, 2L}
-        ),
-        false,
-        true
+        )
     );
   }
 
+  @Test
+  public void testGroupBySomeFieldOnNestedStringColumnWithFilterExpectedTypeLong()
+  {
+    GroupByQuery groupQuery = GroupByQuery.builder()
+                                          .setDataSource("test_datasource")
+                                          .setGranularity(Granularities.ALL)
+                                          .setInterval(Intervals.ETERNITY)
+                                          .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.LONG))
+                                          .setVirtualColumns(new NestedFieldVirtualColumn("nester", "$.y.a", "v0", ColumnType.LONG))
+                                          .setAggregatorSpecs(new CountAggregatorFactory("count"))
+                                          .setContext(getContext())
+                                          .setDimFilter(new SelectorDimFilter("v0", "100", null))
+                                          .build();
+
+
+    runResults(groupQuery, Collections.emptyList());
+  }
+
   @Test
   public void testGroupBySomeFieldOnStringColumnWithFilterExpectedTypeDouble()
   {
@@ -419,9 +471,7 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
         groupQuery,
         ImmutableList.of(
             new Object[]{100.0, 2L}
-        ),
-        false,
-        true
+        )
     );
   }
 
@@ -448,9 +498,7 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
         groupQuery,
         ImmutableList.of(
             new Object[]{100f, 2L}
-        ),
-        false,
-        true
+        )
     );
   }
 
@@ -504,9 +552,7 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
         ImmutableList.of(
             new Object[]{1672531200000L, NullHandling.defaultLongValue(), 8L},
             new Object[]{1672617600000L, NullHandling.defaultLongValue(), 8L}
-        ),
-        false,
-        true
+        )
     );
   }
 
@@ -529,14 +575,7 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
                                           .build();
 
 
-    runResults(
-        groupQuery,
-        ImmutableList.of(
-            new Object[]{1672531200000L, 8L}
-        ),
-        false,
-        true
-    );
+    runResults(groupQuery, ImmutableList.of(new Object[]{1672531200000L, 8L}));
   }
 
   @Test
@@ -558,14 +597,7 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
                                           .build();
 
 
-    runResults(
-        groupQuery,
-        ImmutableList.of(
-            new Object[]{"1672531200000", 8L}
-        ),
-        true,
-        false
-    );
+    runResults(groupQuery, ImmutableList.of(new Object[]{"1672531200000", 8L}));
   }
 
   @Test
@@ -589,53 +621,32 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest
 
     runResults(
         groupQuery,
-        ImmutableList.of(),
-        false,
-        true
+        ImmutableList.of()
     );
   }
 
-  private void runResults(GroupByQuery groupQuery, List<Object[]> expectedResults)
-  {
-    runResults(groupQuery, expectedResults, false, false);
-  }
-
   private void runResults(
       GroupByQuery groupQuery,
-      List<Object[]> expectedResults,
-      boolean hasUnknownCardinality,
-      boolean hasNonStringOutput
+      List<Object[]> expectedResults
   )
   {
+    List<Segment> segments = segmentsGenerator.apply(tempFolder, closer);
     Supplier<List<ResultRow>> runner =
-        () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(tempFolder, closer), groupQuery).toList();
-    if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) {
-      if (hasUnknownCardinality) {
-        Throwable t = Assert.assertThrows(RuntimeException.class, runner::get);
-        Assert.assertEquals(
-            "java.lang.UnsupportedOperationException: GroupBy v1 does not support dimension selectors with unknown cardinality.",
-            t.getMessage()
-        );
-        return;
-      }
-      if (hasNonStringOutput) {
-        Throwable t = Assert.assertThrows(RuntimeException.class, runner::get);
-        Assert.assertEquals(
-            "java.lang.UnsupportedOperationException: GroupBy v1 only supports dimensions with an outputType of STRING.",
-            t.getMessage()
-        );
-        return;
-      }
-    }
-    if (!"segments".equals(segmentsName) && !"segments-frontcoded".equals(segmentsName)) {
-      if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) {
-        Throwable t = Assert.assertThrows(RuntimeException.class, runner::get);
-        Assert.assertEquals(
-            "java.lang.UnsupportedOperationException: GroupBy v1 does not support dimension selectors with unknown cardinality.",
-            t.getMessage()
-        );
-        return;
-      } else if (vectorize == QueryContexts.Vectorize.FORCE) {
+        () -> helper.runQueryOnSegmentsObjs(segments, groupQuery).toList();
+    Filter filter = groupQuery.getFilter() == null ? null : groupQuery.getFilter().toFilter();
+    boolean allCanVectorize = segments.stream()
+                                      .allMatch(
+                                          s -> s.asStorageAdapter()
+                                                .canVectorize(
+                                                    filter,
+                                                    groupQuery.getVirtualColumns(),
+                                                    groupQuery.isDescending()
+                                                )
+                                      );
+
+    Assert.assertEquals(NestedDataTestUtils.expectSegmentGeneratorCanVectorize(segmentsName), allCanVectorize);
+    if (!allCanVectorize) {
+      if (vectorize == QueryContexts.Vectorize.FORCE) {
         Throwable t = Assert.assertThrows(RuntimeException.class, runner::get);
         Assert.assertEquals(
             "java.util.concurrent.ExecutionException: java.lang.RuntimeException: org.apache.druid.java.util.common.ISE: Cannot vectorize!",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org