You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by cw...@apache.org on 2023/05/11 11:43:35 UTC
[druid] branch master updated: add array_to_mv function to convert arrays into mvds to assist with migration from mvds to arrays (#14236)
This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new a58cebe491 add array_to_mv function to convert arrays into mvds to assist with migration from mvds to arrays (#14236)
a58cebe491 is described below
commit a58cebe4917dadc65ecd3143668288229a57d22a
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Thu May 11 04:43:28 2023 -0700
add array_to_mv function to convert arrays into mvds to assist with migration from mvds to arrays (#14236)
---
docs/querying/sql-array-functions.md | 8 +-
docs/querying/sql-multivalue-string-functions.md | 5 +-
.../main/java/org/apache/druid/math/expr/Expr.java | 8 +-
.../java/org/apache/druid/math/expr/Function.java | 63 ++++-
.../apache/druid/segment/column/ColumnType.java | 17 ++
.../org/apache/druid/math/expr/FunctionTest.java | 62 ++++-
...ArrayToMultiValueStringOperatorConversion.java} | 24 +-
.../MultiValueStringToArrayOperatorConversion.java | 8 +-
.../sql/calcite/planner/DruidOperatorTable.java | 2 +
.../sql/calcite/CalciteNestedDataQueryTest.java | 299 ++++++++++++++++++++-
10 files changed, 461 insertions(+), 35 deletions(-)
diff --git a/docs/querying/sql-array-functions.md b/docs/querying/sql-array-functions.md
index ed7d51b9e8..bc6bba63c0 100644
--- a/docs/querying/sql-array-functions.md
+++ b/docs/querying/sql-array-functions.md
@@ -36,8 +36,11 @@ sidebar_label: "Array functions"
This page describes the operations you can perform on arrays using [Druid SQL](./sql.md). See [`ARRAY` data type documentation](./sql-data-types.md#arrays) for additional details.
-All array references in the array function documentation can refer to multi-value string columns or `ARRAY` literals. These functions are largely
-identical to the [multi-value string functions](sql-multivalue-string-functions.md), but use `ARRAY` types and behavior.
+All array references in the array function documentation can refer to multi-value string columns or `ARRAY` literals.
+These functions are largely identical to the [multi-value string functions](sql-multivalue-string-functions.md), but
+use `ARRAY` types and behavior. Multi-value string `VARCHAR` columns can be converted to `VARCHAR ARRAY` to use with
+these functions using `MV_TO_ARRAY`, and `ARRAY` types can be converted to multi-value string `VARCHAR` with
+`ARRAY_TO_MV`.
|Function|Description|
|--------|-----|
@@ -55,3 +58,4 @@ identical to the [multi-value string functions](sql-multivalue-string-functions.
|`ARRAY_SLICE(arr, start, end)`|Returns the subarray of `arr` from the 0-based index `start` (inclusive) to `end` (exclusive). Returns `null`, if `start` is less than 0, greater than length of `arr`, or greater than `end`.|
|`ARRAY_TO_STRING(arr, str)`|Joins all elements of `arr` by the delimiter specified by `str`.|
|`STRING_TO_ARRAY(str1, str2)`|Splits `str1` into an array on the delimiter specified by `str2`.|
+|`ARRAY_TO_MV(arr)`|Converts an `ARRAY` of any type into a multi-value string `VARCHAR`.|
diff --git a/docs/querying/sql-multivalue-string-functions.md b/docs/querying/sql-multivalue-string-functions.md
index d0d9040a24..c73dd6297c 100644
--- a/docs/querying/sql-multivalue-string-functions.md
+++ b/docs/querying/sql-multivalue-string-functions.md
@@ -40,8 +40,9 @@ See [SQL multi-value strings](./sql-data-types.md#multi-value-strings) and nativ
All array references in the multi-value string function documentation can refer to multi-value string columns or
`ARRAY` types. These functions are largely identical to the [array functions](./sql-array-functions.md), but use
-`VARCHAR` types and behavior. Multi-value strings can also be converted to `ARRAY` types using `MV_TO_ARRAY`. For
-additional details about `ARRAY` types, see [`ARRAY` data type documentation](./sql-data-types.md#arrays).
+`VARCHAR` types and behavior. Multi-value strings can also be converted to `ARRAY` types using `MV_TO_ARRAY`, and
+`ARRAY` into multi-value strings via `ARRAY_TO_MV`. For additional details about `ARRAY` types, see
+[`ARRAY` data type documentation](./sql-data-types.md#arrays).
|Function|Description|
|--------|-----|
diff --git a/processing/src/main/java/org/apache/druid/math/expr/Expr.java b/processing/src/main/java/org/apache/druid/math/expr/Expr.java
index 9844a0526b..50eabf3dac 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/Expr.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/Expr.java
@@ -224,7 +224,7 @@ public interface Expr extends Cacheable
if (argType == null) {
continue;
}
- numeric &= argType.isNumeric();
+ numeric = numeric && argType.isNumeric();
}
return numeric;
}
@@ -265,7 +265,7 @@ public interface Expr extends Cacheable
if (currentType == null) {
currentType = argType;
}
- allSame &= Objects.equals(argType, currentType);
+ allSame = allSame && Objects.equals(argType, currentType);
}
return allSame;
}
@@ -302,7 +302,7 @@ public interface Expr extends Cacheable
if (argType == null) {
continue;
}
- scalar &= argType.isPrimitive();
+ scalar = scalar && argType.isPrimitive();
}
return scalar;
}
@@ -330,7 +330,7 @@ public interface Expr extends Cacheable
{
boolean canVectorize = true;
for (Expr arg : args) {
- canVectorize &= arg.canVectorize(this);
+ canVectorize = canVectorize && arg.canVectorize(this);
}
return canVectorize;
}
diff --git a/processing/src/main/java/org/apache/druid/math/expr/Function.java b/processing/src/main/java/org/apache/druid/math/expr/Function.java
index b936a4adda..632425d1c7 100644
--- a/processing/src/main/java/org/apache/druid/math/expr/Function.java
+++ b/processing/src/main/java/org/apache/druid/math/expr/Function.java
@@ -2922,7 +2922,7 @@ public interface Function extends NamedFunction
}
}
- class MVToArrayFunction implements Function
+ class MultiValueStringToArrayFunction implements Function
{
@Override
public String name()
@@ -2981,6 +2981,67 @@ public interface Function extends NamedFunction
return ImmutableSet.copyOf(args);
}
}
+
+ class ArrayToMultiValueStringFunction implements Function
+ {
+ @Override
+ public String name()
+ {
+ return "array_to_mv";
+ }
+
+ @Override
+ public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
+ {
+ return args.get(0).eval(bindings).castTo(ExpressionType.STRING_ARRAY);
+ }
+
+ @Override
+ public void validateArguments(List<Expr> args)
+ {
+ validationHelperCheckArgumentCount(args, 1);
+ IdentifierExpr expr = args.get(0).getIdentifierExprIfIdentifierExpr();
+
+ if (expr == null) {
+ throw validationFailed(
+ "argument %s should be an identifier expression. Use array() instead",
+ args.get(0).toString()
+ );
+ }
+ }
+
+ @Nullable
+ @Override
+ public ExpressionType getOutputType(Expr.InputBindingInspector inspector, List<Expr> args)
+ {
+ return ExpressionType.STRING_ARRAY;
+ }
+
+ @Override
+ public boolean hasArrayInputs()
+ {
+ return true;
+ }
+
+ @Override
+ public boolean hasArrayOutput()
+ {
+ return true;
+ }
+
+ @Override
+ public Set<Expr> getScalarInputs(List<Expr> args)
+ {
+ return Collections.emptySet();
+ }
+
+ @Override
+ public Set<Expr> getArrayInputs(List<Expr> args)
+ {
+ return ImmutableSet.copyOf(args);
+ }
+ }
+
class ArrayConstructorFunction implements Function
{
@Override
diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnType.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnType.java
index 14e044b830..9891bac9b4 100644
--- a/processing/src/main/java/org/apache/druid/segment/column/ColumnType.java
+++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnType.java
@@ -135,6 +135,23 @@ public class ColumnType extends BaseTypeSignature<ValueType>
return ColumnTypeFactory.getInstance().ofComplex(complexTypeName);
}
+ /**
+ * Finds the type that can best represent both types, or none if there is no type information.
+ * If either type is null, the other type is returned. If both types are null, this method returns null as we cannot
+ * determine any useful type information. If the types are {@link ValueType#COMPLEX}, they must be the same complex
+ * type, else this function throws a {@link IllegalArgumentException} as the types are truly incompatible, with the
+ * exception of {@link ColumnType#NESTED_DATA} which is complex and represents nested AND mixed type data so is
+ * instead treated as the 'least restrictive type' if present. If both types are {@link ValueType#ARRAY}, the result
+ * is an array of the result of calling this method again on {@link ColumnType#elementType}. If only one type is an
+ * array, the result is an array type of calling this method on the non-array type and the array element type. After
+ * arrays, if either type is {@link ValueType#STRING}, the result is {@link ValueType#STRING}. If both types are
+ * numeric, then the result will be {@link ValueType#LONG} if both are longs, {@link ValueType#FLOAT} if both are
+ * floats, else {@link ValueType#DOUBLE}.
+ *
+ * @see org.apache.druid.math.expr.ExpressionTypeConversion#function for a similar method used for expression type
+ * inference
+ */
+ @Nullable
public static ColumnType leastRestrictiveType(@Nullable ColumnType type, @Nullable ColumnType other)
{
if (type == null) {
diff --git a/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java
index d6304e94c3..e3a7fa909c 100644
--- a/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java
+++ b/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java
@@ -1007,14 +1007,16 @@ public class FunctionTest extends InitializedNullHandlingTest
}
@Test
- public void testMVToArrayWithValidInputs()
+ public void testMultiValueStringToArrayWithValidInputs()
{
assertArrayExpr("mv_to_array(x)", new String[]{"foo"});
assertArrayExpr("mv_to_array(a)", new String[]{"foo", "bar", "baz", "foobar"});
+ assertArrayExpr("mv_to_array(b)", new String[]{"1", "2", "3", "4", "5"});
+ assertArrayExpr("mv_to_array(c)", new String[]{"3.1", "4.2", "5.3"});
}
@Test
- public void testMVToArrayWithConstantLiteral()
+ public void testMultiValueStringToArrayWithInvalidInputs()
{
Throwable t = Assert.assertThrows(
ExpressionValidationException.class,
@@ -1024,12 +1026,8 @@ public class FunctionTest extends InitializedNullHandlingTest
"Function[mv_to_array] argument 1 should be an identifier expression. Use array() instead",
t.getMessage()
);
- }
- @Test
- public void testMVToArrayWithFunction()
- {
- Throwable t = Assert.assertThrows(
+ t = Assert.assertThrows(
ExpressionValidationException.class,
() -> assertArrayExpr("mv_to_array(repeat('hello', 2))", null)
);
@@ -1037,12 +1035,8 @@ public class FunctionTest extends InitializedNullHandlingTest
"Function[mv_to_array] argument (repeat [hello, 2]) should be an identifier expression. Use array() instead",
t.getMessage()
);
- }
- @Test
- public void testMVToArrayWithMoreArgs()
- {
- Throwable t = Assert.assertThrows(
+ t = Assert.assertThrows(
ExpressionValidationException.class,
() -> assertArrayExpr("mv_to_array(x,y)", null)
);
@@ -1050,12 +1044,54 @@ public class FunctionTest extends InitializedNullHandlingTest
"Function[mv_to_array] requires 1 argument",
t.getMessage()
);
+
+ t = Assert.assertThrows(
+ ExpressionValidationException.class,
+ () -> assertArrayExpr("mv_to_array()", null)
+ );
+ Assert.assertEquals(
+ "Function[mv_to_array] requires 1 argument",
+ t.getMessage()
+ );
}
@Test
- public void testMVToArrayWithNoArgs()
+ public void testArrayToMultiValueStringWithValidInputs()
+ {
+ assertArrayExpr("array_to_mv(x)", new String[]{"foo"});
+ assertArrayExpr("array_to_mv(a)", new String[]{"foo", "bar", "baz", "foobar"});
+ assertArrayExpr("array_to_mv(b)", new String[]{"1", "2", "3", "4", "5"});
+ assertArrayExpr("array_to_mv(c)", new String[]{"3.1", "4.2", "5.3"});
+ }
+
+ @Test
+ public void testArrayToMultiValueStringWithInvalidInputs()
{
Throwable t = Assert.assertThrows(
+ ExpressionValidationException.class,
+ () -> assertArrayExpr("mv_to_array('1')", null)
+ );
+ Assert.assertEquals(
+ "Function[mv_to_array] argument 1 should be an identifier expression. Use array() instead",
+ t.getMessage()
+ );
+ t = Assert.assertThrows(
+ ExpressionValidationException.class,
+ () -> assertArrayExpr("mv_to_array(repeat('hello', 2))", null)
+ );
+ Assert.assertEquals(
+ "Function[mv_to_array] argument (repeat [hello, 2]) should be an identifier expression. Use array() instead",
+ t.getMessage()
+ );
+ t = Assert.assertThrows(
+ ExpressionValidationException.class,
+ () -> assertArrayExpr("mv_to_array(x,y)", null)
+ );
+ Assert.assertEquals(
+ "Function[mv_to_array] requires 1 argument",
+ t.getMessage()
+ );
+ t = Assert.assertThrows(
ExpressionValidationException.class,
() -> assertArrayExpr("mv_to_array()", null)
);
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayToMultiValueStringOperatorConversion.java
similarity index 73%
copy from sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java
copy to sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayToMultiValueStringOperatorConversion.java
index b1135b8842..551df41b97 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayToMultiValueStringOperatorConversion.java
@@ -27,22 +27,24 @@ import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.druid.sql.calcite.expression.DirectOperatorConversion;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
-/**
- * Function that converts a String or a Multi Value direct column to an array.
- * Input expressions are not supported as one should use the array function for such cases.
- **/
-
-public class MultiValueStringToArrayOperatorConversion extends DirectOperatorConversion
+public class ArrayToMultiValueStringOperatorConversion extends DirectOperatorConversion
{
public static final SqlFunction SQL_FUNCTION = OperatorConversions
- .operatorBuilder("MV_TO_ARRAY")
- .operandTypeChecker(OperandTypes.family(SqlTypeFamily.STRING))
+ .operatorBuilder("ARRAY_TO_MV")
+ .operandTypeChecker(
+ OperandTypes.or(
+ OperandTypes.family(SqlTypeFamily.STRING),
+ OperandTypes.family(SqlTypeFamily.ARRAY)
+ )
+ )
.functionCategory(SqlFunctionCategory.STRING)
- .returnTypeNullableArrayWithNullableElements(SqlTypeName.VARCHAR)
+ .returnTypeNullable(SqlTypeName.VARCHAR)
.build();
- public MultiValueStringToArrayOperatorConversion()
+ public ArrayToMultiValueStringOperatorConversion()
{
- super(SQL_FUNCTION, "mv_to_array");
+ super(SQL_FUNCTION, "array_to_mv");
}
+
+
}
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java
index b1135b8842..9a59ca8f5b 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java
@@ -36,7 +36,13 @@ public class MultiValueStringToArrayOperatorConversion extends DirectOperatorCon
{
public static final SqlFunction SQL_FUNCTION = OperatorConversions
.operatorBuilder("MV_TO_ARRAY")
- .operandTypeChecker(OperandTypes.family(SqlTypeFamily.STRING))
+ // allow using arrays as inputs to MV_TO_ARRAY to assist with migration of MVDs to ARRAY types
+ .operandTypeChecker(
+ OperandTypes.or(
+ OperandTypes.family(SqlTypeFamily.STRING),
+ OperandTypes.family(SqlTypeFamily.ARRAY)
+ )
+ )
.functionCategory(SqlFunctionCategory.STRING)
.returnTypeNullableArrayWithNullableElements(SqlTypeName.VARCHAR)
.build();
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
index ed52c95e6f..e476b92cae 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
@@ -68,6 +68,7 @@ import org.apache.druid.sql.calcite.expression.builtin.ArrayOverlapOperatorConve
import org.apache.druid.sql.calcite.expression.builtin.ArrayPrependOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.ArrayQuantileOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.ArraySliceOperatorConversion;
+import org.apache.druid.sql.calcite.expression.builtin.ArrayToMultiValueStringOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.ArrayToStringOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.BTrimOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.CaseOperatorConversion;
@@ -244,6 +245,7 @@ public class DruidOperatorTable implements SqlOperatorTable
.add(new ArraySliceOperatorConversion())
.add(new ArrayToStringOperatorConversion())
.add(new StringToArrayOperatorConversion())
+ .add(new ArrayToMultiValueStringOperatorConversion())
.build();
private static final List<SqlOperatorConversion> MULTIVALUE_STRING_OPERATOR_CONVERSIONS =
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 64014aae32..3782dba92f 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -979,7 +979,6 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
)
.run();
}
-
@Test
public void testUnnestRootSingleTypeArrayStringNulls()
{
@@ -4843,4 +4842,302 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.build()
);
}
+
+ /**
+ * MVD version of {@link #testGroupByRootSingleTypeArrayLongNullsUnnest()}
+ */
+ @Test
+ public void testGroupByRootSingleTypeArrayLongNullsAsMvd()
+ {
+ cannotVectorize();
+ testBuilder()
+ .sql(
+ "SELECT "
+ + "ARRAY_TO_MV(arrayLongNulls), "
+ + "SUM(cnt) "
+ + "FROM druid.arrays GROUP BY 1"
+ )
+ .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .expectedQueries(
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(TableDataSource.create(DATA_SOURCE_ARRAYS))
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)
+ )
+ )
+ .setVirtualColumns(expressionVirtualColumn("v0", "array_to_mv(\"arrayLongNulls\")", ColumnType.STRING))
+ .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
+ .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ )
+ )
+ .expectedResults(
+ ImmutableList.of(
+ // implicit mvd unnest treats null and empty as [null] so we get extra null matches than unnest
+ // directly on the ARRAY
+ new Object[]{NullHandling.defaultStringValue(), 9L},
+ new Object[]{"1", 5L},
+ new Object[]{"2", 6L},
+ new Object[]{"3", 6L},
+ new Object[]{"9", 2L}
+ )
+ )
+ .expectedSignature(
+ RowSignature.builder()
+ .add("EXPR$0", ColumnType.STRING)
+ .add("EXPR$1", ColumnType.LONG)
+ .build()
+ )
+ .run();
+ }
+
+ /**
+ * MVD version of {@link #testGroupByRootSingleTypeArrayLongNullsFiltered()}
+ * - implicit unnest since it is an mvd instead of array grouping
+ * - filters are adjusted to match strings instead of numbers
+ */
+ @Test
+ public void testGroupByRootSingleTypeArrayLongNullsAsMvdFiltered()
+ {
+ cannotVectorize();
+ testBuilder()
+ .sql(
+ "SELECT "
+ + "ARRAY_TO_MV(arrayLongNulls), "
+ + "SUM(cnt), "
+ + "SUM(MV_LENGTH(ARRAY_TO_MV(arrayLongNulls))) "
+ + "FROM druid.arrays "
+ + "WHERE MV_CONTAINS(ARRAY_TO_MV(arrayLongNulls), '1') "
+ + "GROUP BY 1"
+ )
+ .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .expectedQueries(
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(DATA_SOURCE_ARRAYS)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)
+ )
+ )
+ .setVirtualColumns(
+ expressionVirtualColumn("v0", "array_to_mv(\"arrayLongNulls\")", ColumnType.STRING),
+ expressionVirtualColumn("v1", "array_length(array_to_mv(\"arrayLongNulls\"))", ColumnType.LONG)
+ )
+ .setDimFilter(
+ new ExpressionDimFilter("array_contains(array_to_mv(\"arrayLongNulls\"),'1')", queryFramework().macroTable())
+ )
+ .setAggregatorSpecs(
+ aggregators(
+ new LongSumAggregatorFactory("a0", "cnt"),
+ new LongSumAggregatorFactory("a1", "v1")
+ )
+ )
+ .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ )
+ )
+ .expectedResults(
+ ImmutableList.of(
+ new Object[]{NullHandling.defaultStringValue(), 2L, 6L},
+ new Object[]{"1", 5L, 13L},
+ new Object[]{"2", 2L, 6L},
+ new Object[]{"3", 4L, 12L}
+ )
+ )
+ .expectedSignature(
+ RowSignature.builder()
+ .add("EXPR$0", ColumnType.STRING)
+ .add("EXPR$1", ColumnType.LONG)
+ .add("EXPR$2", ColumnType.LONG)
+ .build()
+ )
+ .run();
+ }
+
+ /**
+ * MVD version of {@link #testGroupByRootSingleTypeArrayLongNullsFilteredMore()}
+ * - implicit unnest since it is an mvd instead of array grouping
+ * - filters are adjusted to match strings instead of numbers
+ */
+ @Test
+ public void testGroupByRootSingleTypeArrayLongNullsAsMvdFilteredMore()
+ {
+ cannotVectorize();
+ testBuilder()
+ .sql(
+ "SELECT "
+ + "ARRAY_TO_MV(arrayLongNulls), "
+ + "SUM(cnt) "
+ + "FROM druid.arrays WHERE MV_CONTAINS(ARRAY_TO_MV(arrayLongNulls), '1') OR MV_OVERLAP(ARRAY_TO_MV(arrayLongNulls), ARRAY['2', '3']) GROUP BY 1"
+ )
+ .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .expectedQueries(
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(DATA_SOURCE_ARRAYS)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)
+ )
+ )
+ .setVirtualColumns(
+ expressionVirtualColumn("v0", "array_to_mv(\"arrayLongNulls\")", ColumnType.STRING)
+ )
+ .setDimFilter(
+ or(
+ expressionFilter("array_contains(array_to_mv(\"arrayLongNulls\"),'1')"),
+ expressionFilter("array_overlap(array_to_mv(\"arrayLongNulls\"),array('2','3'))")
+ )
+ )
+ .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
+ .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ )
+ )
+ .expectedResults(
+ // since array is converted to a MVD, implicit unnesting occurs
+ ImmutableList.of(
+ new Object[]{NullHandling.defaultStringValue(), 4L},
+ new Object[]{"1", 5L},
+ new Object[]{"2", 6L},
+ new Object[]{"3", 6L},
+ new Object[]{"9", 2L}
+ )
+ )
+ .expectedSignature(
+ RowSignature.builder()
+ .add("EXPR$0", ColumnType.STRING)
+ .add("EXPR$1", ColumnType.LONG)
+ .build()
+ )
+ .run();
+ }
+
+ /**
+ * MVD version of {@link #testGroupByRootSingleTypeArrayStringNullsUnnest()}
+ */
+ @Test
+ public void testGroupByRootSingleTypeArrayStringNullsAsMvdUnnest()
+ {
+ cannotVectorize();
+ testBuilder()
+ .sql(
+ "SELECT "
+ + "ARRAY_TO_MV(arrayStringNulls), "
+ + "SUM(cnt) "
+ + "FROM druid.arrays GROUP BY 1"
+ )
+ .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .expectedQueries(
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(
+ TableDataSource.create(DATA_SOURCE_ARRAYS)
+ )
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)
+ )
+ )
+ .setVirtualColumns(
+ expressionVirtualColumn("v0", "array_to_mv(\"arrayStringNulls\")", ColumnType.STRING)
+ )
+ .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
+ .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ )
+ )
+ .expectedResults(
+ ImmutableList.of(
+ // count is 9 instead of 5 because implicit unnest treats null and empty as [null]
+ new Object[]{NullHandling.defaultStringValue(), 9L},
+ new Object[]{"a", 3L},
+ new Object[]{"b", 11L},
+ new Object[]{"d", 2L}
+ )
+ )
+ .expectedSignature(
+ RowSignature.builder()
+ .add("EXPR$0", ColumnType.STRING)
+ .add("EXPR$1", ColumnType.LONG)
+ .build()
+ )
+ .run();
+ }
+
+ /**
+ * MVD version of {@link #testGroupByRootSingleTypeArrayStringNullsFiltered()}
+ * - implicit unnest since mvd instead of string array
+ */
+ @Test
+ public void testGroupByRootSingleTypeArrayStringNullsFilteredAsMvd()
+ {
+ cannotVectorize();
+ testBuilder()
+ .sql(
+ "SELECT "
+ + "ARRAY_TO_MV(arrayStringNulls), "
+ + "SUM(cnt), "
+ + "SUM(MV_LENGTH(ARRAY_TO_MV(arrayStringNulls))) "
+ + "FROM druid.arrays "
+ + "WHERE MV_CONTAINS(ARRAY_TO_MV(arrayStringNulls), 'b') "
+ + "GROUP BY 1"
+ )
+ .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .expectedQueries(
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(DATA_SOURCE_ARRAYS)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)
+ )
+ )
+ .setVirtualColumns(
+ expressionVirtualColumn("v0", "array_to_mv(\"arrayStringNulls\")", ColumnType.STRING),
+ new ExpressionVirtualColumn("v1", "array_length(array_to_mv(\"arrayStringNulls\"))", ColumnType.LONG, queryFramework().macroTable())
+ )
+ .setDimFilter(
+ new ExpressionDimFilter("array_contains(array_to_mv(\"arrayStringNulls\"),'b')", queryFramework().macroTable())
+ )
+ .setAggregatorSpecs(
+ aggregators(
+ new LongSumAggregatorFactory("a0", "cnt"),
+ new LongSumAggregatorFactory("a1", "v1")
+ )
+ )
+ .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ )
+ )
+ .expectedResults(
+ ImmutableList.of(
+ new Object[]{NullHandling.defaultStringValue(), 4L, 10L},
+ new Object[]{"a", 3L, 6L},
+ new Object[]{"b", 11L, 24L},
+ new Object[]{"d", 2L, 6L}
+ )
+ )
+ .expectedSignature(
+ RowSignature.builder()
+ .add("EXPR$0", ColumnType.STRING)
+ .add("EXPR$1", ColumnType.LONG)
+ .add("EXPR$2", ColumnType.LONG)
+ .build()
+ )
+ .run();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org