You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by fj...@apache.org on 2019/05/21 18:54:44 UTC

[incubator-druid] branch master updated: SQL: Allow NULLs in place of optional arguments in many functions. (#7709)

This is an automated email from the ASF dual-hosted git repository.

fjy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git


The following commit(s) were added to refs/heads/master by this push:
     new cbbce95  SQL: Allow NULLs in place of optional arguments in many functions. (#7709)
cbbce95 is described below

commit cbbce955de588330797fda986dd1031fceb3b679
Author: Gian Merlino <gi...@imply.io>
AuthorDate: Tue May 21 11:54:34 2019 -0700

    SQL: Allow NULLs in place of optional arguments in many functions. (#7709)
    
    * SQL: Allow NULLs in place of optional arguments in many functions.
    
    Also adjust SQL docs to describe how to make time literals using
    TIME_PARSE (which is now possible in a nicer way).
    
    * Be less forbidden.
---
 docs/content/querying/sql.md                       |  15 +-
 .../calcite/expression/OperatorConversions.java    | 194 ++++++++++++++++++++-
 .../apache/druid/sql/calcite/CalciteQueryTest.java |  35 ++++
 3 files changed, 232 insertions(+), 12 deletions(-)

diff --git a/docs/content/querying/sql.md b/docs/content/querying/sql.md
index 5b6e308..5d562f6 100644
--- a/docs/content/querying/sql.md
+++ b/docs/content/querying/sql.md
@@ -214,6 +214,10 @@ context parameter "sqlTimeZone" to the name of another time zone, like "America/
 the connection time zone, some functions also accept time zones as parameters. These parameters always take precedence
 over the connection time zone.
 
+Literal timestamps in the connection time zone can be written using `TIMESTAMP '2000-01-01 00:00:00'` syntax. The
+simplest way to write literal timestamps in other time zones is to use TIME_PARSE, like
+`TIME_PARSE('2000-02-01 00:00:00', NULL, 'America/Los_Angeles')`.
+
 |Function|Notes|
 |--------|-----|
 |`CURRENT_TIMESTAMP`|Current timestamp in the connection's time zone.|
@@ -291,11 +295,12 @@ Additionally, some Druid features are not supported by the SQL language. Some un
 
 Druid natively supports five basic column types: "long" (64 bit signed int), "float" (32 bit float), "double" (64 bit
 float) "string" (UTF-8 encoded strings), and "complex" (catch-all for more exotic data types like hyperUnique and
-approxHistogram columns). Timestamps (including the `__time` column) are stored as longs, with the value being the
-number of milliseconds since 1 January 1970 UTC.
+approxHistogram columns).
 
-At runtime, Druid may widen 32-bit floats to 64-bit for certain operators, like SUM aggregators. The reverse will not
-happen: 64-bit floats are not be narrowed to 32-bit.
+Timestamps (including the `__time` column) are treated by Druid as longs, with the value being the number of
+milliseconds since 1970-01-01 00:00:00 UTC, not counting leap seconds. Therefore, timestamps in Druid do not carry any
+timezone information, but only carry information about the exact moment in time they represent. See the
+[Time functions](#time-functions) section for more information about timestamp handling.
 
 Druid generally treats NULLs and empty strings interchangeably, rather than according to the SQL standard. As such,
 Druid SQL only has partial support for NULLs. For example, the expressions `col IS NULL` and `col = ''` are equivalent,
@@ -307,7 +312,7 @@ datasource, then it will be treated as zero for rows from those segments.
 
 For mathematical operations, Druid SQL will use integer math if all operands involved in an expression are integers.
 Otherwise, Druid will switch to floating point math. You can force this to happen by casting one of your operands
-to FLOAT.
+to FLOAT. At runtime, Druid may widen 32-bit floats to 64-bit for certain operators, like SUM aggregators.
 
 The following table describes how SQL types map onto Druid types during query runtime. Casts between two SQL types
 that have the same Druid runtime type will have no effect, other than exceptions noted in the table. Casts between two
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/OperatorConversions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/OperatorConversions.java
index a4dbfd2..8a73cbd 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/OperatorConversions.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/OperatorConversions.java
@@ -20,18 +20,29 @@
 package org.apache.druid.sql.calcite.expression;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+import it.unimi.dsi.fastutil.ints.IntArraySet;
+import it.unimi.dsi.fastutil.ints.IntSet;
+import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlCallBinding;
 import org.apache.calcite.sql.SqlFunction;
 import org.apache.calcite.sql.SqlFunctionCategory;
 import org.apache.calcite.sql.SqlKind;
-import org.apache.calcite.sql.type.OperandTypes;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlOperandCountRange;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.SqlUtil;
 import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlOperandCountRanges;
 import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
 import org.apache.calcite.sql.type.SqlReturnTypeInference;
 import org.apache.calcite.sql.type.SqlTypeFamily;
 import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.util.Static;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.sql.calcite.planner.Calcites;
 import org.apache.druid.sql.calcite.planner.PlannerContext;
@@ -41,6 +52,7 @@ import javax.annotation.Nullable;
 import java.util.Arrays;
 import java.util.List;
 import java.util.function.Function;
+import java.util.stream.IntStream;
 
 /**
  * Utilities for assisting in writing {@link SqlOperatorConversion} implementations.
@@ -131,7 +143,7 @@ public class OperatorConversions
 
   public static class OperatorBuilder
   {
-    private String name;
+    private final String name;
     private SqlKind kind = SqlKind.OTHER_FUNCTION;
     private SqlReturnTypeInference returnTypeInference;
     private SqlFunctionCategory functionCategory = SqlFunctionCategory.USER_DEFINED_FUNCTION;
@@ -200,27 +212,195 @@ public class OperatorConversions
 
     public SqlFunction build()
     {
+      // Create "nullableOperands" set including all optional arguments.
+      final IntSet nullableOperands = new IntArraySet();
+      if (requiredOperands != null) {
+        IntStream.range(requiredOperands, operandTypes.size()).forEach(nullableOperands::add);
+      }
+
       final SqlOperandTypeChecker theOperandTypeChecker;
 
       if (operandTypeChecker == null) {
-        theOperandTypeChecker = OperandTypes.family(
-            Preconditions.checkNotNull(operandTypes, "operandTypes"),
-            i -> requiredOperands == null || i + 1 > requiredOperands
+        theOperandTypeChecker = new DefaultOperandTypeChecker(
+            operandTypes,
+            requiredOperands == null ? operandTypes.size() : requiredOperands,
+            nullableOperands
         );
       } else if (operandTypes == null && requiredOperands == null) {
         theOperandTypeChecker = operandTypeChecker;
       } else {
-        throw new ISE("Cannot have both 'operandTypeChecker' and 'operandTypes' / 'requiredOperands'");
+        throw new ISE(
+            "Cannot have both 'operandTypeChecker' and 'operandTypes' / 'requiredOperands'"
+        );
       }
 
       return new SqlFunction(
           name,
           kind,
           Preconditions.checkNotNull(returnTypeInference, "returnTypeInference"),
-          null,
+          new DefaultOperandTypeInference(operandTypes, nullableOperands),
           theOperandTypeChecker,
           functionCategory
       );
     }
   }
+
+  /**
+   * Return the default, inferred specific type for a parameter that has a particular type family. Used to infer
+   * the type of NULL literals.
+   */
+  private static SqlTypeName defaultTypeForFamily(final SqlTypeFamily family)
+  {
+    switch (family) {
+      case NUMERIC:
+      case APPROXIMATE_NUMERIC:
+        return SqlTypeName.DOUBLE;
+      case INTEGER:
+      case EXACT_NUMERIC:
+        return SqlTypeName.BIGINT;
+      case CHARACTER:
+        return SqlTypeName.VARCHAR;
+      case TIMESTAMP:
+        return SqlTypeName.TIMESTAMP;
+      default:
+        // No good default type for this family; just return the first one (or NULL, if empty).
+        return Iterables.getFirst(family.getTypeNames(), SqlTypeName.NULL);
+    }
+  }
+
+  /**
+   * Operand type inference that simply reports the types derived by the validator.
+   *
+   * We do this so that Calcite will allow NULL literals for type-checked operands. Otherwise, it will not be able to
+   * infer their types, and it will report them as NULL types, which will make operand type checking fail.
+   */
+  private static class DefaultOperandTypeInference implements SqlOperandTypeInference
+  {
+    private final List<SqlTypeFamily> operandTypes;
+    private final IntSet nullableOperands;
+
+    DefaultOperandTypeInference(final List<SqlTypeFamily> operandTypes, final IntSet nullableOperands)
+    {
+      this.operandTypes = operandTypes;
+      this.nullableOperands = nullableOperands;
+    }
+
+    @Override
+    public void inferOperandTypes(
+        final SqlCallBinding callBinding,
+        final RelDataType returnType,
+        final RelDataType[] operandTypesOut
+    )
+    {
+      for (int i = 0; i < operandTypesOut.length; i++) {
+        final RelDataType derivedType = callBinding.getValidator()
+                                                   .deriveType(callBinding.getScope(), callBinding.operand(i));
+
+        final RelDataType inferredType;
+
+        if (derivedType.getSqlTypeName() != SqlTypeName.NULL) {
+          // We could derive a non-NULL type; retain it.
+          inferredType = derivedType;
+        } else {
+          // We couldn't derive a non-NULL type; infer the default for the operand type family.
+          if (nullableOperands.contains(i)) {
+            inferredType = Calcites.createSqlTypeWithNullability(
+                callBinding.getTypeFactory(),
+                defaultTypeForFamily(operandTypes.get(i)),
+                true
+            );
+          } else {
+            inferredType = callBinding.getValidator().getUnknownType();
+          }
+        }
+
+        operandTypesOut[i] = inferredType;
+      }
+    }
+  }
+
+  /**
+   * Operand type checker that is used in 'simple' situations: there are a particular number of operands, with
+   * particular types, some of which may be optional or nullable.
+   */
+  private static class DefaultOperandTypeChecker implements SqlOperandTypeChecker
+  {
+    private final List<SqlTypeFamily> operandTypes;
+    private final int requiredOperands;
+    private final IntSet nullableOperands;
+
+    DefaultOperandTypeChecker(
+        final List<SqlTypeFamily> operandTypes,
+        final int requiredOperands,
+        final IntSet nullableOperands
+    )
+    {
+      Preconditions.checkArgument(requiredOperands <= operandTypes.size() && requiredOperands >= 0);
+      this.operandTypes = Preconditions.checkNotNull(operandTypes, "operandTypes");
+      this.requiredOperands = requiredOperands;
+      this.nullableOperands = Preconditions.checkNotNull(nullableOperands, "nullableOperands");
+    }
+
+    @Override
+    public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure)
+    {
+      if (operandTypes.size() != callBinding.getOperandCount()) {
+        // Just like FamilyOperandTypeChecker: assume this is an inapplicable sub-rule of a composite rule; don't throw
+        return false;
+      }
+
+      for (int i = 0; i < callBinding.operands().size(); i++) {
+        final SqlNode operand = callBinding.operands().get(i);
+        final RelDataType operandType = callBinding.getValidator().deriveType(callBinding.getScope(), operand);
+        final SqlTypeFamily expectedFamily = operandTypes.get(i);
+
+        if (expectedFamily == SqlTypeFamily.ANY) {
+          // ANY matches anything. This operand is all good; do nothing.
+        } else if (expectedFamily.getTypeNames().contains(operandType.getSqlTypeName())) {
+          // Operand came in with one of the expected types.
+        } else if (operandType.getSqlTypeName() == SqlTypeName.NULL) {
+          // Null came in, check if operand is a nullable type.
+          if (!nullableOperands.contains(i)) {
+            if (throwOnFailure) {
+              throw callBinding.getValidator().newValidationError(operand, Static.RESOURCE.nullIllegal());
+            } else {
+              return false;
+            }
+          }
+        } else {
+          if (throwOnFailure) {
+            throw callBinding.newValidationSignatureError();
+          } else {
+            return false;
+          }
+        }
+      }
+
+      return true;
+    }
+
+    @Override
+    public SqlOperandCountRange getOperandCountRange()
+    {
+      return SqlOperandCountRanges.between(requiredOperands, operandTypes.size());
+    }
+
+    @Override
+    public String getAllowedSignatures(SqlOperator op, String opName)
+    {
+      return SqlUtil.getAliasedSignature(op, opName, operandTypes);
+    }
+
+    @Override
+    public Consistency getConsistency()
+    {
+      return Consistency.NONE;
+    }
+
+    @Override
+    public boolean isOptional(int i)
+    {
+      return i + 1 > requiredOperands;
+    }
+  }
 }
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index 79e51d5..7db1910 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -70,9 +70,11 @@ import org.apache.druid.query.topn.TopNQueryBuilder;
 import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.sql.calcite.expression.DruidExpression;
 import org.apache.druid.sql.calcite.filtration.Filtration;
+import org.apache.druid.sql.calcite.planner.Calcites;
 import org.apache.druid.sql.calcite.rel.CannotBuildQueryException;
 import org.apache.druid.sql.calcite.util.CalciteTests;
 import org.hamcrest.CoreMatchers;
+import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
 import org.joda.time.Interval;
 import org.joda.time.Period;
@@ -5296,6 +5298,39 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   }
 
   @Test
+  public void testGroupAndFilterOnTimeFloorWithTimeZone() throws Exception
+  {
+    testQuery(
+        "SELECT TIME_FLOOR(__time, 'P1M', NULL, 'America/Los_Angeles'), COUNT(*)\n"
+        + "FROM druid.foo\n"
+        + "WHERE\n"
+        + "TIME_FLOOR(__time, 'P1M', NULL, 'America/Los_Angeles') = "
+        + "  TIME_PARSE('2000-01-01 00:00:00', NULL, 'America/Los_Angeles')\n"
+        + "OR TIME_FLOOR(__time, 'P1M', NULL, 'America/Los_Angeles') = "
+        + "  TIME_PARSE('2000-02-01 00:00:00', NULL, 'America/Los_Angeles')\n"
+        + "GROUP BY 1",
+        ImmutableList.of(
+            Druids.newTimeseriesQueryBuilder()
+                  .dataSource(CalciteTests.DATASOURCE1)
+                  .intervals(querySegmentSpec(Intervals.of("2000-01-01T00-08:00/2000-03-01T00-08:00")))
+                  .granularity(new PeriodGranularity(Period.months(1), null, DateTimes.inferTzFromString(LOS_ANGELES)))
+                  .aggregators(aggregators(new CountAggregatorFactory("a0")))
+                  .context(TIMESERIES_CONTEXT_DEFAULT)
+                  .build()
+        ),
+        ImmutableList.of(
+            new Object[]{
+                Calcites.jodaToCalciteTimestamp(
+                    new DateTime("2000-01-01", DateTimes.inferTzFromString(LOS_ANGELES)),
+                    DateTimeZone.UTC
+                ),
+                2L
+            }
+        )
+    );
+  }
+
+  @Test
   public void testFilterOnCurrentTimestampWithIntervalArithmetic() throws Exception
   {
     testQuery(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org