You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ar...@apache.org on 2020/04/11 15:35:15 UTC

[drill] 03/05: DRILL-7668: Allow Time Bucket Function to Accept Floats and Timestamps

This is an automated email from the ASF dual-hosted git repository.

arina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit ffaaad68475181c8aa9af171c9f823da3306f166
Author: Charles Givre <cg...@apache.org>
AuthorDate: Mon Apr 6 16:11:31 2020 -0400

    DRILL-7668: Allow Time Bucket Function to Accept Floats and Timestamps
    
    closes #2040
---
 .../drill/exec/udfs/TimeBucketFunctions.java       | 86 +++++++++++++++++++++-
 .../drill/exec/udfs/TestTimeBucketFunction.java    | 66 +++++++++++++----
 2 files changed, 132 insertions(+), 20 deletions(-)

diff --git a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/TimeBucketFunctions.java b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/TimeBucketFunctions.java
index b7f3fae..10efd5c 100644
--- a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/TimeBucketFunctions.java
+++ b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/TimeBucketFunctions.java
@@ -23,6 +23,8 @@ import org.apache.drill.exec.expr.annotations.FunctionTemplate;
 import org.apache.drill.exec.expr.annotations.Output;
 import org.apache.drill.exec.expr.annotations.Param;
 import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.Float8Holder;
+import org.apache.drill.exec.expr.holders.TimeStampHolder;
 
 
 public class TimeBucketFunctions {
@@ -59,9 +61,9 @@ public class TimeBucketFunctions {
       long timestamp = inputDate.value;
 
       // Get the interval in milliseconds and convert to nanoseconds
-      long intervalToAdd = interval.value * 1000000;
+      long groupByInterval = interval.value * 1000000;
 
-      out.value = timestamp - (timestamp % intervalToAdd);
+      out.value = timestamp - (timestamp % groupByInterval);
     }
   }
 
@@ -97,9 +99,85 @@ public class TimeBucketFunctions {
       long timestamp = inputDate.value;
 
       // Get the interval in milliseconds
-      long intervalToAdd = interval.value;
+      long groupByInterval = interval.value;
 
-      out.value = timestamp - (timestamp % intervalToAdd);
+      out.value = timestamp - (timestamp % groupByInterval);
+    }
+  }
+
+  /**
+   * This function is used for facilitating time series analysis by creating buckets of time intervals.  See
+   * https://blog.timescale.com/blog/simplified-time-series-analytics-using-the-time_bucket-function/ for usage. The function takes two arguments:
+   * 1. The timestamp (as a Drill timestamp)
+   * 2. The desired bucket interval IN milliseconds
+   *
+   * The function returns a BIGINT of the nearest time bucket.
+   */
+  @FunctionTemplate(name = "time_bucket",
+    scope = FunctionTemplate.FunctionScope.SIMPLE,
+    nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
+  public static class TimestampTimeBucketFunction implements DrillSimpleFunc {
+
+    @Param
+    TimeStampHolder inputDate;
+
+    @Param
+    BigIntHolder interval;
+
+    @Output
+    TimeStampHolder out;
+
+    @Override
+    public void setup() {
+    }
+
+    @Override
+    public void eval() {
+      // Get the timestamp in milliseconds
+      long timestamp = inputDate.value;
+
+      // Get the interval in milliseconds
+      long groupByInterval = interval.value;
+
+      out.value = (timestamp - (timestamp % groupByInterval));
+    }
+  }
+
+  /**
+   * This function is used for facilitating time series analysis by creating buckets of time intervals.  See
+   * https://blog.timescale.com/blog/simplified-time-series-analytics-using-the-time_bucket-function/ for usage. The function takes two arguments:
+   * 1. The timestamp (as a Drill timestamp)
+   * 2. The desired bucket interval IN milliseconds
+   *
+   * The function returns a BIGINT of the nearest time bucket.
+   */
+  @FunctionTemplate(name = "time_bucket",
+    scope = FunctionTemplate.FunctionScope.SIMPLE,
+    nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
+  public static class DoubleTimeBucketFunction implements DrillSimpleFunc {
+
+    @Param
+    Float8Holder inputDate;
+
+    @Param
+    BigIntHolder interval;
+
+    @Output
+    BigIntHolder out;
+
+    @Override
+    public void setup() {
+    }
+
+    @Override
+    public void eval() {
+      // Get the timestamp in milliseconds
+      long timestamp = java.lang.Math.round(inputDate.value);
+
+      // Get the interval in milliseconds
+      long groupByInterval = interval.value;
+
+      out.value = timestamp - (timestamp % groupByInterval);
     }
   }
 }
diff --git a/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestTimeBucketFunction.java b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestTimeBucketFunction.java
index a670e1e..127486f 100644
--- a/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestTimeBucketFunction.java
+++ b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestTimeBucketFunction.java
@@ -27,10 +27,23 @@ import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
+import java.time.LocalDateTime;
+
 
 @Category({UnlikelyTest.class, SqlFunctionTest.class})
 public class TestTimeBucketFunction extends ClusterTest {
 
+  // Friday, January 1, 2016 12:06:00 AM
+  private final static long MILLISECOND_TIMESTAMP = 1451606760L;
+
+  // Friday, January 1, 2016 12:06:00 AM
+  private final static long NAMOSECOND_TIMESTAMP = 1451606760000000000L;
+
+  // Friday, March 27, 2020 1:33:53.845 AM
+  private final static long MARCH27_TIMESTAMP = 1585272833845L;
+
+  private final static int INTERVAL = 300000;
+
   @BeforeClass
   public static void setup() throws Exception {
     ClusterFixtureBuilder builder = ClusterFixture.builder(dirTestWatcher);
@@ -39,10 +52,10 @@ public class TestTimeBucketFunction extends ClusterTest {
 
   @Test
   public void testTimeBucketNanoSeconds() throws Exception {
-    String query = "SELECT time_bucket_ns(1451606760000000000, 300000) AS high FROM (values(1))";
+    String query = String.format("SELECT time_bucket_ns(%d, %d) AS high", NAMOSECOND_TIMESTAMP, INTERVAL);
     testBuilder()
       .sqlQuery(query)
-      .ordered()
+      .unOrdered()
       .baselineColumns("high")
       .baselineValues(1451606700000000000L)
       .go();
@@ -50,10 +63,10 @@ public class TestTimeBucketFunction extends ClusterTest {
 
   @Test
   public void testNullTimeBucketNanoSeconds() throws Exception {
-    String query = "SELECT time_bucket_ns(null, 300000) AS high FROM (values(1))";
+    String query = String.format("SELECT time_bucket_ns(null, %d) AS high", INTERVAL);
     testBuilder()
       .sqlQuery(query)
-      .ordered()
+      .unOrdered()
       .baselineColumns("high")
       .baselineValues((Long) null)
       .go();
@@ -61,10 +74,10 @@ public class TestTimeBucketFunction extends ClusterTest {
 
   @Test
   public void testNullIntervalTimeBucketNanoSeconds() throws Exception {
-    String query = "SELECT time_bucket_ns(1451606760000000000, null) AS high FROM (values(1))";
+    String query = String.format("SELECT time_bucket_ns(%d, null) AS high", NAMOSECOND_TIMESTAMP);
     testBuilder()
       .sqlQuery(query)
-      .ordered()
+      .unOrdered()
       .baselineColumns("high")
       .baselineValues((Long) null)
       .go();
@@ -72,10 +85,10 @@ public class TestTimeBucketFunction extends ClusterTest {
 
   @Test
   public void testBothNullIntervalTimeBucketNanoSeconds() throws Exception {
-    String query = "SELECT time_bucket_ns(null, null) AS high FROM (values(1))";
+    String query = "SELECT time_bucket_ns(null, null) AS high";
     testBuilder()
       .sqlQuery(query)
-      .ordered()
+      .unOrdered()
       .baselineColumns("high")
       .baselineValues((Long) null)
       .go();
@@ -83,18 +96,40 @@ public class TestTimeBucketFunction extends ClusterTest {
 
   @Test
   public void testTimeBucket() throws Exception {
-    String query = "SELECT time_bucket(1451606760, 300000) AS high FROM (values(1))";
+    String query = String.format("SELECT time_bucket(%d, 300000) AS high", MILLISECOND_TIMESTAMP);
     testBuilder()
       .sqlQuery(query)
-      .ordered()
+      .unOrdered()
+      .baselineColumns("high")
+      .baselineValues(1451400000L)
+      .go();
+  }
+
+  @Test
+  public void testDoubleTimeBucket() throws Exception {
+    String query = String.format("SELECT time_bucket(CAST(%d AS DOUBLE), 300000) AS high", MILLISECOND_TIMESTAMP);
+    testBuilder()
+      .sqlQuery(query)
+      .unOrdered()
       .baselineColumns("high")
       .baselineValues(1451400000L)
       .go();
   }
 
   @Test
+  public void testTimeBucketTimestamp() throws Exception {
+    String query = String.format("SELECT time_bucket(CAST( %d AS TIMESTAMP), 300000) AS high", MARCH27_TIMESTAMP);
+    testBuilder()
+      .sqlQuery(query)
+      .unOrdered()
+      .baselineColumns("high")
+      .baselineValues(LocalDateTime.of(2020, 3, 27, 1, 30, 0))
+      .go();
+  }
+
+  @Test
   public void testNullTimeBucket() throws Exception {
-    String query = "SELECT time_bucket(null, 300000) AS high FROM (values(1))";
+    String query = String.format("SELECT time_bucket(null, %d) AS high", INTERVAL);
     testBuilder()
       .sqlQuery(query)
       .ordered()
@@ -105,10 +140,10 @@ public class TestTimeBucketFunction extends ClusterTest {
 
   @Test
   public void testNullIntervalTimeBucket() throws Exception {
-    String query = "SELECT time_bucket(1451606760, null) AS high FROM (values(1))";
+    String query = String.format("SELECT time_bucket(%d, null) AS high", MILLISECOND_TIMESTAMP);
     testBuilder()
       .sqlQuery(query)
-      .ordered()
+      .unOrdered()
       .baselineColumns("high")
       .baselineValues((Long) null)
       .go();
@@ -116,13 +151,12 @@ public class TestTimeBucketFunction extends ClusterTest {
 
   @Test
   public void testBothNullIntervalTimeBucket() throws Exception {
-    String query = "SELECT time_bucket(null, null) AS high FROM (values(1))";
+    String query = "SELECT time_bucket(null, null) AS high";
     testBuilder()
       .sqlQuery(query)
-      .ordered()
+      .unOrdered()
       .baselineColumns("high")
       .baselineValues((Long) null)
       .go();
   }
-
 }