You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ar...@apache.org on 2020/04/11 15:35:15 UTC
[drill] 03/05: DRILL-7668: Allow Time Bucket Function to Accept
Floats and Timestamps
This is an automated email from the ASF dual-hosted git repository.
arina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit ffaaad68475181c8aa9af171c9f823da3306f166
Author: Charles Givre <cg...@apache.org>
AuthorDate: Mon Apr 6 16:11:31 2020 -0400
DRILL-7668: Allow Time Bucket Function to Accept Floats and Timestamps
closes #2040
---
.../drill/exec/udfs/TimeBucketFunctions.java | 86 +++++++++++++++++++++-
.../drill/exec/udfs/TestTimeBucketFunction.java | 66 +++++++++++++----
2 files changed, 132 insertions(+), 20 deletions(-)
diff --git a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/TimeBucketFunctions.java b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/TimeBucketFunctions.java
index b7f3fae..10efd5c 100644
--- a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/TimeBucketFunctions.java
+++ b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/TimeBucketFunctions.java
@@ -23,6 +23,8 @@ import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.Float8Holder;
+import org.apache.drill.exec.expr.holders.TimeStampHolder;
public class TimeBucketFunctions {
@@ -59,9 +61,9 @@ public class TimeBucketFunctions {
long timestamp = inputDate.value;
// Get the interval in milliseconds and convert to nanoseconds
- long intervalToAdd = interval.value * 1000000;
+ long groupByInterval = interval.value * 1000000;
- out.value = timestamp - (timestamp % intervalToAdd);
+ out.value = timestamp - (timestamp % groupByInterval);
}
}
@@ -97,9 +99,85 @@ public class TimeBucketFunctions {
long timestamp = inputDate.value;
// Get the interval in milliseconds
- long intervalToAdd = interval.value;
+ long groupByInterval = interval.value;
- out.value = timestamp - (timestamp % intervalToAdd);
+ out.value = timestamp - (timestamp % groupByInterval);
+ }
+ }
+
+ /**
+ * This function is used for facilitating time series analysis by creating buckets of time intervals. See
+ * https://blog.timescale.com/blog/simplified-time-series-analytics-using-the-time_bucket-function/ for usage. The function takes two arguments:
+ * 1. The timestamp (as a Drill timestamp)
+ * 2. The desired bucket interval IN milliseconds
+ *
+ * The function returns a BIGINT of the nearest time bucket.
+ */
+ @FunctionTemplate(name = "time_bucket",
+ scope = FunctionTemplate.FunctionScope.SIMPLE,
+ nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
+ public static class TimestampTimeBucketFunction implements DrillSimpleFunc {
+
+ @Param
+ TimeStampHolder inputDate;
+
+ @Param
+ BigIntHolder interval;
+
+ @Output
+ TimeStampHolder out;
+
+ @Override
+ public void setup() {
+ }
+
+ @Override
+ public void eval() {
+ // Get the timestamp in milliseconds
+ long timestamp = inputDate.value;
+
+ // Get the interval in milliseconds
+ long groupByInterval = interval.value;
+
+ out.value = (timestamp - (timestamp % groupByInterval));
+ }
+ }
+
+ /**
+ * This function is used for facilitating time series analysis by creating buckets of time intervals. See
+ * https://blog.timescale.com/blog/simplified-time-series-analytics-using-the-time_bucket-function/ for usage. The function takes two arguments:
+ * 1. The timestamp (as a Drill timestamp)
+ * 2. The desired bucket interval IN milliseconds
+ *
+ * The function returns a BIGINT of the nearest time bucket.
+ */
+ @FunctionTemplate(name = "time_bucket",
+ scope = FunctionTemplate.FunctionScope.SIMPLE,
+ nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
+ public static class DoubleTimeBucketFunction implements DrillSimpleFunc {
+
+ @Param
+ Float8Holder inputDate;
+
+ @Param
+ BigIntHolder interval;
+
+ @Output
+ BigIntHolder out;
+
+ @Override
+ public void setup() {
+ }
+
+ @Override
+ public void eval() {
+ // Get the timestamp in milliseconds
+ long timestamp = java.lang.Math.round(inputDate.value);
+
+ // Get the interval in milliseconds
+ long groupByInterval = interval.value;
+
+ out.value = timestamp - (timestamp % groupByInterval);
}
}
}
diff --git a/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestTimeBucketFunction.java b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestTimeBucketFunction.java
index a670e1e..127486f 100644
--- a/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestTimeBucketFunction.java
+++ b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestTimeBucketFunction.java
@@ -27,10 +27,23 @@ import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
+import java.time.LocalDateTime;
+
@Category({UnlikelyTest.class, SqlFunctionTest.class})
public class TestTimeBucketFunction extends ClusterTest {
+ // Friday, January 1, 2016 12:06:00 AM
+ private final static long MILLISECOND_TIMESTAMP = 1451606760L;
+
+ // Friday, January 1, 2016 12:06:00 AM
+ private final static long NAMOSECOND_TIMESTAMP = 1451606760000000000L;
+
+ // Friday, March 27, 2020 1:33:53.845 AM
+ private final static long MARCH27_TIMESTAMP = 1585272833845L;
+
+ private final static int INTERVAL = 300000;
+
@BeforeClass
public static void setup() throws Exception {
ClusterFixtureBuilder builder = ClusterFixture.builder(dirTestWatcher);
@@ -39,10 +52,10 @@ public class TestTimeBucketFunction extends ClusterTest {
@Test
public void testTimeBucketNanoSeconds() throws Exception {
- String query = "SELECT time_bucket_ns(1451606760000000000, 300000) AS high FROM (values(1))";
+ String query = String.format("SELECT time_bucket_ns(%d, %d) AS high", NAMOSECOND_TIMESTAMP, INTERVAL);
testBuilder()
.sqlQuery(query)
- .ordered()
+ .unOrdered()
.baselineColumns("high")
.baselineValues(1451606700000000000L)
.go();
@@ -50,10 +63,10 @@ public class TestTimeBucketFunction extends ClusterTest {
@Test
public void testNullTimeBucketNanoSeconds() throws Exception {
- String query = "SELECT time_bucket_ns(null, 300000) AS high FROM (values(1))";
+ String query = String.format("SELECT time_bucket_ns(null, %d) AS high", INTERVAL);
testBuilder()
.sqlQuery(query)
- .ordered()
+ .unOrdered()
.baselineColumns("high")
.baselineValues((Long) null)
.go();
@@ -61,10 +74,10 @@ public class TestTimeBucketFunction extends ClusterTest {
@Test
public void testNullIntervalTimeBucketNanoSeconds() throws Exception {
- String query = "SELECT time_bucket_ns(1451606760000000000, null) AS high FROM (values(1))";
+ String query = String.format("SELECT time_bucket_ns(%d, null) AS high", NAMOSECOND_TIMESTAMP);
testBuilder()
.sqlQuery(query)
- .ordered()
+ .unOrdered()
.baselineColumns("high")
.baselineValues((Long) null)
.go();
@@ -72,10 +85,10 @@ public class TestTimeBucketFunction extends ClusterTest {
@Test
public void testBothNullIntervalTimeBucketNanoSeconds() throws Exception {
- String query = "SELECT time_bucket_ns(null, null) AS high FROM (values(1))";
+ String query = "SELECT time_bucket_ns(null, null) AS high";
testBuilder()
.sqlQuery(query)
- .ordered()
+ .unOrdered()
.baselineColumns("high")
.baselineValues((Long) null)
.go();
@@ -83,18 +96,40 @@ public class TestTimeBucketFunction extends ClusterTest {
@Test
public void testTimeBucket() throws Exception {
- String query = "SELECT time_bucket(1451606760, 300000) AS high FROM (values(1))";
+ String query = String.format("SELECT time_bucket(%d, 300000) AS high", MILLISECOND_TIMESTAMP);
testBuilder()
.sqlQuery(query)
- .ordered()
+ .unOrdered()
+ .baselineColumns("high")
+ .baselineValues(1451400000L)
+ .go();
+ }
+
+ @Test
+ public void testDoubleTimeBucket() throws Exception {
+ String query = String.format("SELECT time_bucket(CAST(%d AS DOUBLE), 300000) AS high", MILLISECOND_TIMESTAMP);
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
.baselineColumns("high")
.baselineValues(1451400000L)
.go();
}
@Test
+ public void testTimeBucketTimestamp() throws Exception {
+ String query = String.format("SELECT time_bucket(CAST( %d AS TIMESTAMP), 300000) AS high", MARCH27_TIMESTAMP);
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("high")
+ .baselineValues(LocalDateTime.of(2020, 3, 27, 1, 30, 0))
+ .go();
+ }
+
+ @Test
public void testNullTimeBucket() throws Exception {
- String query = "SELECT time_bucket(null, 300000) AS high FROM (values(1))";
+ String query = String.format("SELECT time_bucket(null, %d) AS high", INTERVAL);
testBuilder()
.sqlQuery(query)
.ordered()
@@ -105,10 +140,10 @@ public class TestTimeBucketFunction extends ClusterTest {
@Test
public void testNullIntervalTimeBucket() throws Exception {
- String query = "SELECT time_bucket(1451606760, null) AS high FROM (values(1))";
+ String query = String.format("SELECT time_bucket(%d, null) AS high", MILLISECOND_TIMESTAMP);
testBuilder()
.sqlQuery(query)
- .ordered()
+ .unOrdered()
.baselineColumns("high")
.baselineValues((Long) null)
.go();
@@ -116,13 +151,12 @@ public class TestTimeBucketFunction extends ClusterTest {
@Test
public void testBothNullIntervalTimeBucket() throws Exception {
- String query = "SELECT time_bucket(null, null) AS high FROM (values(1))";
+ String query = "SELECT time_bucket(null, null) AS high";
testBuilder()
.sqlQuery(query)
- .ordered()
+ .unOrdered()
.baselineColumns("high")
.baselineValues((Long) null)
.go();
}
-
}