You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/09/23 09:21:01 UTC

hive git commit: HIVE-14751: Add support for date truncation (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 3237bb276 -> e532549f1


HIVE-14751: Add support for date truncation (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e532549f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e532549f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e532549f

Branch: refs/heads/master
Commit: e532549f162c29e6f621c9cb0ba7588e5882e4bc
Parents: 3237bb2
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Fri Sep 23 10:19:55 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Sep 23 10:19:55 2016 +0100

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/parse/HiveLexer.g |   3 +
 .../hadoop/hive/ql/parse/IdentifiersParser.g    |  30 ++-
 .../apache/hadoop/hive/ql/udf/UDFDateFloor.java |   8 +-
 .../ql/udf/TestUDFDateFormatGranularity.java    |  22 +-
 ql/src/test/queries/clientpositive/floor_time.q |  46 ++++
 .../results/clientpositive/floor_time.q.out     | 211 +++++++++++++++++++
 6 files changed, 306 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e532549f/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 7ceb005..af659ad 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -302,7 +302,9 @@ KW_CONF: 'CONF';
 KW_VALUES: 'VALUES';
 KW_RELOAD: 'RELOAD';
 KW_YEAR: 'YEAR';
+KW_QUARTER: 'QUARTER';
 KW_MONTH: 'MONTH';
+KW_WEEK: 'WEEK';
 KW_DAY: 'DAY';
 KW_HOUR: 'HOUR';
 KW_MINUTE: 'MINUTE';
@@ -329,6 +331,7 @@ KW_RELY: 'RELY';
 KW_NORELY: 'NORELY';
 KW_KEY: 'KEY';
 KW_ABORT: 'ABORT';
+KW_FLOOR: 'FLOOR';
 
 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.

http://git-wip-us.apache.org/repos/asf/hive/blob/e532549f/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 9ba1865..7842d50 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -241,6 +241,30 @@ whenExpression
     KW_END -> ^(TOK_FUNCTION KW_WHEN expression*)
     ;
 
+floorExpression
+    :
+    KW_FLOOR
+    LPAREN
+          expression
+          (KW_TO
+          (floorUnit=floorDateQualifiers))?
+    RPAREN
+    -> {floorUnit != null}? ^(TOK_FUNCTION $floorUnit expression)
+    -> ^(TOK_FUNCTION Identifier["floor"] expression)
+    ;
+
+floorDateQualifiers
+    :
+    KW_YEAR -> Identifier["floor_year"]
+    | KW_QUARTER -> Identifier["floor_quarter"]
+    | KW_MONTH -> Identifier["floor_month"]
+    | KW_WEEK -> Identifier["floor_week"]
+    | KW_DAY -> Identifier["floor_day"]
+    | KW_HOUR -> Identifier["floor_hour"]
+    | KW_MINUTE -> Identifier["floor_minute"]
+    | KW_SECOND -> Identifier["floor_second"]
+    ;
+
 constant
 @init { gParent.pushMsg("constant", state); }
 @after { gParent.popMsg(state); }
@@ -323,6 +347,7 @@ atomExpression
     (KW_NULL) => KW_NULL -> TOK_NULL
     | (constant) => constant
     | castExpression
+    | floorExpression
     | caseExpression
     | whenExpression
     | (functionName LPAREN) => function
@@ -566,6 +591,7 @@ sysFuncNames
     | KW_IF
     | KW_CASE
     | KW_WHEN
+    | KW_FLOOR
     | KW_TINYINT
     | KW_SMALLINT
     | KW_INT
@@ -654,13 +680,13 @@ nonReserved
     | KW_KEYS | KW_KEY_TYPE | KW_LAST | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG
     | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_NULLS | KW_OFFLINE
     | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY
-    | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER
+    | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_QUARTER | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER
     | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE
     | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SHARED
     | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED
     | KW_STREAMTABLE | KW_STRING | KW_STRUCT | KW_TABLES | KW_TBLPROPERTIES | KW_TEMPORARY | KW_TERMINATED
     | KW_TINYINT | KW_TOUCH | KW_TRANSACTIONS | KW_UNARCHIVE | KW_UNDO | KW_UNIONTYPE | KW_UNLOCK | KW_UNSET
-    | KW_UNSIGNED | KW_URI | KW_USE | KW_UTC | KW_UTCTIMESTAMP | KW_VALUE_TYPE | KW_VIEW | KW_WHILE | KW_YEAR
+    | KW_UNSIGNED | KW_URI | KW_USE | KW_UTC | KW_UTCTIMESTAMP | KW_VALUE_TYPE | KW_VIEW | KW_WEEK | KW_WHILE | KW_YEAR
     | KW_WORK
     | KW_TRANSACTION
     | KW_WRITE

http://git-wip-us.apache.org/repos/asf/hive/blob/e532549f/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateFloor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateFloor.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateFloor.java
index 08ed9fd..126ed44 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateFloor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateFloor.java
@@ -40,7 +40,6 @@ import com.google.common.collect.ImmutableMap;
 public abstract class UDFDateFloor extends UDF {
 
   private final QueryGranularity granularity;
-
   private final TimestampWritable result;
 
   public UDFDateFloor(String granularity) {
@@ -52,7 +51,12 @@ public abstract class UDFDateFloor extends UDF {
     if (t == null) {
       return null;
     }
-    long newTimestamp = granularity.truncate(t.getTimestamp().getTime());
+    final long originalTimestamp = t.getTimestamp().getTime(); // default
+    final long originalTimestampUTC = new DateTime(originalTimestamp)
+        .withZoneRetainFields(DateTimeZone.UTC).getMillis(); // default -> utc
+    final long newTimestampUTC = granularity.truncate(originalTimestampUTC); // utc
+    final long newTimestamp = new DateTime(newTimestampUTC, DateTimeZone.UTC)
+        .withZoneRetainFields(DateTimeZone.getDefault()).getMillis(); // utc -> default
     result.setTime(newTimestamp);
     return result;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/e532549f/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFDateFormatGranularity.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFDateFormatGranularity.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFDateFormatGranularity.java
index f871de2..03b2d42 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFDateFormatGranularity.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFDateFormatGranularity.java
@@ -25,61 +25,63 @@ import org.junit.Test;
 import junit.framework.TestCase;
 
 public class TestUDFDateFormatGranularity extends TestCase {
-  
+
+  // Timestamp values are PST (timezone for tests is set to PST by default)
+
   @Test
   public void testTimestampToTimestampWithGranularity() throws Exception {
     // Running example
     // Friday 30th August 1985 02:47:02 AM
-    final TimestampWritable t = new TimestampWritable(new Timestamp(494218022082L));
+    final TimestampWritable t = new TimestampWritable(new Timestamp(494243222000L));
     UDFDateFloor g;
 
     // Year granularity
     // Tuesday 1st January 1985 12:00:00 AM
     g = new UDFDateFloorYear();
     TimestampWritable i1 = g.evaluate(t);
-    assertEquals(473385600000L, i1.getTimestamp().getTime());
+    assertEquals(473414400000L, i1.getTimestamp().getTime());
     
     // Quarter granularity
     // Monday 1st July 1985 12:00:00 AM
     g = new UDFDateFloorQuarter();
     TimestampWritable i2 = g.evaluate(t);
-    assertEquals(489024000000L, i2.getTimestamp().getTime());
+    assertEquals(489049200000L, i2.getTimestamp().getTime());
 
     // Month granularity
     // Thursday 1st August 1985 12:00:00 AM
     g = new UDFDateFloorMonth();
     TimestampWritable i3 = g.evaluate(t);
-    assertEquals(491702400000L, i3.getTimestamp().getTime());
+    assertEquals(491727600000L, i3.getTimestamp().getTime());
 
     // Week granularity
     // Monday 26th August 1985 12:00:00 AM
     g = new UDFDateFloorWeek();
     TimestampWritable i4 = g.evaluate(t);
-    assertEquals(493862400000L, i4.getTimestamp().getTime());
+    assertEquals(493887600000L, i4.getTimestamp().getTime());
 
     // Day granularity
     // Friday 30th August 1985 12:00:00 AM
     g = new UDFDateFloorDay();
     TimestampWritable i5 = g.evaluate(t);
-    assertEquals(494208000000L, i5.getTimestamp().getTime());
+    assertEquals(494233200000L, i5.getTimestamp().getTime());
 
     // Hour granularity
     // Friday 30th August 1985 02:00:00 AM
     g = new UDFDateFloorHour();
     TimestampWritable i6 = g.evaluate(t);
-    assertEquals(494215200000L, i6.getTimestamp().getTime());
+    assertEquals(494240400000L, i6.getTimestamp().getTime());
 
     // Minute granularity
     // Friday 30th August 1985 02:47:00 AM
     g = new UDFDateFloorMinute();
     TimestampWritable i7 = g.evaluate(t);
-    assertEquals(494218020000L, i7.getTimestamp().getTime());
+    assertEquals(494243220000L, i7.getTimestamp().getTime());
 
     // Second granularity
     // Friday 30th August 1985 02:47:02 AM
     g = new UDFDateFloorSecond();
     TimestampWritable i8 = g.evaluate(t);
-    assertEquals(494218022000L, i8.getTimestamp().getTime());
+    assertEquals(494243222000L, i8.getTimestamp().getTime());
   }
 
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/e532549f/ql/src/test/queries/clientpositive/floor_time.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/floor_time.q b/ql/src/test/queries/clientpositive/floor_time.q
new file mode 100644
index 0000000..b0f6f90
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/floor_time.q
@@ -0,0 +1,46 @@
+drop table extract_udf;
+
+create table extract_udf (t timestamp);
+from (select * from src tablesample (1 rows)) s
+  insert overwrite table extract_udf 
+    select '2011-05-06 07:08:09.1234567';
+
+select t
+from extract_udf;
+
+explain
+select floor_day(t)
+from extract_udf;
+
+select floor_day(t)
+from extract_udf;
+
+-- new syntax
+explain
+select floor(t to day)
+from extract_udf;
+
+select floor(t to day)
+from extract_udf;
+
+
+select floor(t to second)
+from extract_udf;
+
+select floor(t to minute)
+from extract_udf;
+
+select floor(t to hour)
+from extract_udf;
+
+select floor(t to week)
+from extract_udf;
+
+select floor(t to month)
+from extract_udf;
+
+select floor(t to quarter)
+from extract_udf;
+
+select floor(t to year)
+from extract_udf;

http://git-wip-us.apache.org/repos/asf/hive/blob/e532549f/ql/src/test/results/clientpositive/floor_time.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/floor_time.q.out b/ql/src/test/results/clientpositive/floor_time.q.out
new file mode 100644
index 0000000..f0cb324
--- /dev/null
+++ b/ql/src/test/results/clientpositive/floor_time.q.out
@@ -0,0 +1,211 @@
+PREHOOK: query: drop table extract_udf
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table extract_udf
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table extract_udf (t timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@extract_udf
+POSTHOOK: query: create table extract_udf (t timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@extract_udf
+PREHOOK: query: from (select * from src tablesample (1 rows)) s
+  insert overwrite table extract_udf 
+    select '2011-05-06 07:08:09.1234567'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@extract_udf
+POSTHOOK: query: from (select * from src tablesample (1 rows)) s
+  insert overwrite table extract_udf 
+    select '2011-05-06 07:08:09.1234567'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@extract_udf
+POSTHOOK: Lineage: extract_udf.t EXPRESSION []
+PREHOOK: query: select t
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select t
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-05-06 07:08:09.1234567
+PREHOOK: query: explain
+select floor_day(t)
+from extract_udf
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select floor_day(t)
+from extract_udf
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: extract_udf
+            Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: floor_day(t) (type: timestamp)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select floor_day(t)
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select floor_day(t)
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-05-06 00:00:00
+PREHOOK: query: -- new syntax
+explain
+select floor(t to day)
+from extract_udf
+PREHOOK: type: QUERY
+POSTHOOK: query: -- new syntax
+explain
+select floor(t to day)
+from extract_udf
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: extract_udf
+            Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: floor_day(t) (type: timestamp)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select floor(t to day)
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select floor(t to day)
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-05-06 00:00:00
+PREHOOK: query: select floor(t to second)
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select floor(t to second)
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-05-06 07:08:09
+PREHOOK: query: select floor(t to minute)
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select floor(t to minute)
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-05-06 07:08:00
+PREHOOK: query: select floor(t to hour)
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select floor(t to hour)
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-05-06 07:00:00
+PREHOOK: query: select floor(t to week)
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select floor(t to week)
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-05-02 00:00:00
+PREHOOK: query: select floor(t to month)
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select floor(t to month)
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-05-01 00:00:00
+PREHOOK: query: select floor(t to quarter)
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select floor(t to quarter)
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-04-01 00:00:00
+PREHOOK: query: select floor(t to year)
+from extract_udf
+PREHOOK: type: QUERY
+PREHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+POSTHOOK: query: select floor(t to year)
+from extract_udf
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@extract_udf
+#### A masked pattern was here ####
+2011-01-01 00:00:00