You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/09/28 21:43:35 UTC

[hive] branch master updated: HIVE-22241: Implement UDF to interpret date/timestamp using its internal representation and Gregorian-Julian hybrid calendar (Jesus Camacho Rodriguez, reviewed by Adam Szita)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 52224f4  HIVE-22241: Implement UDF to interpret date/timestamp using its internal representation and Gregorian-Julian hybrid calendar (Jesus Camacho Rodriguez, reviewed by Adam Szita)
52224f4 is described below

commit 52224f4adc4ac61e5b5349d1c1cc5f94882d183b
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Tue Sep 24 21:17:06 2019 -0700

    HIVE-22241: Implement UDF to interpret date/timestamp using its internal representation and Gregorian-Julian hybrid calendar (Jesus Camacho Rodriguez, reviewed by Adam Szita)
    
    Close apache/hive#789
---
 .../hadoop/hive/ql/exec/FunctionRegistry.java      |   2 +
 .../GenericUDFDatetimeLegacyHybridCalendar.java    | 137 +++++++++++++++++++++
 ...TestGenericUDFDatetimeLegacyHybridCalendar.java | 112 +++++++++++++++++
 .../udf_datetime_legacy_hybrid_calendar.q          |  12 ++
 .../results/clientpositive/show_functions.q.out    |   3 +
 .../udf_datetime_legacy_hybrid_calendar.q.out      |  49 ++++++++
 6 files changed, 315 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index b0d7a4e..1616409 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -522,6 +522,8 @@ public final class FunctionRegistry {
     system.registerGenericUDF("unix_timestamp", GenericUDFUnixTimeStamp.class);
     system.registerGenericUDF("to_unix_timestamp", GenericUDFToUnixTimeStamp.class);
 
+    system.registerGenericUDF("datetime_legacy_hybrid_calendar", GenericUDFDatetimeLegacyHybridCalendar.class);
+
     system.registerGenericUDF("internal_interval", GenericUDFInternalInterval.class);
 
     system.registerGenericUDF("to_epoch_milli", GenericUDFEpochMilli.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java
new file mode 100644
index 0000000..4a94b44
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.text.SimpleDateFormat;
+import java.util.TimeZone;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+
+/**
+ * GenericUDFToProlepticGregorian.
+ */
+@Description(name = "datetime_legacy_hybrid_calendar",
+    value = "_FUNC_(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar \n"
+        + "assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.",
+    extended = "Converts a date/timestamp to new proleptic Gregorian calendar (ISO 8601 standard), which is produced \n"
+        + "by extending the Gregorian calendar backward to dates preceding its official introduction in 1582, assuming \n"
+        + "that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar, \n"
+        + "i.e., calendar that supports both the Julian and Gregorian calendar systems with the support of a single \n"
+        + "discontinuity, which corresponds by default to the Gregorian date when the Gregorian calendar was instituted.")
+public class GenericUDFDatetimeLegacyHybridCalendar extends GenericUDF {
+
+  private transient PrimitiveObjectInspector inputOI;
+  private transient PrimitiveObjectInspector resultOI;
+  private transient Converter converter;
+  private transient SimpleDateFormat formatter;
+
+  private DateWritableV2 dateWritable = new DateWritableV2();
+  private TimestampWritableV2 timestampWritable = new TimestampWritableV2();
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length < 1) {
+      throw new UDFArgumentLengthException(
+          "The function datetime_legacy_hybrid_calendar requires at least one argument, got "
+              + arguments.length);
+    }
+
+    try {
+      inputOI = (PrimitiveObjectInspector) arguments[0];
+      PrimitiveCategory pc = inputOI.getPrimitiveCategory();
+      switch (pc) {
+        case DATE:
+          formatter = new SimpleDateFormat("yyyy-MM-dd");
+          formatter.setLenient(false);
+          formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+          converter = ObjectInspectorConverters.getConverter(inputOI,
+              PrimitiveObjectInspectorFactory.writableDateObjectInspector);
+          resultOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+          break;
+        case TIMESTAMP:
+          formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+          formatter.setLenient(false);
+          formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+          converter = ObjectInspectorConverters.getConverter(inputOI,
+              PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
+          resultOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+          break;
+        default:
+          throw new UDFArgumentException(
+              "datetime_legacy_hybrid_calendar only allows date or timestamp types");
+      }
+    } catch (ClassCastException e) {
+      throw new UDFArgumentException(
+          "The function datetime_legacy_hybrid_calendar takes only primitive types");
+    }
+
+    return resultOI;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object input = arguments[0].get();
+    if (input == null) {
+      return null;
+    }
+
+    input = converter.convert(input);
+
+    switch (resultOI.getPrimitiveCategory()) {
+      case DATE:
+        Date date = ((DateWritableV2) input).get();
+        java.sql.Date oldDate = new java.sql.Date(date.toEpochMilli());
+        dateWritable.set(Date.valueOf(formatter.format(oldDate)));
+        return dateWritable;
+      case TIMESTAMP:
+        Timestamp timestamp = ((TimestampWritableV2) input).getTimestamp();
+        Timestamp adjustedTimestamp = Timestamp.valueOf(
+            formatter.format(new java.sql.Timestamp(timestamp.toEpochMilli())));
+        adjustedTimestamp.setNanos(timestamp.getNanos());
+        timestampWritable.set(adjustedTimestamp);
+        return timestampWritable;
+      default:
+        // Should never happen.
+        throw new IllegalStateException("Unexpected type in evaluating datetime_legacy_hybrid_calendar: " +
+            inputOI.getPrimitiveCategory());
+    }
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString(getFuncName(), children);
+  }
+
+  @Override
+  protected String getFuncName() {
+    return "datetime_legacy_hybrid_calendar";
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDatetimeLegacyHybridCalendar.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDatetimeLegacyHybridCalendar.java
new file mode 100644
index 0000000..34bb9d3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDatetimeLegacyHybridCalendar.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/**
+ * TestGenericUDFDatetimeLegacyHybridCalendar.
+ */
+public class TestGenericUDFDatetimeLegacyHybridCalendar {
+
+  public static void runAndVerify(GenericUDF udf,
+      Object arg1, Object expected) throws HiveException {
+    DeferredObject[] args = { new DeferredJavaObject(arg1) };
+    Object result = udf.evaluate(args);
+
+    if (expected == null) {
+      assertNull(result);
+    } else {
+      assertEquals(expected.toString(), result.toString());
+    }
+  }
+
+  @Test
+  public void testDateLegacyHybridCalendar() throws Exception {
+    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+    GenericUDFDatetimeLegacyHybridCalendar udf = new GenericUDFDatetimeLegacyHybridCalendar();
+    ObjectInspector[] args2 = {valueOI, valueOI};
+    udf.initialize(args2);
+
+    runAndVerify(udf,
+        new DateWritableV2(Date.valueOf("0000-12-30")),
+        new DateWritableV2(Date.valueOf("0001-01-01")));
+
+    runAndVerify(udf,
+        new DateWritableV2(Date.valueOf("0601-03-07")),
+        new DateWritableV2(Date.valueOf("0601-03-04")));
+
+    runAndVerify(udf,
+        new DateWritableV2(Date.valueOf("1582-10-14")),
+        new DateWritableV2(Date.valueOf("1582-10-04")));
+
+    runAndVerify(udf,
+        new DateWritableV2(Date.valueOf("1582-10-15")),
+        new DateWritableV2(Date.valueOf("1582-10-15")));
+
+    runAndVerify(udf,
+        new DateWritableV2(Date.valueOf("2015-03-07")),
+        new DateWritableV2(Date.valueOf("2015-03-07")));
+  }
+
+  @Test
+  public void testDatetimeLegacyHybridCalendar() throws Exception {
+    ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+    GenericUDFDatetimeLegacyHybridCalendar udf = new GenericUDFDatetimeLegacyHybridCalendar();
+    ObjectInspector[] args2 = {valueOI, valueOI};
+    udf.initialize(args2);
+
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("0601-03-07 17:00:00")),
+        new TimestampWritableV2(Timestamp.valueOf("0601-03-04 17:00:00")));
+
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("1582-10-14 09:17:22.13")),
+        new TimestampWritableV2(Timestamp.valueOf("1582-10-04 09:17:22.13")));
+
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("1582-10-15 11:17:22.13")),
+        new TimestampWritableV2(Timestamp.valueOf("1582-10-15 11:17:22.13")));
+
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("2015-03-07 17:00:00")),
+        new TimestampWritableV2(Timestamp.valueOf("2015-03-07 17:00:00")));
+
+    // Make sure nanos are preserved
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("0601-03-07 18:00:00.123456789")),
+        new TimestampWritableV2(Timestamp.valueOf("0601-03-04 18:00:00.123456789")));
+
+    runAndVerify(udf,
+        new TimestampWritableV2(Timestamp.valueOf("2018-07-07 18:00:00.123456789")),
+        new TimestampWritableV2(Timestamp.valueOf("2018-07-07 18:00:00.123456789")));
+  }
+}
diff --git a/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q b/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q
new file mode 100644
index 0000000..ce58a34
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q
@@ -0,0 +1,12 @@
+DESCRIBE FUNCTION datetime_legacy_hybrid_calendar;
+DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar;
+
+SELECT
+  '0601-03-07' AS dts,
+  CAST('0601-03-07' AS DATE) AS dt,
+  datetime_legacy_hybrid_calendar(CAST('0601-03-07' AS DATE)) AS dtp;
+
+SELECT
+  '0501-03-07 17:03:00.4321' AS tss,
+  CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
+  datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp;
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index d88a5f2..9db6845 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -97,6 +97,7 @@ date_add
 date_format
 date_sub
 datediff
+datetime_legacy_hybrid_calendar
 day
 dayofmonth
 dayofweek
@@ -448,6 +449,7 @@ date_add
 date_format
 date_sub
 datediff
+datetime_legacy_hybrid_calendar
 to_date
 validate_acid_sort_order
 PREHOOK: query: SHOW FUNCTIONS LIKE '%%%'
@@ -531,6 +533,7 @@ date_add
 date_format
 date_sub
 datediff
+datetime_legacy_hybrid_calendar
 day
 dayofmonth
 dayofweek
diff --git a/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out b/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out
new file mode 100644
index 0000000..572c6c1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out
@@ -0,0 +1,49 @@
+PREHOOK: query: DESCRIBE FUNCTION datetime_legacy_hybrid_calendar
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION datetime_legacy_hybrid_calendar
+POSTHOOK: type: DESCFUNCTION
+datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar 
+assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar
+POSTHOOK: type: DESCFUNCTION
+datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar 
+assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.
+Converts a date/timestamp to new proleptic Gregorian calendar (ISO 8601 standard), which is produced 
+by extending the Gregorian calendar backward to dates preceding its official introduction in 1582, assuming 
+that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar, 
+i.e., calendar that supports both the Julian and Gregorian calendar systems with the support of a single 
+discontinuity, which corresponds by default to the Gregorian date when the Gregorian calendar was instituted.
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFDatetimeLegacyHybridCalendar
+Function type:BUILTIN
+PREHOOK: query: SELECT
+  '0601-03-07' AS dts,
+  CAST('0601-03-07' AS DATE) AS dt,
+  datetime_legacy_hybrid_calendar(CAST('0601-03-07' AS DATE)) AS dtp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  '0601-03-07' AS dts,
+  CAST('0601-03-07' AS DATE) AS dt,
+  datetime_legacy_hybrid_calendar(CAST('0601-03-07' AS DATE)) AS dtp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+0601-03-07	0601-03-07	0601-03-04
+PREHOOK: query: SELECT
+  '0501-03-07 17:03:00.4321' AS tss,
+  CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
+  datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  '0501-03-07 17:03:00.4321' AS tss,
+  CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
+  datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+0501-03-07 17:03:00.4321	0501-03-07 17:03:00.4321	0501-03-05 17:03:00.4321