You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/09/28 21:43:35 UTC
[hive] branch master updated: HIVE-22241: Implement UDF to
interpret date/timestamp using its internal representation and
Gregorian-Julian hybrid calendar (Jesus Camacho Rodriguez,
reviewed by Adam Szita)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 52224f4 HIVE-22241: Implement UDF to interpret date/timestamp using its internal representation and Gregorian-Julian hybrid calendar (Jesus Camacho Rodriguez, reviewed by Adam Szita)
52224f4 is described below
commit 52224f4adc4ac61e5b5349d1c1cc5f94882d183b
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Tue Sep 24 21:17:06 2019 -0700
HIVE-22241: Implement UDF to interpret date/timestamp using its internal representation and Gregorian-Julian hybrid calendar (Jesus Camacho Rodriguez, reviewed by Adam Szita)
Close apache/hive#789
---
.../hadoop/hive/ql/exec/FunctionRegistry.java | 2 +
.../GenericUDFDatetimeLegacyHybridCalendar.java | 137 +++++++++++++++++++++
...TestGenericUDFDatetimeLegacyHybridCalendar.java | 112 +++++++++++++++++
.../udf_datetime_legacy_hybrid_calendar.q | 12 ++
.../results/clientpositive/show_functions.q.out | 3 +
.../udf_datetime_legacy_hybrid_calendar.q.out | 49 ++++++++
6 files changed, 315 insertions(+)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index b0d7a4e..1616409 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -522,6 +522,8 @@ public final class FunctionRegistry {
system.registerGenericUDF("unix_timestamp", GenericUDFUnixTimeStamp.class);
system.registerGenericUDF("to_unix_timestamp", GenericUDFToUnixTimeStamp.class);
+ system.registerGenericUDF("datetime_legacy_hybrid_calendar", GenericUDFDatetimeLegacyHybridCalendar.class);
+
system.registerGenericUDF("internal_interval", GenericUDFInternalInterval.class);
system.registerGenericUDF("to_epoch_milli", GenericUDFEpochMilli.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java
new file mode 100644
index 0000000..4a94b44
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.text.SimpleDateFormat;
+import java.util.TimeZone;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+
+/**
+ * GenericUDFToProlepticGregorian.
+ */
+@Description(name = "datetime_legacy_hybrid_calendar",
+ value = "_FUNC_(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar \n"
+ + "assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.",
+ extended = "Converts a date/timestamp to new proleptic Gregorian calendar (ISO 8601 standard), which is produced \n"
+ + "by extending the Gregorian calendar backward to dates preceding its official introduction in 1582, assuming \n"
+ + "that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar, \n"
+ + "i.e., calendar that supports both the Julian and Gregorian calendar systems with the support of a single \n"
+ + "discontinuity, which corresponds by default to the Gregorian date when the Gregorian calendar was instituted.")
+public class GenericUDFDatetimeLegacyHybridCalendar extends GenericUDF {
+
+ private transient PrimitiveObjectInspector inputOI;
+ private transient PrimitiveObjectInspector resultOI;
+ private transient Converter converter;
+ private transient SimpleDateFormat formatter;
+
+ private DateWritableV2 dateWritable = new DateWritableV2();
+ private TimestampWritableV2 timestampWritable = new TimestampWritableV2();
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ if (arguments.length < 1) {
+ throw new UDFArgumentLengthException(
+ "The function datetime_legacy_hybrid_calendar requires at least one argument, got "
+ + arguments.length);
+ }
+
+ try {
+ inputOI = (PrimitiveObjectInspector) arguments[0];
+ PrimitiveCategory pc = inputOI.getPrimitiveCategory();
+ switch (pc) {
+ case DATE:
+ formatter = new SimpleDateFormat("yyyy-MM-dd");
+ formatter.setLenient(false);
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ converter = ObjectInspectorConverters.getConverter(inputOI,
+ PrimitiveObjectInspectorFactory.writableDateObjectInspector);
+ resultOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+ break;
+ case TIMESTAMP:
+ formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ formatter.setLenient(false);
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ converter = ObjectInspectorConverters.getConverter(inputOI,
+ PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
+ resultOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+ break;
+ default:
+ throw new UDFArgumentException(
+ "datetime_legacy_hybrid_calendar only allows date or timestamp types");
+ }
+ } catch (ClassCastException e) {
+ throw new UDFArgumentException(
+ "The function datetime_legacy_hybrid_calendar takes only primitive types");
+ }
+
+ return resultOI;
+ }
+
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ Object input = arguments[0].get();
+ if (input == null) {
+ return null;
+ }
+
+ input = converter.convert(input);
+
+ switch (resultOI.getPrimitiveCategory()) {
+ case DATE:
+ Date date = ((DateWritableV2) input).get();
+ java.sql.Date oldDate = new java.sql.Date(date.toEpochMilli());
+ dateWritable.set(Date.valueOf(formatter.format(oldDate)));
+ return dateWritable;
+ case TIMESTAMP:
+ Timestamp timestamp = ((TimestampWritableV2) input).getTimestamp();
+ Timestamp adjustedTimestamp = Timestamp.valueOf(
+ formatter.format(new java.sql.Timestamp(timestamp.toEpochMilli())));
+ adjustedTimestamp.setNanos(timestamp.getNanos());
+ timestampWritable.set(adjustedTimestamp);
+ return timestampWritable;
+ default:
+ // Should never happen.
+ throw new IllegalStateException("Unexpected type in evaluating datetime_legacy_hybrid_calendar: " +
+ inputOI.getPrimitiveCategory());
+ }
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ return getStandardDisplayString(getFuncName(), children);
+ }
+
+ @Override
+ protected String getFuncName() {
+ return "datetime_legacy_hybrid_calendar";
+ }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDatetimeLegacyHybridCalendar.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDatetimeLegacyHybridCalendar.java
new file mode 100644
index 0000000..34bb9d3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDatetimeLegacyHybridCalendar.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/**
+ * TestGenericUDFDatetimeLegacyHybridCalendar.
+ */
+public class TestGenericUDFDatetimeLegacyHybridCalendar {
+
+ public static void runAndVerify(GenericUDF udf,
+ Object arg1, Object expected) throws HiveException {
+ DeferredObject[] args = { new DeferredJavaObject(arg1) };
+ Object result = udf.evaluate(args);
+
+ if (expected == null) {
+ assertNull(result);
+ } else {
+ assertEquals(expected.toString(), result.toString());
+ }
+ }
+
+ @Test
+ public void testDateLegacyHybridCalendar() throws Exception {
+ ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+ GenericUDFDatetimeLegacyHybridCalendar udf = new GenericUDFDatetimeLegacyHybridCalendar();
+ ObjectInspector[] args2 = {valueOI, valueOI};
+ udf.initialize(args2);
+
+ runAndVerify(udf,
+ new DateWritableV2(Date.valueOf("0000-12-30")),
+ new DateWritableV2(Date.valueOf("0001-01-01")));
+
+ runAndVerify(udf,
+ new DateWritableV2(Date.valueOf("0601-03-07")),
+ new DateWritableV2(Date.valueOf("0601-03-04")));
+
+ runAndVerify(udf,
+ new DateWritableV2(Date.valueOf("1582-10-14")),
+ new DateWritableV2(Date.valueOf("1582-10-04")));
+
+ runAndVerify(udf,
+ new DateWritableV2(Date.valueOf("1582-10-15")),
+ new DateWritableV2(Date.valueOf("1582-10-15")));
+
+ runAndVerify(udf,
+ new DateWritableV2(Date.valueOf("2015-03-07")),
+ new DateWritableV2(Date.valueOf("2015-03-07")));
+ }
+
+ @Test
+ public void testDatetimeLegacyHybridCalendar() throws Exception {
+ ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+ GenericUDFDatetimeLegacyHybridCalendar udf = new GenericUDFDatetimeLegacyHybridCalendar();
+ ObjectInspector[] args2 = {valueOI, valueOI};
+ udf.initialize(args2);
+
+ runAndVerify(udf,
+ new TimestampWritableV2(Timestamp.valueOf("0601-03-07 17:00:00")),
+ new TimestampWritableV2(Timestamp.valueOf("0601-03-04 17:00:00")));
+
+ runAndVerify(udf,
+ new TimestampWritableV2(Timestamp.valueOf("1582-10-14 09:17:22.13")),
+ new TimestampWritableV2(Timestamp.valueOf("1582-10-04 09:17:22.13")));
+
+ runAndVerify(udf,
+ new TimestampWritableV2(Timestamp.valueOf("1582-10-15 11:17:22.13")),
+ new TimestampWritableV2(Timestamp.valueOf("1582-10-15 11:17:22.13")));
+
+ runAndVerify(udf,
+ new TimestampWritableV2(Timestamp.valueOf("2015-03-07 17:00:00")),
+ new TimestampWritableV2(Timestamp.valueOf("2015-03-07 17:00:00")));
+
+ // Make sure nanos are preserved
+ runAndVerify(udf,
+ new TimestampWritableV2(Timestamp.valueOf("0601-03-07 18:00:00.123456789")),
+ new TimestampWritableV2(Timestamp.valueOf("0601-03-04 18:00:00.123456789")));
+
+ runAndVerify(udf,
+ new TimestampWritableV2(Timestamp.valueOf("2018-07-07 18:00:00.123456789")),
+ new TimestampWritableV2(Timestamp.valueOf("2018-07-07 18:00:00.123456789")));
+ }
+}
diff --git a/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q b/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q
new file mode 100644
index 0000000..ce58a34
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q
@@ -0,0 +1,12 @@
+DESCRIBE FUNCTION datetime_legacy_hybrid_calendar;
+DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar;
+
+SELECT
+ '0601-03-07' AS dts,
+ CAST('0601-03-07' AS DATE) AS dt,
+ datetime_legacy_hybrid_calendar(CAST('0601-03-07' AS DATE)) AS dtp;
+
+SELECT
+ '0501-03-07 17:03:00.4321' AS tss,
+ CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
+ datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp;
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index d88a5f2..9db6845 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -97,6 +97,7 @@ date_add
date_format
date_sub
datediff
+datetime_legacy_hybrid_calendar
day
dayofmonth
dayofweek
@@ -448,6 +449,7 @@ date_add
date_format
date_sub
datediff
+datetime_legacy_hybrid_calendar
to_date
validate_acid_sort_order
PREHOOK: query: SHOW FUNCTIONS LIKE '%%%'
@@ -531,6 +533,7 @@ date_add
date_format
date_sub
datediff
+datetime_legacy_hybrid_calendar
day
dayofmonth
dayofweek
diff --git a/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out b/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out
new file mode 100644
index 0000000..572c6c1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out
@@ -0,0 +1,49 @@
+PREHOOK: query: DESCRIBE FUNCTION datetime_legacy_hybrid_calendar
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION datetime_legacy_hybrid_calendar
+POSTHOOK: type: DESCFUNCTION
+datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar
+assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar
+POSTHOOK: type: DESCFUNCTION
+datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar
+assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.
+Converts a date/timestamp to new proleptic Gregorian calendar (ISO 8601 standard), which is produced
+by extending the Gregorian calendar backward to dates preceding its official introduction in 1582, assuming
+that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar,
+i.e., calendar that supports both the Julian and Gregorian calendar systems with the support of a single
+discontinuity, which corresponds by default to the Gregorian date when the Gregorian calendar was instituted.
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFDatetimeLegacyHybridCalendar
+Function type:BUILTIN
+PREHOOK: query: SELECT
+ '0601-03-07' AS dts,
+ CAST('0601-03-07' AS DATE) AS dt,
+ datetime_legacy_hybrid_calendar(CAST('0601-03-07' AS DATE)) AS dtp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ '0601-03-07' AS dts,
+ CAST('0601-03-07' AS DATE) AS dt,
+ datetime_legacy_hybrid_calendar(CAST('0601-03-07' AS DATE)) AS dtp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+0601-03-07 0601-03-07 0601-03-04
+PREHOOK: query: SELECT
+ '0501-03-07 17:03:00.4321' AS tss,
+ CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
+ datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ '0501-03-07 17:03:00.4321' AS tss,
+ CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
+ datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+0501-03-07 17:03:00.4321 0501-03-07 17:03:00.4321 0501-03-05 17:03:00.4321