You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2019/11/25 14:06:01 UTC

[hive] branch master updated: HIVE-22483: Vectorize UDF datetime_legacy_hybrid_calendar (Karen Coppage, reviewed by Adam Szita)

This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 523d766  HIVE-22483: Vectorize UDF datetime_legacy_hybrid_calendar (Karen Coppage, reviewed by Adam Szita)
523d766 is described below

commit 523d76650c6ef23cbdee69c77f948dfd25b83104
Author: Karen Coppage <kc...@gmail.com>
AuthorDate: Mon Nov 25 14:47:16 2019 +0100

    HIVE-22483: Vectorize UDF datetime_legacy_hybrid_calendar (Karen Coppage, reviewed by Adam Szita)
---
 .../ql/exec/vector/expressions/FuncDateToDate.java | 159 ++++++++++++++++
 .../expressions/FuncTimestampToTimestamp.java      | 159 ++++++++++++++++
 .../VectorUDFDatetimeLegacyHybridCalendarDate.java |  60 ++++++
 ...orUDFDatetimeLegacyHybridCalendarTimestamp.java |  61 ++++++
 .../GenericUDFDatetimeLegacyHybridCalendar.java    |  28 ++-
 .../TestVectorUDFDatetimeLegacyHybridCalendar.java | 209 +++++++++++++++++++++
 .../udf_datetime_legacy_hybrid_calendar.q          |  29 +++
 .../udf_datetime_legacy_hybrid_calendar.q.out      | 128 ++++++++++++-
 8 files changed, 816 insertions(+), 17 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDateToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDateToDate.java
new file mode 100644
index 0000000..d0e68ae
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDateToDate.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+import java.util.Arrays;
+
+/**
+ * This is a superclass for unary functions and expressions taking a single date and returning
+ * a date, that operate directly on the input and set the output.
+ */
+public abstract class FuncDateToDate extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+  private final int inputColumn;
+
+  public FuncDateToDate(int inputColumn, int outputColumnNum) {
+    super(outputColumnNum);
+    this.inputColumn = inputColumn;
+  }
+
+  public FuncDateToDate() {
+    super();
+
+    // Dummy final assignments.
+    inputColumn = -1;
+  }
+
+  protected abstract void func(LongColumnVector outputColVector, LongColumnVector inputColVector, int i);
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector inputColVector = (LongColumnVector) batch.cols[inputColumn];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+
+    int[] sel = batch.selected;
+    int n = batch.size;
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    if (n == 0) {
+
+      // Nothing to do
+      return;
+    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          func(outputColVector, inputColVector, i);
+        }
+      }
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
+          }
+        }
+      } else {
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return getColumnParamString(0, inputColumn);
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1)
+        .setArgumentTypes(getInputColumnType())
+        .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+
+  protected VectorExpressionDescriptor.ArgumentType getInputColumnType() {
+    return VectorExpressionDescriptor.ArgumentType.DATE;
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToTimestamp.java
new file mode 100644
index 0000000..1a45ef9
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToTimestamp.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * This is a superclass for unary functions and expressions taking a single timestamp and returning
+ * a timestamp, that operate directly on the input and set the output.
+ */
+public abstract class FuncTimestampToTimestamp extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+  private final int inputColumn;
+
+  public FuncTimestampToTimestamp(int inputColumn, int outputColumnNum) {
+    super(outputColumnNum);
+    this.inputColumn = inputColumn;
+  }
+
+  public FuncTimestampToTimestamp() {
+    super();
+
+    // Dummy final assignments.
+    inputColumn = -1;
+  }
+
+  protected abstract void func(
+      TimestampColumnVector outputColVector, TimestampColumnVector inputColVector, int i);
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
+
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    if (n == 0) {
+
+      // Nothing to do
+      return;
+    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        func(outputColVector, inputColVector, 0);
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            func(outputColVector, inputColVector, i);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            func(outputColVector, inputColVector, i);
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          func(outputColVector, inputColVector, i);
+        }
+      }
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
+          }
+        }
+      } else {
+        System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!inputColVector.isNull[i]) {
+            func(outputColVector, inputColVector, i);
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return getColumnParamString(0, inputColumn);
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1)
+        .setArgumentTypes(getInputColumnType())
+        .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+
+  protected VectorExpressionDescriptor.ArgumentType getInputColumnType() {
+    return VectorExpressionDescriptor.ArgumentType.TIMESTAMP;
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarDate.java
new file mode 100644
index 0000000..5b5fff5
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarDate.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
+import java.text.SimpleDateFormat;
+import java.util.TimeZone;
+
+/**
+ * Vectorized version of GenericUDFDatetimeLegacyHybridCalendar (datetime_legacy_hybrid_calendar).
+ * Converts a date/timestamp to legacy hybrid Julian-Gregorian calendar assuming that its internal
+ * days/milliseconds since epoch is calculated using the proleptic Gregorian calendar.
+ * Extends {@link FuncDateToDate}
+ */
+
+public class VectorUDFDatetimeLegacyHybridCalendarDate extends FuncDateToDate {
+  private static final long serialVersionUID = 1L;
+
+  // SimpleDateFormat doesn't serialize well; it's also not thread-safe
+  private static final ThreadLocal<SimpleDateFormat> SIMPLE_DATE_FORMAT_THREAD_LOCAL =
+      ThreadLocal.withInitial(() -> {
+        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+        formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+        formatter.setLenient(false);
+        return formatter;
+      });
+
+  public VectorUDFDatetimeLegacyHybridCalendarDate() {
+    super();
+  }
+
+  public VectorUDFDatetimeLegacyHybridCalendarDate(int inputColumn, int outputColumnNum) {
+    super(inputColumn, outputColumnNum);
+  }
+
+  protected void func(LongColumnVector outputColVector, LongColumnVector inputColVector, int i) {
+    // get number of milliseconds from number of days
+    Date inputDate = Date.ofEpochDay((int) inputColVector.vector[i]);
+    java.sql.Date oldDate = new java.sql.Date(inputDate.toEpochMilli());
+    Date adjustedDate = Date.valueOf(SIMPLE_DATE_FORMAT_THREAD_LOCAL.get().format(oldDate));
+    outputColVector.vector[i] = adjustedDate.toEpochDay();
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarTimestamp.java
new file mode 100644
index 0000000..0bb93ff
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarTimestamp.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import java.text.SimpleDateFormat;
+import java.util.TimeZone;
+
+/**
+ * Vectorized version of GenericUDFDatetimeLegacyHybridCalendar (datetime_legacy_hybrid_calendar).
+ * Converts a date/timestamp to legacy hybrid Julian-Gregorian calendar assuming that its internal
+ * days/milliseconds since epoch is calculated using the proleptic Gregorian calendar.
+ * Extends {@link FuncTimestampToTimestamp}
+ */
+
+public class VectorUDFDatetimeLegacyHybridCalendarTimestamp extends FuncTimestampToTimestamp {
+  private static final long serialVersionUID = 1L;
+
+  // SimpleDateFormat doesn't serialize well; it's also not thread-safe
+  private static final ThreadLocal<SimpleDateFormat> SIMPLE_DATE_FORMAT_THREAD_LOCAL =
+      ThreadLocal.withInitial(() -> {
+        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+        formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+        formatter.setLenient(false);
+        return formatter;
+      });
+
+  public VectorUDFDatetimeLegacyHybridCalendarTimestamp() {
+    super();
+  }
+
+  public VectorUDFDatetimeLegacyHybridCalendarTimestamp(int inputColumn, int outputColumnNum) {
+    super(inputColumn, outputColumnNum);
+  }
+
+  protected void func(TimestampColumnVector outputColVector, TimestampColumnVector inputColVector,
+      int i) {
+    String adjustedTimestampString = SIMPLE_DATE_FORMAT_THREAD_LOCAL.get()
+        .format(new java.sql.Timestamp(inputColVector.time[i]));
+    Timestamp adjustedTimestamp = Timestamp.valueOf(adjustedTimestampString);
+    outputColVector.time[i] = adjustedTimestamp.toEpochMilli();
+    // Nanos don't change
+    outputColVector.nanos[i] = inputColVector.nanos[i];
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java
index 4a94b44..b2f11d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java
@@ -24,6 +24,9 @@ import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDatetimeLegacyHybridCalendarDate;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDatetimeLegacyHybridCalendarTimestamp;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.DateWritableV2;
 import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
@@ -36,16 +39,25 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 
 
 /**
- * GenericUDFToProlepticGregorian.
+ * GenericUDFDatetimeLegacyHybridCalendar.
  */
 @Description(name = "datetime_legacy_hybrid_calendar",
-    value = "_FUNC_(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar \n"
-        + "assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.",
-    extended = "Converts a date/timestamp to new proleptic Gregorian calendar (ISO 8601 standard), which is produced \n"
-        + "by extending the Gregorian calendar backward to dates preceding its official introduction in 1582, assuming \n"
-        + "that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar, \n"
-        + "i.e., calendar that supports both the Julian and Gregorian calendar systems with the support of a single \n"
-        + "discontinuity, which corresponds by default to the Gregorian date when the Gregorian calendar was instituted.")
+    value = "_FUNC_(date/timestamp) - Converts a date/timestamp to legacy hybrid Julian-Gregorian "
+        + "calendar\n"
+        + "assuming that its internal days/milliseconds since epoch is calculated using the "
+        + "proleptic Gregorian calendar.",
+    extended = "Converts a date/timestamp to legacy Gregorian-Julian hybrid calendar, i.e., "
+        + "calendar that supports both\n"
+        + "the Julian and Gregorian calendar systems with the support of a single discontinuity, "
+        + "which corresponds by\n"
+        + "default to the Gregorian date when the Gregorian calendar was instituted; assuming "
+        + "that its internal\n"
+        + "days/milliseconds since epoch is calculated using new proleptic Gregorian calendar "
+        + "(ISO 8601 standard), which\n"
+        + "is produced by extending the Gregorian calendar backward to dates preceding its "
+        + "official introduction in 1582.\n")
+@VectorizedExpressions({VectorUDFDatetimeLegacyHybridCalendarTimestamp.class,
+    VectorUDFDatetimeLegacyHybridCalendarDate.class })
 public class GenericUDFDatetimeLegacyHybridCalendar extends GenericUDF {
 
   private transient PrimitiveObjectInspector inputOI;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFDatetimeLegacyHybridCalendar.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFDatetimeLegacyHybridCalendar.java
new file mode 100644
index 0000000..08fabdd
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFDatetimeLegacyHybridCalendar.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDatetimeLegacyHybridCalendar;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests VectorUDFDatetimeLegacyHybridCalendarTimestamp and
+ * VectorUDFDatetimeLegacyHybridCalendarDate.
+ */
+public class TestVectorUDFDatetimeLegacyHybridCalendar {
+
+  @Test
+  public void testVectorUDFDatetimeLegacyHybridCalendarTimestamp() throws HiveException {
+    VectorizedRowBatch batch = getFreshBatchOfTimestamps(VectorizedRowBatch.DEFAULT_SIZE);
+    Assert.assertTrue(((TimestampColumnVector) batch.cols[1]).noNulls);
+    Assert.assertFalse(((TimestampColumnVector) batch.cols[1]).isRepeating);
+    verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch);
+    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
+    verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch);
+
+    batch = getFreshBatchOfTimestamps(1);
+    batch.cols[0].isRepeating = true; //
+    verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch);
+    batch.cols[0].noNulls = false;
+    batch.cols[0].isNull[0] = true;
+    verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch);
+
+    batch = getFreshBatchOfTimestamps(3);
+    batch.cols[0].isRepeating = false;
+    batch.selectedInUse = true;
+    batch.selected = new int[] {0, 1, 2};
+    verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch);
+    batch.cols[0].noNulls = false;
+    batch.cols[0].isNull[0] = true;
+    verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch);
+  }
+
+  private VectorizedRowBatch getFreshBatchOfTimestamps(int size) {
+    return getVectorizedRowBatch(new java.sql.Timestamp[] {
+        new java.sql.Timestamp(Timestamp.valueOf("0001-01-01 00:00:00").toEpochMilli()),
+        new java.sql.Timestamp(Timestamp.valueOf("1400-01-01 00:30:00.123456").toEpochMilli()),
+        new java.sql.Timestamp(Timestamp.valueOf("1500-01-01 00:30:00").toEpochMilli()),
+        new java.sql.Timestamp(Timestamp.valueOf("1583-01-01 00:30:00.123").toEpochMilli()),
+        },
+        size);
+  }
+
+  /**
+   * Input array is used to fill the entire specified size of the vector row batch.
+   */
+  private VectorizedRowBatch getVectorizedRowBatch(java.sql.Timestamp[] inputs, int size) {
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+    TimestampColumnVector inputCol = new TimestampColumnVector(size);
+    for (int i = 0; i < size; i++) {
+      inputCol.set(i, inputs[i % inputs.length]);
+    }
+    batch.cols[0] = inputCol;
+    batch.cols[1] = new TimestampColumnVector(size);
+    batch.size = size;
+    return batch;
+  }
+
+  private void verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(VectorizedRowBatch batch)
+      throws HiveException  {
+    GenericUDF genUdf = new GenericUDFDatetimeLegacyHybridCalendar();
+    genUdf.initialize(new ObjectInspector[]{
+        PrimitiveObjectInspectorFactory.writableTimestampObjectInspector});
+
+    VectorExpression vecUdf = new VectorUDFDatetimeLegacyHybridCalendarTimestamp(0, 1);
+    vecUdf.evaluate(batch);
+    final int in = 0;
+    final int out = 1;
+
+    for (int i = 0; i < batch.size; i++) {
+      if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
+        java.sql.Timestamp input =
+            ((TimestampColumnVector) batch.cols[in]).asScratchTimestamp(i);
+        java.sql.Timestamp result =
+            ((TimestampColumnVector) batch.cols[out]).asScratchTimestamp(i);
+        compareToUDFDatetimeLegacyHybridCalendar(genUdf, input, result);
+      } else {
+        Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
+      }
+    }
+  }
+
+  private void compareToUDFDatetimeLegacyHybridCalendar(
+      GenericUDF udf, java.sql.Timestamp in, java.sql.Timestamp out) throws HiveException {
+    TimestampWritableV2 tswInput = new TimestampWritableV2(
+        org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli(in.getTime(), in.getNanos()));
+    TimestampWritableV2 tswOutput = (TimestampWritableV2) udf
+        .evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(tswInput)});
+    Assert.assertEquals(tswOutput.getTimestamp(), Timestamp.ofEpochMilli(out.getTime()));
+    Assert.assertEquals(tswOutput.getNanos(), out.getNanos());
+  }
+
+  @Test
+  public void testVectorUDFDatetimeLegacyHybridCalendarDate() throws HiveException {
+    VectorizedRowBatch batch = getFreshBatchOfDates(VectorizedRowBatch.DEFAULT_SIZE);
+    Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
+    Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
+    verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch);
+    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
+    verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch);
+
+    batch = getFreshBatchOfDates(1);
+    batch.cols[0].isRepeating = true; //
+    verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch);
+    batch.cols[0].noNulls = false;
+    batch.cols[0].isNull[0] = true;
+    verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch);
+
+    batch = getFreshBatchOfDates(3);
+    batch.cols[0].isRepeating = false;
+    batch.selectedInUse = true;
+    batch.selected = new int[] {0, 1, 2};
+    verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch);
+    batch.cols[0].noNulls = false;
+    batch.cols[0].isNull[0] = true;
+    verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch);
+  }
+
+  private VectorizedRowBatch getFreshBatchOfDates(int size) {
+    return getVectorizedRowBatch(new Long[] {
+        (long) Date.valueOf("0001-01-01").toEpochDay(),
+        (long) Date.valueOf("1400-01-01").toEpochDay(),
+        (long) Date.valueOf("1500-01-01").toEpochDay(),
+        (long) Date.valueOf("1583-01-01").toEpochDay(),
+        },
+        size);
+  }
+
+  /**
+   * Input array is used to fill the entire specified size of the vector row batch.
+   */
+  private VectorizedRowBatch getVectorizedRowBatch(Long[] inputs, int size) {
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+    LongColumnVector inputCol = new LongColumnVector(size);
+    for (int i = 0; i < size; i++) {
+      inputCol.vector[i] = inputs[i % inputs.length];
+    }
+    batch.cols[0] = inputCol;
+    batch.cols[1] = new LongColumnVector(size);
+    batch.size = size;
+    return batch;
+  }
+
+
+  private void verifyVectorUDFDatetimeLegacyHybridCalendarDate(VectorizedRowBatch batch)
+      throws HiveException {
+    GenericUDF genUdf = new GenericUDFDatetimeLegacyHybridCalendar();
+    genUdf.initialize(
+        new ObjectInspector[] {PrimitiveObjectInspectorFactory.writableDateObjectInspector});
+
+    VectorExpression vecUdf = new VectorUDFDatetimeLegacyHybridCalendarDate(0, 1);
+    vecUdf.evaluate(batch);
+    final int in = 0;
+    final int out = 1;
+
+    for (int i = 0; i < batch.size; i++) {
+      if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
+        long input = ((LongColumnVector) batch.cols[in]).vector[i];
+        long output = ((LongColumnVector) batch.cols[out]).vector[i];
+        compareToUDFDatetimeLegacyHybridCalendar(genUdf, input, output);
+      } else {
+        Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
+      }
+    }
+  }
+
+  private void compareToUDFDatetimeLegacyHybridCalendar(GenericUDF udf, long in, long out)
+      throws HiveException {
+    DateWritableV2 dateWInput = new DateWritableV2((int) in);
+    DateWritableV2 dateWOutput = (DateWritableV2) udf
+        .evaluate(new GenericUDF.DeferredObject[] {
+            new GenericUDF.DeferredJavaObject(dateWInput)});
+    Assert.assertEquals(dateWOutput.get(), Date.ofEpochDay((int) out));
+  }
+}
diff --git a/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q b/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q
index ce58a34..dab733d 100644
--- a/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q
+++ b/ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q
@@ -10,3 +10,32 @@ SELECT
   '0501-03-07 17:03:00.4321' AS tss,
   CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
   datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp;
+
+--timestamps after the 1582 Gregorian cutover shouldn't be changed
+SELECT
+  '1600-03-07 17:03:00.4321' AS tss,
+  CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
+  datetime_legacy_hybrid_calendar(CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp;
+
+
+--test vectorized UDF: read date and timestamp columns from an ORC table
+--fetch task conversion is turned off, presumably so the SELECT runs as a (vectorized) map task
+set hive.fetch.task.conversion=none;
+
+create table datetime_legacy_hybrid_calendar(dt date, ts timestamp) stored as orc;
+insert into datetime_legacy_hybrid_calendar values
+('0601-03-07', '0501-03-07 17:03:00.4321'),
+--post-1582 datetimes shouldn't be changed
+('1600-03-07', '1600-03-07 17:03:00.4321');
+
+EXPLAIN
+SELECT
+  dt, datetime_legacy_hybrid_calendar(dt) AS dtp,
+  ts, datetime_legacy_hybrid_calendar(ts) AS tsp
+FROM datetime_legacy_hybrid_calendar;
+
+SELECT
+  dt, datetime_legacy_hybrid_calendar(dt) AS dtp,
+  ts, datetime_legacy_hybrid_calendar(ts) AS tsp
+FROM datetime_legacy_hybrid_calendar;
+
+drop table datetime_legacy_hybrid_calendar;
diff --git a/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out b/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out
index 572c6c1..bd22442 100644
--- a/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out
+++ b/ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out
@@ -2,19 +2,20 @@ PREHOOK: query: DESCRIBE FUNCTION datetime_legacy_hybrid_calendar
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION datetime_legacy_hybrid_calendar
 POSTHOOK: type: DESCFUNCTION
-datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar 
-assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.
+datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to legacy hybrid Julian-Gregorian calendar
+assuming that its internal days/milliseconds since epoch is calculated using the proleptic Gregorian calendar.
 PREHOOK: query: DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar
 POSTHOOK: type: DESCFUNCTION
-datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar 
-assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.
-Converts a date/timestamp to new proleptic Gregorian calendar (ISO 8601 standard), which is produced 
-by extending the Gregorian calendar backward to dates preceding its official introduction in 1582, assuming 
-that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar, 
-i.e., calendar that supports both the Julian and Gregorian calendar systems with the support of a single 
-discontinuity, which corresponds by default to the Gregorian date when the Gregorian calendar was instituted.
+datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to legacy hybrid Julian-Gregorian calendar
+assuming that its internal days/milliseconds since epoch is calculated using the proleptic Gregorian calendar.
+Converts a date/timestamp to legacy Gregorian-Julian hybrid calendar, i.e., calendar that supports both
+the Julian and Gregorian calendar systems with the support of a single discontinuity, which corresponds by
+default to the Gregorian date when the Gregorian calendar was instituted; assuming that its internal
+days/milliseconds since epoch is calculated using new proleptic Gregorian calendar (ISO 8601 standard), which
+is produced by extending the Gregorian calendar backward to dates preceding its official introduction in 1582.
+
 Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFDatetimeLegacyHybridCalendar
 Function type:BUILTIN
 PREHOOK: query: SELECT
@@ -47,3 +48,112 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 #### A masked pattern was here ####
 0501-03-07 17:03:00.4321	0501-03-07 17:03:00.4321	0501-03-05 17:03:00.4321
+PREHOOK: query: SELECT
+  '1600-03-07 17:03:00.4321' AS tss,
+  CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
+  datetime_legacy_hybrid_calendar(CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  '1600-03-07 17:03:00.4321' AS tss,
+  CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP) AS ts,
+  datetime_legacy_hybrid_calendar(CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1600-03-07 17:03:00.4321	1600-03-07 17:03:00.4321	1600-03-07 17:03:00.4321
+PREHOOK: query: create table datetime_legacy_hybrid_calendar(dt date, ts timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@datetime_legacy_hybrid_calendar
+POSTHOOK: query: create table datetime_legacy_hybrid_calendar(dt date, ts timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@datetime_legacy_hybrid_calendar
+PREHOOK: query: insert into datetime_legacy_hybrid_calendar values
+('0601-03-07', '0501-03-07 17:03:00.4321'),
+
+('1600-03-07', '1600-03-07 17:03:00.4321')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@datetime_legacy_hybrid_calendar
+POSTHOOK: query: insert into datetime_legacy_hybrid_calendar values
+('0601-03-07', '0501-03-07 17:03:00.4321'),
+
+('1600-03-07', '1600-03-07 17:03:00.4321')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@datetime_legacy_hybrid_calendar
+POSTHOOK: Lineage: datetime_legacy_hybrid_calendar.dt SCRIPT []
+POSTHOOK: Lineage: datetime_legacy_hybrid_calendar.ts SCRIPT []
+PREHOOK: query: EXPLAIN
+SELECT
+  dt, datetime_legacy_hybrid_calendar(dt) AS dtp,
+  ts, datetime_legacy_hybrid_calendar(ts) AS tsp
+FROM datetime_legacy_hybrid_calendar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@datetime_legacy_hybrid_calendar
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT
+  dt, datetime_legacy_hybrid_calendar(dt) AS dtp,
+  ts, datetime_legacy_hybrid_calendar(ts) AS tsp
+FROM datetime_legacy_hybrid_calendar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@datetime_legacy_hybrid_calendar
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: datetime_legacy_hybrid_calendar
+            Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: dt (type: date), datetime_legacy_hybrid_calendar(dt) (type: date), ts (type: timestamp), datetime_legacy_hybrid_calendar(ts) (type: timestamp)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 2 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 2 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT
+  dt, datetime_legacy_hybrid_calendar(dt) AS dtp,
+  ts, datetime_legacy_hybrid_calendar(ts) AS tsp
+FROM datetime_legacy_hybrid_calendar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@datetime_legacy_hybrid_calendar
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  dt, datetime_legacy_hybrid_calendar(dt) AS dtp,
+  ts, datetime_legacy_hybrid_calendar(ts) AS tsp
+FROM datetime_legacy_hybrid_calendar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@datetime_legacy_hybrid_calendar
+#### A masked pattern was here ####
+0601-03-07	0601-03-04	0501-03-07 17:03:00.4321	0501-03-05 17:03:00.4321
+1600-03-07	1600-03-07	1600-03-07 17:03:00.4321	1600-03-07 17:03:00.4321
+PREHOOK: query: drop table datetime_legacy_hybrid_calendar
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@datetime_legacy_hybrid_calendar
+PREHOOK: Output: default@datetime_legacy_hybrid_calendar
+POSTHOOK: query: drop table datetime_legacy_hybrid_calendar
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@datetime_legacy_hybrid_calendar
+POSTHOOK: Output: default@datetime_legacy_hybrid_calendar