Posted to commits@hive.apache.org by jc...@apache.org on 2019/03/26 19:53:40 UTC

[hive] branch master updated: HIVE-21290: Restore historical way of handling timestamps in Parquet while keeping the new semantics at the same time (Karen Coppage, reviewed by Jesus Camacho Rodriguez)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 10dfb15  HIVE-21290: Restore historical way of handling timestamps in Parquet while keeping the new semantics at the same time (Karen Coppage, reviewed by Jesus Camacho Rodriguez)
10dfb15 is described below

commit 10dfb151e9f2dfbdb4de254a99866261a922c479
Author: Karen Coppage <ka...@cloudera.com>
AuthorDate: Tue Mar 26 12:52:46 2019 -0700

    HIVE-21290: Restore historical way of handling timestamps in Parquet while keeping the new semantics at the same time (Karen Coppage, reviewed by Jesus Camacho Rodriguez)
---
 .../hadoop/hive/common/type/TimestampTZUtil.java   | 20 ++++++-
 .../hive/ql/io/parquet/convert/ETypeConverter.java | 13 ++++-
 .../io/parquet/read/DataWritableReadSupport.java   | 34 +++++++++++
 .../ql/io/parquet/timestamp/NanoTimeUtils.java     | 67 ++++++++++++++++------
 .../parquet/vector/BaseVectorizedColumnReader.java | 10 +++-
 .../vector/ParquetDataColumnReaderFactory.java     | 31 ++++++----
 .../parquet/vector/VectorizedListColumnReader.java |  5 +-
 .../vector/VectorizedParquetRecordReader.java      | 21 ++++---
 .../vector/VectorizedPrimitiveColumnReader.java    |  8 ++-
 .../io/parquet/write/DataWritableWriteSupport.java |  7 ++-
 .../parquet/serde/TestParquetTimestampUtils.java   | 46 ++++++++++-----
 .../queries/clientpositive/parquet_external_time.q |  1 +
 .../clientpositive/parquet_historical_timestamp.q  | 16 ++++++
 .../results/clientpositive/parquet_analyze.q.out   |  4 +-
 .../parquet_historical_timestamp.q.out             | 50 ++++++++++++++++
 .../results/clientpositive/parquet_stats.q.out     |  2 +-
 .../clientpositive/parquet_vectorization_0.q.out   | 20 +++----
 .../spark/parquet_vectorization_0.q.out            | 20 +++----
 18 files changed, 289 insertions(+), 86 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
index 213650c..4708d35 100644
--- a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
+++ b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
@@ -17,12 +17,13 @@
  */
 package org.apache.hadoop.hive.common.type;
 
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
 import java.time.DateTimeException;
+import java.time.Instant;
 import java.time.LocalDate;
+import java.time.LocalDateTime;
 import java.time.LocalTime;
 import java.time.ZoneId;
+import java.time.ZoneOffset;
 import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
 import java.time.format.DateTimeFormatterBuilder;
@@ -145,4 +146,19 @@ public class TimestampTZUtil {
     }
   }
 
+  /**
+   * Timestamps are technically time zone agnostic, so this method bends that rule slightly.
+   * A Timestamp is supposed to represent nanos since [UTC epoch]. Here, the input timestamp
+   * represents nanoseconds since [epoch at fromZone], and we return a Timestamp representing
+   * nanoseconds since [epoch at toZone].
+   */
+  public static Timestamp convertTimestampToZone(Timestamp ts, ZoneId fromZone, ZoneId toZone) {
+    // get nanos since [epoch at fromZone]
+    Instant instant = convert(ts, fromZone).getZonedDateTime().toInstant();
+    // get [local time at toZone]
+    LocalDateTime localDateTimeAtToZone = LocalDateTime.ofInstant(instant, toZone);
+    // get nanos between [epoch at toZone] and [local time at toZone]
+    return Timestamp.ofEpochSecond(localDateTimeAtToZone.toEpochSecond(ZoneOffset.UTC),
+        localDateTimeAtToZone.getNano());
+  }
 }
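
For reference, a minimal sketch of what the new helper does, using the same zones and values as the testConvertTimestampToZone case added further down in this patch; only the class name of the sketch is made up:

    import java.time.ZoneId;

    import org.apache.hadoop.hive.common.type.Timestamp;
    import org.apache.hadoop.hive.common.type.TimestampTZUtil;

    public class ConvertTimestampToZoneSketch {
      public static void main(String[] args) {
        Timestamp ts = Timestamp.valueOf("2018-01-01 00:00:00");
        // Reinterpret the wall clock from America/New_York into US/Pacific.
        Timestamp shifted = TimestampTZUtil.convertTimestampToZone(
            ts, ZoneId.of("America/New_York"), ZoneId.of("US/Pacific"));
        System.out.println(shifted);  // 2017-12-31 21:00:00 (see testConvertTimestampToZone)
      }
    }
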
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 9010ac3..350ae2d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -19,6 +19,7 @@ import java.util.Map;
 
 import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
 import org.apache.hadoop.hive.serde.serdeConstants;
@@ -599,11 +600,17 @@ public enum ETypeConverter {
         protected TimestampWritableV2 convert(Binary binary) {
           NanoTime nt = NanoTime.fromBinary(binary);
           Map<String, String> metadata = parent.getMetadata();
-          //Current Hive parquet timestamp implementation stores it in UTC, but other components do not do that.
-          //If this file written by current Hive implementation itself, we need to do the reverse conversion, else skip the conversion.
+          // The current Hive Parquet timestamp implementation stores timestamps in UTC, but other
+          // components do not; for files written by such components we skip the conversion.
+          // If the file was written by a version of Hive before HIVE-21290, the file metadata will
+          // not contain the writer time zone, so we convert the timestamp to the system (reader)
+          // time zone.
+          // If the file was written by the current Hive implementation, we convert timestamps to
+          // the writer time zone in order to emulate time zone agnostic behavior.
           boolean skipConversion = Boolean.parseBoolean(
               metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname));
-          Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipConversion);
+          Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipConversion,
+              DataWritableReadSupport.getWriterTimeZoneId(metadata));
           return new TimestampWritableV2(ts);
         }
       };
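
The target zone the converter ends up using (via NanoTimeUtils.getTimestamp, further down in this patch) can be summarized in a short sketch; the wrapper class and method here are illustrative only:

    import java.time.ZoneId;
    import java.time.ZoneOffset;
    import java.util.TimeZone;

    class TargetZoneSketch {
      // Mirrors the branch order in NanoTimeUtils.getTimestamp.
      static ZoneId targetZone(boolean skipConversion, ZoneId writerZoneId) {
        if (skipConversion) {
          return ZoneOffset.UTC;                    // leave the stored value as written
        } else if (writerZoneId != null) {
          return writerZoneId;                      // file carries writer.time.zone: zone-agnostic reads
        }
        return TimeZone.getDefault().toZoneId();    // pre-HIVE-21290 file: fall back to the reader zone
      }
    }
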
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
index 7f2a684..30f3d17 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
@@ -13,6 +13,8 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.read;
 
+import java.time.DateTimeException;
+import java.time.ZoneId;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -26,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter;
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
+import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.FieldNode;
 import org.apache.hadoop.hive.ql.optimizer.NestedColumnFieldPruningUtils;
@@ -263,6 +266,25 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable> {
   }
 
   /**
+   * Returns the writer time zone stored in the given metadata as a ZoneId, or null if absent.
+   */
+  public static ZoneId getWriterTimeZoneId(Map<String, String> metadata) {
+    if (metadata == null) {
+      return null;
+    }
+    String value = metadata.get(DataWritableWriteSupport.WRITER_TIMEZONE);
+    try {
+      if (value != null) {
+        return ZoneId.of(value);
+      }
+    } catch (DateTimeException e) {
+      throw new RuntimeException("Can't parse writer time zone stored in file metadata", e);
+    }
+
+    return null;
+  }
+
+  /**
    * Return the columns which contains required nested attribute level
    * E.g., given struct a:<x:int, y:int> while 'x' is required and 'y' is not, the method will return
    * a pruned struct for 'a' which only contains the attribute 'x'
@@ -448,11 +470,23 @@ public class DataWritableReadSupport extends ReadSupport<ArrayWritable> {
       throw new IllegalStateException("ReadContext not initialized properly. " +
         "Don't know the Hive Schema.");
     }
+
     String key = HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname;
     if (!metadata.containsKey(key)) {
       metadata.put(key, String.valueOf(HiveConf.getBoolVar(
         configuration, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)));
     }
+
+    String writerTimezone = DataWritableWriteSupport.WRITER_TIMEZONE;
+    if (!metadata.containsKey(writerTimezone)) {
+      if (keyValueMetaData.containsKey(writerTimezone)) {
+        metadata.put(writerTimezone, keyValueMetaData.get(writerTimezone));
+      }
+    } else if (!metadata.get(writerTimezone).equals(keyValueMetaData.get(writerTimezone))) {
+      throw new IllegalStateException("Metadata contains a writer time zone that does not match "
+          + "file footer's writer time zone.");
+    }
+
     return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata, hiveTypeInfo);
   }
 }
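
A small sketch of how a reader resolves the writer zone from the key-value metadata; WRITER_TIMEZONE and getWriterTimeZoneId come from this patch, while the map contents and class name are made up:

    import java.time.ZoneId;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
    import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;

    public class WriterZoneLookupSketch {
      public static void main(String[] args) {
        // File written by a Hive version that records its zone in the footer.
        Map<String, String> newFile = new HashMap<>();
        newFile.put(DataWritableWriteSupport.WRITER_TIMEZONE, "Europe/Rome");
        ZoneId writerZone = DataWritableReadSupport.getWriterTimeZoneId(newFile);   // Europe/Rome

        // Pre-HIVE-21290 file: no writer.time.zone key, so null is returned and
        // NanoTimeUtils later falls back to the system (reader) time zone.
        ZoneId legacyZone = DataWritableReadSupport.getWriterTimeZoneId(new HashMap<>());  // null

        System.out.println(writerZone + " / " + legacyZone);
      }
    }
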
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
index bf78d8c..44a4858 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
@@ -13,6 +13,8 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.timestamp;
 
+import java.time.ZoneId;
+import java.time.ZoneOffset;
 import java.util.Calendar;
 import java.util.GregorianCalendar;
 import java.util.TimeZone;
@@ -21,6 +23,7 @@ import java.util.concurrent.TimeUnit;
 import org.apache.hadoop.hive.common.type.Timestamp;
 
 import jodd.datetime.JDateTime;
+import org.apache.hadoop.hive.common.type.TimestampTZUtil;
 
 /**
  * Utilities for converting from java.sql.Timestamp to parquet timestamp.
@@ -33,32 +36,40 @@ public class NanoTimeUtils {
    static final long NANOS_PER_DAY = TimeUnit.DAYS.toNanos(1);
 
    private static final ThreadLocal<Calendar> parquetGMTCalendar = new ThreadLocal<Calendar>();
-   private static final ThreadLocal<Calendar> parquetLocalCalendar = new ThreadLocal<Calendar>();
 
    private static Calendar getGMTCalendar() {
      //Calendar.getInstance calculates the current-time needlessly, so cache an instance.
      if (parquetGMTCalendar.get() == null) {
        parquetGMTCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT")));
      }
+     parquetGMTCalendar.get().clear();
      return parquetGMTCalendar.get();
    }
 
-   private static Calendar getLocalCalendar() {
-     if (parquetLocalCalendar.get() == null) {
-       parquetLocalCalendar.set(Calendar.getInstance());
-     }
-     return parquetLocalCalendar.get();
-   }
-
-   public static Calendar getCalendar(boolean skipConversion) {
-     Calendar calendar = skipConversion ? getLocalCalendar() : getGMTCalendar();
-     calendar.clear(); // Reset all fields before reusing this instance
-     return calendar;
+   public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) {
+     return getNanoTime(ts, skipConversion, null);
    }
 
-   public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) {
+  /**
+   * Gets a NanoTime object (Julian day plus nanoseconds within the day) from a Timestamp object.
+   * Parquet will store this NanoTime object as int96.
+   *
+   * If the skipConversion flag is on, the timestamp is converted to NanoTime as-is, i.e. the
+   * timeZoneId argument is ignored.
+   * If skipConversion is off, the timestamp is first converted to UTC: from the given time zone
+   * (timeZoneId) if one is present, otherwise from the system time zone.
+   * (See TimestampDataWriter#write for the current Hive writing procedure.)
+   */
+   public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion, ZoneId timeZoneId) {
+     if (skipConversion) {
+       timeZoneId = ZoneOffset.UTC;
+     } else if (timeZoneId == null) {
+       timeZoneId = TimeZone.getDefault().toZoneId();
+     }
+     ts = TimestampTZUtil.convertTimestampToZone(ts, timeZoneId, ZoneOffset.UTC);
 
-     Calendar calendar = getCalendar(skipConversion);
+     Calendar calendar = getGMTCalendar();
      calendar.setTimeInMillis(ts.toEpochMilli());
      int year = calendar.get(Calendar.YEAR);
      if (calendar.get(Calendar.ERA) == GregorianCalendar.BC) {
@@ -79,7 +90,29 @@ public class NanoTimeUtils {
      return new NanoTime(days, nanosOfDay);
    }
 
-   public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion) {
+  public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion) {
+    return getTimestamp(nt, skipConversion, null);
+  }
+
+  /**
+   * Gets a Timestamp object from a NanoTime object (Julian day plus nanoseconds within the day).
+   * Parquet stores these as int96.
+   *
+   * After decoding the NanoTime, we may convert the resulting timestamp to a desired time zone
+   * (timeZoneId). This only happens if the skipConversion flag is off.
+   * If skipConversion is off and no timeZoneId is given, the timestamp is converted to the system
+   * time zone.
+   *
+   * For skipConversion to be true it must be set in conf AND the parquet file must NOT be written
+   * by parquet's java library (parquet-mr). This is enforced in ParquetRecordReaderBase#getSplit.
+   */
+   public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion, ZoneId timeZoneId) {
+     if (skipConversion) {
+       timeZoneId = ZoneOffset.UTC;
+     } else if (timeZoneId == null) {
+       timeZoneId = TimeZone.getDefault().toZoneId();
+     }
+
      int julianDay = nt.getJulianDay();
      long nanosOfDay = nt.getTimeOfDayNanos();
 
@@ -92,7 +125,7 @@ public class NanoTimeUtils {
      }
 
      JDateTime jDateTime = new JDateTime((double) julianDay);
-     Calendar calendar = getCalendar(skipConversion);
+     Calendar calendar = getGMTCalendar();
      calendar.set(Calendar.YEAR, jDateTime.getYear());
      calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calendar index starting at 1.
      calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay());
@@ -107,7 +140,9 @@ public class NanoTimeUtils {
      calendar.set(Calendar.HOUR_OF_DAY, hour);
      calendar.set(Calendar.MINUTE, minutes);
      calendar.set(Calendar.SECOND, seconds);
+
      Timestamp ts = Timestamp.ofEpochMilli(calendar.getTimeInMillis(), (int) nanos);
+     ts = TimestampTZUtil.convertTimestampToZone(ts, ZoneOffset.UTC, timeZoneId);
      return ts;
    }
 }
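
A hedged sketch of the intended write/read round trip with an explicit writer zone; the timestamp and zone match the new q-test below, while the wrapper class is illustrative:

    import java.time.ZoneId;

    import org.apache.hadoop.hive.common.type.Timestamp;
    import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
    import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;

    public class NanoTimeRoundTripSketch {
      public static void main(String[] args) {
        Timestamp written = Timestamp.valueOf("2019-01-01 00:30:30.111111111");
        ZoneId writerZone = ZoneId.of("Europe/Rome");

        // Write path: normalize from the writer zone to UTC, encode as Julian day + nanos of day.
        NanoTime nt = NanoTimeUtils.getNanoTime(written, false, writerZone);

        // Read path: decode from UTC back into the writer zone, recovering the original wall clock.
        Timestamp read = NanoTimeUtils.getTimestamp(nt, false, writerZone);
        System.out.println(read);  // 2019-01-01 00:30:30.111111111
      }
    }
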
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java
index 9ce1ba4..e8fcb6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java
@@ -39,6 +39,7 @@ import org.slf4j.LoggerFactory;
 
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.time.ZoneId;
 
 import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL;
 import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL;
@@ -53,6 +54,7 @@ public abstract class BaseVectorizedColumnReader implements VectorizedColumnRead
   private static final Logger LOG = LoggerFactory.getLogger(BaseVectorizedColumnReader.class);
 
   protected boolean skipTimestampConversion = false;
+  protected ZoneId writerTimezone = null;
 
   /**
    * Total number of values read.
@@ -116,12 +118,14 @@ public abstract class BaseVectorizedColumnReader implements VectorizedColumnRead
       ColumnDescriptor descriptor,
       PageReader pageReader,
       boolean skipTimestampConversion,
+      ZoneId writerTimezone,
       Type parquetType, TypeInfo hiveType) throws IOException {
     this.descriptor = descriptor;
     this.type = parquetType;
     this.pageReader = pageReader;
     this.maxDefLevel = descriptor.getMaxDefinitionLevel();
     this.skipTimestampConversion = skipTimestampConversion;
+    this.writerTimezone = writerTimezone;
     this.hiveType = hiveType;
 
     DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
@@ -130,7 +134,7 @@ public abstract class BaseVectorizedColumnReader implements VectorizedColumnRead
         this.dictionary = ParquetDataColumnReaderFactory
             .getDataColumnReaderByTypeOnDictionary(parquetType.asPrimitiveType(), hiveType,
                 dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage),
-                skipTimestampConversion);
+                skipTimestampConversion, writerTimezone);
         this.isCurrentPageDictionaryEncoded = true;
       } catch (IOException e) {
         throw new IOException("could not decode the dictionary for " + descriptor, e);
@@ -182,11 +186,11 @@ public abstract class BaseVectorizedColumnReader implements VectorizedColumnRead
       }
       dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType,
           dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary
-              .getDictionary()), skipTimestampConversion);
+              .getDictionary()), skipTimestampConversion, writerTimezone);
       this.isCurrentPageDictionaryEncoded = true;
     } else {
       dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType,
-          dataEncoding.getValuesReader(descriptor, VALUES), skipTimestampConversion);
+          dataEncoding.getValuesReader(descriptor, VALUES), skipTimestampConversion, writerTimezone);
       this.isCurrentPageDictionaryEncoded = false;
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
index 7372275..320ce52 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
@@ -44,6 +44,7 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.charset.StandardCharsets;
+import java.time.ZoneId;
 import java.util.Arrays;
 
 /**
@@ -1172,16 +1173,20 @@ public final class ParquetDataColumnReaderFactory {
    */
   public static class TypesFromInt96PageReader extends DefaultParquetDataColumnReader {
     private boolean skipTimestampConversion = false;
+    private ZoneId writerTimezone;
 
     public TypesFromInt96PageReader(ValuesReader realReader, int length,
-                                    boolean skipTimestampConversion) {
+                                    boolean skipTimestampConversion, ZoneId writerTimezone) {
       super(realReader, length);
       this.skipTimestampConversion = skipTimestampConversion;
+      this.writerTimezone = writerTimezone;
     }
 
-    public TypesFromInt96PageReader(Dictionary dict, int length, boolean skipTimestampConversion) {
+    public TypesFromInt96PageReader(Dictionary dict, int length, boolean skipTimestampConversion,
+        ZoneId writerTimezone) {
       super(dict, length);
       this.skipTimestampConversion = skipTimestampConversion;
+      this.writerTimezone = writerTimezone;
     }
 
     private Timestamp convert(Binary binary) {
@@ -1190,7 +1195,7 @@ public final class ParquetDataColumnReaderFactory {
       long timeOfDayNanos = buf.getLong();
       int julianDay = buf.getInt();
       NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
-      return NanoTimeUtils.getTimestamp(nt, skipTimestampConversion);
+      return NanoTimeUtils.getTimestamp(nt, skipTimestampConversion, writerTimezone);
     }
 
     @Override
@@ -1477,7 +1482,8 @@ public final class ParquetDataColumnReaderFactory {
                                                                          Dictionary dictionary,
                                                                          ValuesReader valuesReader,
                                                                          boolean
-                                                                             skipTimestampConversion)
+                                                                             skipTimestampConversion,
+                                                                         ZoneId writerTimezone)
       throws IOException {
     // max length for varchar and char cases
     int length = getVarcharLength(hiveType);
@@ -1523,8 +1529,8 @@ public final class ParquetDataColumnReaderFactory {
           hiveScale) : new TypesFromFloatPageReader(valuesReader, length, hivePrecision, hiveScale);
     case INT96:
       return isDictionary ? new TypesFromInt96PageReader(dictionary, length,
-          skipTimestampConversion) : new
-          TypesFromInt96PageReader(valuesReader, length, skipTimestampConversion);
+          skipTimestampConversion, writerTimezone) : new
+          TypesFromInt96PageReader(valuesReader, length, skipTimestampConversion, writerTimezone);
     case BOOLEAN:
       return isDictionary ? new TypesFromBooleanPageReader(dictionary, length) : new
           TypesFromBooleanPageReader(valuesReader, length);
@@ -1584,19 +1590,20 @@ public final class ParquetDataColumnReaderFactory {
   public static ParquetDataColumnReader getDataColumnReaderByTypeOnDictionary(
       PrimitiveType parquetType,
       TypeInfo hiveType,
-      Dictionary realReader, boolean skipTimestampConversion)
+      Dictionary realReader,
+      boolean skipTimestampConversion,
+      ZoneId writerTimezone)
       throws IOException {
     return getDataColumnReaderByTypeHelper(true, parquetType, hiveType, realReader, null,
-        skipTimestampConversion);
+        skipTimestampConversion, writerTimezone);
   }
 
   public static ParquetDataColumnReader getDataColumnReaderByType(PrimitiveType parquetType,
-                                                                  TypeInfo hiveType,
-                                                                  ValuesReader realReader,
-                                                                  boolean skipTimestampConversion)
+      TypeInfo hiveType, ValuesReader realReader, boolean skipTimestampConversion,
+      ZoneId writerTimezone)
       throws IOException {
     return getDataColumnReaderByTypeHelper(false, parquetType, hiveType, null, realReader,
-        skipTimestampConversion);
+        skipTimestampConversion, writerTimezone);
   }
 
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
index 7e52b07..5c1ce70 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
@@ -28,6 +28,7 @@ import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.page.PageReader;
 import org.apache.parquet.schema.Type;
 import java.io.IOException;
+import java.time.ZoneId;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -47,9 +48,9 @@ public class VectorizedListColumnReader extends BaseVectorizedColumnReader {
   boolean isFirstRow = true;
 
   public VectorizedListColumnReader(ColumnDescriptor descriptor, PageReader pageReader,
-                                    boolean skipTimestampConversion, Type type, TypeInfo hiveType)
+      boolean skipTimestampConversion, ZoneId writerTimezone, Type type, TypeInfo hiveType)
       throws IOException {
-    super(descriptor, pageReader, skipTimestampConversion, type, hiveType);
+    super(descriptor, pageReader, skipTimestampConversion, writerTimezone, type, hiveType);
   }
 
   @Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
index fd776cf..61e2556 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
@@ -73,6 +73,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.time.ZoneId;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
@@ -125,6 +126,7 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
    * rows of all the row groups.
    */
   protected long totalRowCount = 0;
+  private ZoneId writerTimezone;
 
   public VectorizedParquetRecordReader(
       org.apache.hadoop.mapred.InputSplit oldInputSplit, JobConf conf) {
@@ -250,6 +252,8 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
       this.totalRowCount += block.getRowCount();
     }
     this.fileSchema = footer.getFileMetaData().getSchema();
+    this.writerTimezone = DataWritableReadSupport
+        .getWriterTimeZoneId(footer.getFileMetaData().getKeyValueMetaData());
 
     colsToInclude = ColumnProjectionUtils.getReadColumnIDs(configuration);
     requestedSchema = DataWritableReadSupport
@@ -440,13 +444,13 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
         for (int i = 0; i < types.size(); ++i) {
           columnReaders[i] =
               buildVectorizedParquetReader(columnTypesList.get(colsToInclude.get(i)), types.get(i),
-                  pages, requestedSchema.getColumns(), skipTimestampConversion, 0);
+                  pages, requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, 0);
         }
       }
     } else {
       for (int i = 0; i < types.size(); ++i) {
         columnReaders[i] = buildVectorizedParquetReader(columnTypesList.get(i), types.get(i), pages,
-          requestedSchema.getColumns(), skipTimestampConversion, 0);
+          requestedSchema.getColumns(), skipTimestampConversion, writerTimezone, 0);
       }
     }
 
@@ -489,6 +493,7 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
     PageReadStore pages,
     List<ColumnDescriptor> columnDescriptors,
     boolean skipTimestampConversion,
+    ZoneId writerTimezone,
     int depth) throws IOException {
     List<ColumnDescriptor> descriptors =
       getAllColumnDescriptorByType(depth, type, columnDescriptors);
@@ -500,7 +505,8 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
       }
       if (fileSchema.getColumns().contains(descriptors.get(0))) {
         return new VectorizedPrimitiveColumnReader(descriptors.get(0),
-          pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type, typeInfo);
+            pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone, type,
+            typeInfo);
       } else {
         // Support for schema evolution
         return new VectorizedDummyColumnReader();
@@ -513,7 +519,7 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
       for (int i = 0; i < fieldTypes.size(); i++) {
         VectorizedColumnReader r =
           buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors,
-            skipTimestampConversion, depth + 1);
+            skipTimestampConversion, writerTimezone, depth + 1);
         if (r != null) {
           fieldReaders.add(r);
         } else {
@@ -531,7 +537,8 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
       }
 
       return new VectorizedListColumnReader(descriptors.get(0),
-          pages.getPageReader(descriptors.get(0)), skipTimestampConversion, getElementType(type),
+          pages.getPageReader(descriptors.get(0)), skipTimestampConversion, writerTimezone,
+          getElementType(type),
           typeInfo);
     case MAP:
       if (columnDescriptors == null || columnDescriptors.isEmpty()) {
@@ -564,10 +571,10 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
       List<Type> kvTypes = groupType.getFields();
       VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader(
           descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion,
-          kvTypes.get(0), typeInfo);
+          writerTimezone, kvTypes.get(0), typeInfo);
       VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader(
           descriptors.get(1), pages.getPageReader(descriptors.get(1)), skipTimestampConversion,
-          kvTypes.get(1), typeInfo);
+          writerTimezone, kvTypes.get(1), typeInfo);
       return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader);
     case UNION:
     default:
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
index 003fbd9..1a861f3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
@@ -31,6 +31,7 @@ import org.apache.parquet.schema.DecimalMetadata;
 import org.apache.parquet.schema.Type;
 
 import java.io.IOException;
+import java.time.ZoneId;
 
 /**
  * It's column level Parquet reader which is used to read a batch of records for a column,
@@ -47,8 +48,11 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader
       ColumnDescriptor descriptor,
       PageReader pageReader,
       boolean skipTimestampConversion,
-      Type type, TypeInfo hiveType) throws IOException {
-    super(descriptor, pageReader, skipTimestampConversion, type, hiveType);
+      ZoneId writerTimezone,
+      Type type,
+      TypeInfo hiveType)
+      throws IOException {
+    super(descriptor, pageReader, skipTimestampConversion, writerTimezone, type, hiveType);
   }
 
   @Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java
index 8960944..8acde81 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriteSupport.java
@@ -14,6 +14,8 @@
 package org.apache.hadoop.hive.ql.io.parquet.write;
 
 import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord;
@@ -32,6 +34,7 @@ import org.apache.parquet.schema.MessageTypeParser;
 public class DataWritableWriteSupport extends WriteSupport<ParquetHiveRecord> {
 
   public static final String PARQUET_HIVE_SCHEMA = "parquet.hive.schema";
+  public static final String WRITER_TIMEZONE = "writer.time.zone";
 
   private DataWritableWriter writer;
   private MessageType schema;
@@ -47,7 +50,9 @@ public class DataWritableWriteSupport extends WriteSupport<ParquetHiveRecord> {
   @Override
   public WriteContext init(final Configuration configuration) {
     schema = getSchema(configuration);
-    return new WriteContext(schema, new HashMap<String, String>());
+    Map<String, String> metaData = new HashMap<>();
+    metaData.put(WRITER_TIMEZONE, TimeZone.getDefault().toZoneId().toString());
+    return new WriteContext(schema, metaData);
   }
 
   @Override
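
On the write side, init() now records the JVM default zone in the footer; a sketch of the key-value pair readers will later find there (the printed zone obviously depends on the writing JVM):

    import java.util.HashMap;
    import java.util.Map;
    import java.util.TimeZone;

    import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;

    public class WriterTimezoneMetadataSketch {
      public static void main(String[] args) {
        // Same key/value pair that DataWritableWriteSupport.init() adds to the WriteContext.
        Map<String, String> metaData = new HashMap<>();
        metaData.put(DataWritableWriteSupport.WRITER_TIMEZONE,
            TimeZone.getDefault().toZoneId().toString());
        System.out.println(metaData);  // e.g. {writer.time.zone=Europe/Rome}
      }
    }
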
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
index 477825e..52b8401 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
@@ -13,12 +13,13 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.serde;
 
+import java.time.ZoneId;
 import java.util.Calendar;
-import java.util.GregorianCalendar;
 import java.util.TimeZone;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.common.type.TimestampTZUtil;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
 
@@ -32,6 +33,10 @@ import junit.framework.TestCase;
  */
 public class TestParquetTimestampUtils extends TestCase {
 
+  public static final ZoneId GMT = ZoneId.of("GMT");
+  public static final ZoneId US_PACIFIC = ZoneId.of("US/Pacific");
+  public static final ZoneId NEW_YORK = ZoneId.of("America/New_York");
+
   public void testJulianDay() {
     //check if May 23, 1968 is Julian Day 2440000
     Calendar cal = Calendar.getInstance();
@@ -76,10 +81,11 @@ public class TestParquetTimestampUtils extends TestCase {
     Assert.assertEquals(ts2Fetched, ts2);
     Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30);
 
-    //check if 1464305 Julian Days between Jan 1, 2005 BC and Jan 31, 2005.
+    // check if there are 730517 Julian Days between Jan 1, 0005 and Jan 31, 2005.
+    // This method used to check the Julian Days between Jan 1, 2005 BCE and Jan 31, 2005 CE.
+    // Since BCE timestamps are not supported, the BCE date was changed to Jan 1, 0005 CE.
     cal1 = Calendar.getInstance();
-    cal1.set(Calendar.ERA,  GregorianCalendar.BC);
-    cal1.set(Calendar.YEAR,  2005);
+    cal1.set(Calendar.YEAR,  0005);
     cal1.set(Calendar.MONTH, Calendar.JANUARY);
     cal1.set(Calendar.DAY_OF_MONTH, 1);
     cal1.set(Calendar.HOUR_OF_DAY, 0);
@@ -103,7 +109,7 @@ public class TestParquetTimestampUtils extends TestCase {
 
     ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false);
     Assert.assertEquals(ts2Fetched, ts2);
-    Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 1464305);
+    Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 730517);
 }
 
   public void testNanos() {
@@ -119,7 +125,7 @@ public class TestParquetTimestampUtils extends TestCase {
     Timestamp ts = Timestamp.ofEpochMilli(cal.getTimeInMillis(), 1);
 
     //(1*60*60 + 1*60 + 1) * 10e9 + 1
-    NanoTime nt = NanoTimeUtils.getNanoTime(ts, false);
+    NanoTime nt = NanoTimeUtils.getNanoTime(ts, false, GMT);
     Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L);
 
     //case 2: 23:59:59.999999999
@@ -134,7 +140,7 @@ public class TestParquetTimestampUtils extends TestCase {
     ts = Timestamp.ofEpochMilli(cal.getTimeInMillis(), 999999999);
 
     //(23*60*60 + 59*60 + 59)*10e9 + 999999999
-    nt = NanoTimeUtils.getNanoTime(ts, false);
+    nt = NanoTimeUtils.getNanoTime(ts, false, GMT);
     Assert.assertEquals(nt.getTimeOfDayNanos(), 86399999999999L);
 
     //case 3: verify the difference.
@@ -158,15 +164,15 @@ public class TestParquetTimestampUtils extends TestCase {
     cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
     Timestamp ts1 = Timestamp.ofEpochMilli(cal1.getTimeInMillis(), 1);
 
-    NanoTime n2 = NanoTimeUtils.getNanoTime(ts2, false);
-    NanoTime n1 = NanoTimeUtils.getNanoTime(ts1, false);
+    NanoTime n2 = NanoTimeUtils.getNanoTime(ts2, false, GMT);
+    NanoTime n1 = NanoTimeUtils.getNanoTime(ts1, false, GMT);
 
     Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L);
 
     NanoTime n3 = new NanoTime(n1.getJulianDay() - 1, n1.getTimeOfDayNanos() + TimeUnit.DAYS.toNanos(1));
-    Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false));
+    Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false, GMT));
     n3 = new NanoTime(n1.getJulianDay() + 3, n1.getTimeOfDayNanos() - TimeUnit.DAYS.toNanos(3));
-    Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false));
+    Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false, GMT));
   }
 
   public void testTimezone() {
@@ -179,6 +185,7 @@ public class TestParquetTimestampUtils extends TestCase {
     cal.set(Calendar.SECOND, 1);
     cal.setTimeZone(TimeZone.getTimeZone("US/Pacific"));
     Timestamp ts = Timestamp.ofEpochMilli(cal.getTimeInMillis(), 1);
+    ts = TimestampTZUtil.convertTimestampToZone(ts, GMT, US_PACIFIC);
 
     /**
      * 17:00 PDT = 00:00 GMT (daylight-savings)
@@ -187,7 +194,7 @@ public class TestParquetTimestampUtils extends TestCase {
      * 17:00 PST = 01:00 GMT (if not daylight savings)
      * (1*60*60 + 1*60 + 1)*10e9 + 1 = 3661000000001
      */
-    NanoTime nt = NanoTimeUtils.getNanoTime(ts, false);
+    NanoTime nt = NanoTimeUtils.getNanoTime(ts, false, US_PACIFIC);
     long timeOfDayNanos = nt.getTimeOfDayNanos();
     Assert.assertTrue(timeOfDayNanos == 61000000001L || timeOfDayNanos == 3661000000001L);
 
@@ -206,15 +213,15 @@ public class TestParquetTimestampUtils extends TestCase {
   public void testTimezoneless() {
     Timestamp ts1 = Timestamp.valueOf("2011-01-01 00:30:30.111111111");
     NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1, true);
-    Assert.assertEquals(nt1.getJulianDay(), 2455562);
-    Assert.assertEquals(nt1.getTimeOfDayNanos(), 59430111111111L);
+    Assert.assertEquals(nt1.getJulianDay(), 2455563);
+    Assert.assertEquals(nt1.getTimeOfDayNanos(), 1830111111111L);
     Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1, true);
     Assert.assertEquals(ts1Fetched.toString(), ts1.toString());
 
     Timestamp ts2 = Timestamp.valueOf("2011-02-02 08:30:30.222222222");
     NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2, true);
     Assert.assertEquals(nt2.getJulianDay(), 2455595);
-    Assert.assertEquals(nt2.getTimeOfDayNanos(), 1830222222222L);
+    Assert.assertEquals(nt2.getTimeOfDayNanos(), 30630222222222L);
     Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2, true);
     Assert.assertEquals(ts2Fetched.toString(), ts2.toString());
   }
@@ -254,4 +261,13 @@ public class TestParquetTimestampUtils extends TestCase {
     Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt, local);
     Assert.assertEquals(tsString, tsFetched.toString());
   }
+
+  public void testConvertTimestampToZone() {
+    Timestamp ts = Timestamp.valueOf("2018-01-01 00:00:00");
+    Timestamp ts1 = TimestampTZUtil.convertTimestampToZone(ts, NEW_YORK, US_PACIFIC);
+    Assert.assertTrue(Timestamp.valueOf("2017-12-31 21:00:00").equals(ts1));
+
+    Timestamp ts2 = TimestampTZUtil.convertTimestampToZone(ts, US_PACIFIC, NEW_YORK);
+    Assert.assertTrue(Timestamp.valueOf("2018-01-01 03:00:00").equals(ts2));
+  }
 }
diff --git a/ql/src/test/queries/clientpositive/parquet_external_time.q b/ql/src/test/queries/clientpositive/parquet_external_time.q
index d83125c..19a7059 100644
--- a/ql/src/test/queries/clientpositive/parquet_external_time.q
+++ b/ql/src/test/queries/clientpositive/parquet_external_time.q
@@ -1,4 +1,5 @@
 set hive.vectorized.execution.enabled=false;
+set hive.parquet.timestamp.skip.conversion=true;
 
 create table timetest_parquet(t timestamp) stored as parquet;
 
diff --git a/ql/src/test/queries/clientpositive/parquet_historical_timestamp.q b/ql/src/test/queries/clientpositive/parquet_historical_timestamp.q
new file mode 100644
index 0000000..3d2b382
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_historical_timestamp.q
@@ -0,0 +1,16 @@
+--These files were created by inserting timestamp '2019-01-01 00:30:30.111111111' where the writer time zone is Europe/Rome.
+
+--older writer: time zone dependent behavior. convert to reader time zone
+create table legacy_table (t timestamp) stored as parquet;
+
+load data local inpath '../../data/files/parquet_historical_timestamp_legacy.parq' into table legacy_table;
+
+select * from legacy_table;
+
+
+--newer writer: time zone agnostic behavior. convert to writer time zone
+create table new_table (t timestamp) stored as parquet;
+
+load data local inpath '../../data/files/parquet_historical_timestamp_new.parq' into table new_table;
+
+select * from new_table;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/parquet_analyze.q.out b/ql/src/test/results/clientpositive/parquet_analyze.q.out
index f2088fd..cac82a0 100644
--- a/ql/src/test/results/clientpositive/parquet_analyze.q.out
+++ b/ql/src/test/results/clientpositive/parquet_analyze.q.out
@@ -94,7 +94,7 @@ Table Parameters:
 	numFiles            	1                   
 	numRows             	100                 
 	rawDataSize         	5936                
-	totalSize           	6730                
+	totalSize           	6761                
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -142,7 +142,7 @@ Table Parameters:
 	numFiles            	1                   
 	numRows             	100                 
 	rawDataSize         	5936                
-	totalSize           	6730                
+	totalSize           	6761                
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
diff --git a/ql/src/test/results/clientpositive/parquet_historical_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_historical_timestamp.q.out
new file mode 100644
index 0000000..9d50b22
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_historical_timestamp.q.out
@@ -0,0 +1,50 @@
+PREHOOK: query: create table legacy_table (t timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@legacy_table
+POSTHOOK: query: create table legacy_table (t timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@legacy_table
+PREHOOK: query: load data local inpath '../../data/files/parquet_historical_timestamp_legacy.parq' into table legacy_table
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@legacy_table
+POSTHOOK: query: load data local inpath '../../data/files/parquet_historical_timestamp_legacy.parq' into table legacy_table
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@legacy_table
+PREHOOK: query: select * from legacy_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@legacy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from legacy_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@legacy_table
+#### A masked pattern was here ####
+2018-12-31 16:30:30.111111111
+PREHOOK: query: create table new_table (t timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@new_table
+POSTHOOK: query: create table new_table (t timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@new_table
+PREHOOK: query: load data local inpath '../../data/files/parquet_historical_timestamp_new.parq' into table new_table
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@new_table
+POSTHOOK: query: load data local inpath '../../data/files/parquet_historical_timestamp_new.parq' into table new_table
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@new_table
+PREHOOK: query: select * from new_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@new_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from new_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@new_table
+#### A masked pattern was here ####
+2019-01-01 00:30:30.111111111
diff --git a/ql/src/test/results/clientpositive/parquet_stats.q.out b/ql/src/test/results/clientpositive/parquet_stats.q.out
index 007b9a7..ea15e91 100644
--- a/ql/src/test/results/clientpositive/parquet_stats.q.out
+++ b/ql/src/test/results/clientpositive/parquet_stats.q.out
@@ -48,7 +48,7 @@ Table Parameters:
 	numFiles            	1                   
 	numRows             	2                   
 	rawDataSize         	146                 
-	totalSize           	469                 
+	totalSize           	500                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
index ebbe004..dfb87d9 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out
@@ -1144,7 +1144,7 @@ STAGE PLANS:
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-              totalSize 595141
+              totalSize 595172
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
           
@@ -1166,7 +1166,7 @@ STAGE PLANS:
                 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                totalSize 595141
+                totalSize 595172
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
               name: default.alltypesparquet
@@ -29969,7 +29969,7 @@ STAGE PLANS:
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-              totalSize 595141
+              totalSize 595172
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
           
@@ -29991,7 +29991,7 @@ STAGE PLANS:
                 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                totalSize 595141
+                totalSize 595172
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
               name: default.alltypesparquet
@@ -30086,7 +30086,7 @@ STAGE PLANS:
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-              totalSize 595141
+              totalSize 595172
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
           
@@ -30108,7 +30108,7 @@ STAGE PLANS:
                 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                totalSize 595141
+                totalSize 595172
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
               name: default.alltypesparquet
@@ -30207,7 +30207,7 @@ STAGE PLANS:
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-              totalSize 595141
+              totalSize 595172
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
           
@@ -30229,7 +30229,7 @@ STAGE PLANS:
                 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                totalSize 595141
+                totalSize 595172
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
               name: default.alltypesparquet
@@ -30314,7 +30314,7 @@ STAGE PLANS:
               serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-              totalSize 595141
+              totalSize 595172
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
           
@@ -30336,7 +30336,7 @@ STAGE PLANS:
                 serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                totalSize 595141
+                totalSize 595172
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
               name: default.alltypesparquet
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
index adb9f4a..1af0902 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
@@ -1253,7 +1253,7 @@ STAGE PLANS:
                     serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                    totalSize 595141
+                    totalSize 595172
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                 
@@ -1275,7 +1275,7 @@ STAGE PLANS:
                       serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                      totalSize 595141
+                      totalSize 595172
 #### A masked pattern was here ####
                     serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                     name: default.alltypesparquet
@@ -30083,7 +30083,7 @@ STAGE PLANS:
                     serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                    totalSize 595141
+                    totalSize 595172
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                 
@@ -30105,7 +30105,7 @@ STAGE PLANS:
                       serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                      totalSize 595141
+                      totalSize 595172
 #### A masked pattern was here ####
                     serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                     name: default.alltypesparquet
@@ -30203,7 +30203,7 @@ STAGE PLANS:
                     serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                    totalSize 595141
+                    totalSize 595172
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                 
@@ -30225,7 +30225,7 @@ STAGE PLANS:
                       serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                      totalSize 595141
+                      totalSize 595172
 #### A masked pattern was here ####
                     serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                     name: default.alltypesparquet
@@ -30327,7 +30327,7 @@ STAGE PLANS:
                     serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                    totalSize 595141
+                    totalSize 595172
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                 
@@ -30349,7 +30349,7 @@ STAGE PLANS:
                       serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                      totalSize 595141
+                      totalSize 595172
 #### A masked pattern was here ####
                     serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                     name: default.alltypesparquet
@@ -30439,7 +30439,7 @@ STAGE PLANS:
                     serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                    totalSize 595141
+                    totalSize 595172
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                 
@@ -30461,7 +30461,7 @@ STAGE PLANS:
                       serialization.ddl struct alltypesparquet { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
-                      totalSize 595141
+                      totalSize 595172
 #### A masked pattern was here ####
                     serde: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
                     name: default.alltypesparquet