You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/12/04 00:12:26 UTC
[orc] branch branch-1.5 updated: ORC-27: Add support for proleptic Gregorian calendar for better support of dates before 1600AD.

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.5 by this push:
     new 3b39b27  ORC-27: Add support for proleptic Gregorian calendar for better support of dates before 1600AD.
3b39b27 is described below

commit 3b39b2708ecc2dc7b8ebf264a9edede8f7b70d57
Author: Owen O'Malley <om...@apache.org>
AuthorDate: Tue Nov 26 09:57:16 2019 -0800

    ORC-27: Add support for proleptic Gregorian calendar for better support of
    dates before 1600AD.
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 java/core/src/java/org/apache/orc/OrcConf.java     |  11 +-
 java/core/src/java/org/apache/orc/OrcFile.java     |  43 ++++-
 java/core/src/java/org/apache/orc/Reader.java      |   5 +
 .../src/java/org/apache/orc/StripeStatistics.java  |  14 +-
 .../src/java/org/apache/orc/TypeDescription.java   |   4 +-
 .../org/apache/orc/impl/ColumnStatisticsImpl.java  |  52 ++++--
 .../src/java/org/apache/orc/impl/DateUtils.java    | 182 +++++++++++++++++++++
 .../src/java/org/apache/orc/impl/ReaderImpl.java   |  15 +-
 .../java/org/apache/orc/impl/RecordReaderImpl.java |  16 +-
 .../org/apache/orc/impl/TreeReaderFactory.java     |  47 ++++++
 .../src/java/org/apache/orc/impl/WriterImpl.java   |  42 ++++-
 .../org/apache/orc/impl/writer/DateTreeWriter.java |   9 +
 .../orc/impl/writer/TimestampTreeWriter.java       |   3 +
 .../org/apache/orc/impl/writer/WriterContext.java  |   6 +
 .../org/apache/orc/TestProlepticConversions.java   | 177 ++++++++++++++++++++
 .../test/org/apache/orc/TestStringDictionary.java  |   5 +
 .../org/apache/orc/impl/TestRecordReaderImpl.java  |  48 +++++-
 java/pom.xml                                       |   2 +-
 .../src/java/org/apache/orc/tools/FileDump.java    |  12 +-
 .../java/org/apache/orc/tools/JsonFileDump.java    |  20 +--
 .../test/resources/orc-file-dump-bloomfilter.out   |   1 +
 .../test/resources/orc-file-dump-bloomfilter2.out  |   1 +
 .../orc-file-dump-dictionary-threshold.out         |   1 +
 java/tools/src/test/resources/orc-file-dump.json   |   1 +
 java/tools/src/test/resources/orc-file-dump.out    |   1 +
 .../tools/src/test/resources/orc-file-has-null.out |   1 +
 proto/orc_proto.proto                              |  11 ++
 27 files changed, 688 insertions(+), 42 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index a6fbad1..6586937 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -162,7 +162,16 @@ public enum OrcConf {
       "Comma-separated list of columns for which dictionary encoding is to be skipped."),
   // some JVM doesn't allow array creation of size Integer.MAX_VALUE, so chunk size is slightly less than max int
   ORC_MAX_DISK_RANGE_CHUNK_LIMIT("orc.max.disk.range.chunk.limit", "hive.exec.orc.max.disk.range.chunk.limit",
-    Integer.MAX_VALUE - 1024, "When reading stripes >2GB, specify max limit for the chunk size.")
+    Integer.MAX_VALUE - 1024, "When reading stripes >2GB, specify max limit for the chunk size."),
+  PROLEPTIC_GREGORIAN("orc.proleptic.gregorian", "orc.proleptic.gregorian", false,
+      "Should we read and write dates & times using the proleptic Gregorian calendar\n" +
+          "instead of the hybrid Julian Gregorian? Hive before 3.1 and Spark before 3.0\n" +
+          "used hybrid."),
+  PROLEPTIC_GREGORIAN_DEFAULT("orc.proleptic.gregorian.default",
+      "orc.proleptic.gregorian.default", false,
+      "This value controls whether pre-ORC 27 files are using the hybrid or proleptic\n" +
+      "calendar. Only Hive 3.1 and the C++ library wrote using the proleptic, so hybrid\n" +
+      "is the default.")
   ;
 
   private final String attribute;
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index 62e6260..5b15be2 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -278,9 +278,11 @@ public class OrcFile {
     // For now keeping this around to avoid complex surgery
     private FileMetadata fileMetadata;
     private boolean useUTCTimestamp;
+    private boolean useProlepticGregorian;
 
     public ReaderOptions(Configuration conf) {
       this.conf = conf;
+      this.useProlepticGregorian = OrcConf.PROLEPTIC_GREGORIAN.getBoolean(conf);
     }
 
     public ReaderOptions filesystem(FileSystem fs) {
@@ -298,6 +300,17 @@ public class OrcFile {
       return this;
     }
 
+    /**
+     * Should the reader convert dates and times to the proleptic Gregorian
+     * calendar?
+     * @param newValue should it use the proleptic Gregorian calendar?
+     * @return this
+     */
+    public ReaderOptions convertToProlepticGregorian(boolean newValue) {
+      this.useProlepticGregorian = newValue;
+      return this;
+    }
+
     public Configuration getConfiguration() {
       return conf;
     }
@@ -332,6 +345,9 @@ public class OrcFile {
       return useUTCTimestamp;
     }
 
+    public boolean getConvertToProlepticGregorian() {
+      return useProlepticGregorian;
+    }
   }
 
   public static ReaderOptions readerOptions(Configuration conf) {
@@ -409,6 +425,7 @@ public class OrcFile {
     private boolean writeVariableLengthBlocks;
     private HadoopShims shims;
     private String directEncodingColumns;
+    private boolean useProlepticGregorian;
 
     protected WriterOptions(Properties tableProperties, Configuration conf) {
       configuration = conf;
@@ -453,6 +470,7 @@ public class OrcFile {
           OrcConf.WRITE_VARIABLE_LENGTH_BLOCKS.getBoolean(tableProperties,conf);
       directEncodingColumns = OrcConf.DIRECT_ENCODING_COLUMNS.getString(
           tableProperties, conf);
+      useProlepticGregorian = OrcConf.PROLEPTIC_GREGORIAN.getBoolean(conf);
     }
 
     /**
@@ -701,6 +719,17 @@ public class OrcFile {
       return this;
     }
 
+    /**
+     * Should the writer use the proleptic Gregorian calendar for
+     * times and dates.
+     * @param newValue true if we should use the proleptic calendar
+     * @return this
+     */
+    public WriterOptions setProlepticGregorian(boolean newValue) {
+      this.useProlepticGregorian = newValue;
+      return this;
+    }
+
     public boolean getBlockPadding() {
       return blockPaddingValue;
     }
@@ -804,6 +833,10 @@ public class OrcFile {
     public String getDirectEncodingColumns() {
       return directEncodingColumns;
     }
+
+    public boolean getProlepticGregorian() {
+      return useProlepticGregorian;
+    }
   }
 
   /**
@@ -898,6 +931,7 @@ public class OrcFile {
                                     int rowIndexStride,
                                     CompressionKind compression,
                                     Map<String, ByteBuffer> userMetadata,
+                                    boolean writerUsedProlepticGregorian,
                                     Path path,
                                     Reader reader) {
     // now we have to check compatibility
@@ -937,6 +971,10 @@ public class OrcFile {
         }
       }
     }
+    if (writerUsedProlepticGregorian != reader.writerUsedProlepticGregorian()) {
+      LOG.info("Can't merge {} because it uses a different calendar", path);
+      return false;
+    }
     return true;
   }
 
@@ -978,6 +1016,7 @@ public class OrcFile {
       int rowIndexStride = 0;
       List<Path> result = new ArrayList<>(inputFiles.size());
       Map<String, ByteBuffer> userMetadata = new HashMap<>();
+      boolean writerUsedProlepticGregorian = false;
 
       for (Path input : inputFiles) {
         FileSystem fs = input.getFileSystem(conf);
@@ -994,6 +1033,7 @@ public class OrcFile {
           rowIndexStride = reader.getRowIndexStride();
           fileVersion = reader.getFileVersion();
           writerVersion = reader.getWriterVersion();
+          writerUsedProlepticGregorian = reader.writerUsedProlepticGregorian();
           options.bufferSize(bufferSize)
               .version(fileVersion)
               .writerVersion(writerVersion)
@@ -1006,7 +1046,8 @@ public class OrcFile {
           mergeMetadata(userMetadata, reader);
           output = createWriter(outputPath, options);
         } else if (!readerIsCompatible(schema, fileVersion, writerVersion,
-            rowIndexStride, compression, userMetadata, input, reader)) {
+            rowIndexStride, compression, userMetadata,
+            writerUsedProlepticGregorian, input, reader)) {
           continue;
         } else {
           mergeMetadata(userMetadata, reader);
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 6d6e04b..3c7d005 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -456,4 +456,9 @@ public interface Reader extends Closeable {
    * @return Serialized file metadata read from disk for the purposes of caching, etc.
    */
   ByteBuffer getSerializedFileFooter();
+
+  /**
+   * Was the file written using the proleptic Gregorian calendar?
+   */
+  boolean writerUsedProlepticGregorian();
 }
diff --git a/java/core/src/java/org/apache/orc/StripeStatistics.java b/java/core/src/java/org/apache/orc/StripeStatistics.java
index d1738ff..6fa5764 100644
--- a/java/core/src/java/org/apache/orc/StripeStatistics.java
+++ b/java/core/src/java/org/apache/orc/StripeStatistics.java
@@ -19,14 +19,22 @@
 package org.apache.orc;
 
 import org.apache.orc.impl.ColumnStatisticsImpl;
+import org.apache.orc.impl.ReaderImpl;
 
 import java.util.List;
 
 public class StripeStatistics {
   private final List<OrcProto.ColumnStatistics> cs;
+  private final ReaderImpl reader;
 
   public StripeStatistics(List<OrcProto.ColumnStatistics> list) {
+    this(list, null);
+  }
+
+  public StripeStatistics(List<OrcProto.ColumnStatistics> list,
+                          ReaderImpl reader) {
     this.cs = list;
+    this.reader = reader;
   }
 
   /**
@@ -37,7 +45,11 @@ public class StripeStatistics {
   public ColumnStatistics[] getColumnStatistics() {
     ColumnStatistics[] result = new ColumnStatistics[cs.size()];
     for (int i = 0; i < result.length; ++i) {
-      result[i] = ColumnStatisticsImpl.deserialize(null, cs.get(i));
+      if (reader == null) {
+        result[i] = ColumnStatisticsImpl.deserialize(null, cs.get(i));
+      } else {
+        result[i] = ColumnStatisticsImpl.deserialize(null, cs.get(i), reader);
+      }
     }
     return result;
   }
diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java
index 8372207..1e6e056 100644
--- a/java/core/src/java/org/apache/orc/TypeDescription.java
+++ b/java/core/src/java/org/apache/orc/TypeDescription.java
@@ -20,6 +20,7 @@ package org.apache.orc;
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -634,8 +635,9 @@ public class TypeDescription
       case SHORT:
       case INT:
       case LONG:
-      case DATE:
         return new LongColumnVector(maxSize);
+      case DATE:
+        return new DateColumnVector(maxSize);
       case TIMESTAMP:
         return new TimestampColumnVector(maxSize);
       case FLOAT:
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index fc77d76..80f69e5 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -1174,15 +1174,19 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
     DateStatisticsImpl() {
     }
 
-    DateStatisticsImpl(OrcProto.ColumnStatistics stats) {
+    DateStatisticsImpl(OrcProto.ColumnStatistics stats,
+                       boolean writerUsedProlepticGregorian,
+                       boolean convertToProlepticGregorian) {
       super(stats);
       OrcProto.DateStatistics dateStats = stats.getDateStatistics();
       // min,max values serialized/deserialized as int (days since epoch)
       if (dateStats.hasMaximum()) {
-        maximum = dateStats.getMaximum();
+        maximum = DateUtils.convertDate(dateStats.getMaximum(),
+            writerUsedProlepticGregorian, convertToProlepticGregorian);
       }
       if (dateStats.hasMinimum()) {
-        minimum = dateStats.getMinimum();
+        minimum = DateUtils.convertDate(dateStats.getMinimum(),
+            writerUsedProlepticGregorian, convertToProlepticGregorian);
       }
     }
 
@@ -1335,23 +1339,31 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
     TimestampStatisticsImpl() {
     }
 
-    TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
+    TimestampStatisticsImpl(OrcProto.ColumnStatistics stats,
+                            boolean writerUsedProlepticGregorian,
+                            boolean convertToProlepticGregorian) {
       super(stats);
       OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
       // min,max values serialized/deserialized as int (milliseconds since epoch)
       if (timestampStats.hasMaximum()) {
-        maximum = SerializationUtils.convertToUtc(TimeZone.getDefault(),
-            timestampStats.getMaximum());
+        maximum = DateUtils.convertTime(
+            SerializationUtils.convertToUtc(TimeZone.getDefault(),
+               timestampStats.getMaximum()),
+            writerUsedProlepticGregorian, convertToProlepticGregorian);
       }
       if (timestampStats.hasMinimum()) {
-        minimum = SerializationUtils.convertToUtc(TimeZone.getDefault(),
-            timestampStats.getMinimum());
+        minimum = DateUtils.convertTime(
+            SerializationUtils.convertToUtc(TimeZone.getDefault(),
+                timestampStats.getMinimum()),
+            writerUsedProlepticGregorian, convertToProlepticGregorian);
       }
       if (timestampStats.hasMaximumUtc()) {
-        maximum = timestampStats.getMaximumUtc();
+        maximum = DateUtils.convertTime(timestampStats.getMaximumUtc(),
+            writerUsedProlepticGregorian, convertToProlepticGregorian);
       }
       if (timestampStats.hasMinimumUtc()) {
-        minimum = timestampStats.getMinimumUtc();
+        minimum = DateUtils.convertTime(timestampStats.getMinimumUtc(),
+            writerUsedProlepticGregorian, convertToProlepticGregorian);
       }
     }
 
@@ -1665,6 +1677,20 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
 
   public static ColumnStatisticsImpl deserialize(TypeDescription schema,
                                                  OrcProto.ColumnStatistics stats) {
+    return deserialize(schema, stats, false, false);
+  }
+
+  public static ColumnStatisticsImpl deserialize(TypeDescription schema,
+                                                 OrcProto.ColumnStatistics stats,
+                                                 ReaderImpl reader) {
+    return deserialize(schema, stats, reader.writerUsedProlepticGregorian(),
+        reader.options.getConvertToProlepticGregorian());
+  }
+
+  public static ColumnStatisticsImpl deserialize(TypeDescription schema,
+                                                 OrcProto.ColumnStatistics stats,
+                                                 boolean writerUsedProlepticGregorian,
+                                                 boolean convertToProlepticGregorian) {
     if (stats.hasBucketStatistics()) {
       return new BooleanStatisticsImpl(stats);
     } else if (stats.hasIntStatistics()) {
@@ -1681,9 +1707,11 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
         return new DecimalStatisticsImpl(stats);
       }
     } else if (stats.hasDateStatistics()) {
-      return new DateStatisticsImpl(stats);
+      return new DateStatisticsImpl(stats, writerUsedProlepticGregorian,
+          convertToProlepticGregorian);
     } else if (stats.hasTimestampStatistics()) {
-      return new TimestampStatisticsImpl(stats);
+      return new TimestampStatisticsImpl(stats, writerUsedProlepticGregorian,
+                                         convertToProlepticGregorian);
     } else if(stats.hasBinaryStatistics()) {
       return new BinaryStatisticsImpl(stats);
     } else {
diff --git a/java/core/src/java/org/apache/orc/impl/DateUtils.java b/java/core/src/java/org/apache/orc/impl/DateUtils.java
new file mode 100644
index 0000000..8ac574c
--- /dev/null
+++ b/java/core/src/java/org/apache/orc/impl/DateUtils.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Conversion utilities from the hybrid Julian/Gregorian calendar to/from the
+ * proleptic Gregorian.
+ *
+ * The semantics here are to hold the string representation constant and change
+ * the epoch offset rather than holding the instant in time constant and changing
+ * the string representation.
+ *
+ * These utilities will be fast for the common case (> 1582 AD), but slow for
+ * old dates.
+ */
+public class DateUtils {
+  private static SimpleDateFormat createFormatter(String fmt,
+                                                 GregorianCalendar calendar) {
+    SimpleDateFormat result = new SimpleDateFormat(fmt);
+    result.setCalendar(calendar);
+    return result;
+  }
+
+  private static final String DATE = "yyyy-MM-dd";
+  private static final String TIME = DATE + " HH:mm:ss";
+  private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
+  private static final GregorianCalendar HYBRID = new GregorianCalendar();
+  private static final ThreadLocal<SimpleDateFormat> HYBRID_DATE_FORMAT =
+      ThreadLocal.withInitial(() -> createFormatter(DATE, HYBRID));
+  private static final ThreadLocal<SimpleDateFormat> HYBRID_TIME_FORMAT =
+      ThreadLocal.withInitial(() -> createFormatter(TIME, HYBRID));
+  private static final long SWITCHOVER_MILLIS;
+  private static final long SWITCHOVER_DAYS;
+  private static final GregorianCalendar PROLEPTIC = new GregorianCalendar();
+  private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_DATE_FORMAT =
+      ThreadLocal.withInitial(() -> createFormatter(DATE, PROLEPTIC));
+  private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_TIME_FORMAT =
+      ThreadLocal.withInitial(() -> createFormatter(TIME, PROLEPTIC));
+
+  static {
+    HYBRID.setTimeZone(UTC);
+    PROLEPTIC.setTimeZone(UTC);
+    PROLEPTIC.setGregorianChange(new Date(Long.MIN_VALUE));
+
+    // Get the first day from which the two calendars agree with each other.
+    try {
+      SWITCHOVER_MILLIS = HYBRID_DATE_FORMAT.get().parse("1582-10-15").getTime();
+      SWITCHOVER_DAYS = TimeUnit.MILLISECONDS.toDays(SWITCHOVER_MILLIS);
+    } catch (ParseException e) {
+      throw new IllegalArgumentException("Can't parse switch over date", e);
+    }
+  }
+
+  /**
+   * Convert an epoch day from the hybrid Julian/Gregorian calendar to the
+   * proleptic Gregorian.
+   * @param hybrid day of epoch in the hybrid Julian/Gregorian
+   * @return day of epoch in the proleptic Gregorian
+   */
+  public static int convertDateToProleptic(int hybrid) {
+    int proleptic = hybrid;
+    if (hybrid < SWITCHOVER_DAYS) {
+      String dateStr = HYBRID_DATE_FORMAT.get().format(
+          new Date(TimeUnit.DAYS.toMillis(hybrid)));
+      try {
+        proleptic = (int) TimeUnit.MILLISECONDS.toDays(
+            PROLEPTIC_DATE_FORMAT.get().parse(dateStr).getTime());
+      } catch (ParseException e) {
+        throw new IllegalArgumentException("Can't parse " + dateStr, e);
+      }
+    }
+    return proleptic;
+  }
+
+  /**
+   * Convert an epoch day from the proleptic Gregorian calendar to the hybrid
+   * Julian/Gregorian.
+   * @param proleptic day of epoch in the proleptic Gregorian
+   * @return day of epoch in the hybrid Julian/Gregorian
+   */
+  public static int convertDateToHybrid(int proleptic) {
+    int hyrbid = proleptic;
+    if (proleptic < SWITCHOVER_DAYS) {
+      String dateStr = PROLEPTIC_DATE_FORMAT.get().format(
+          new Date(TimeUnit.DAYS.toMillis(proleptic)));
+      try {
+        hyrbid = (int) TimeUnit.MILLISECONDS.toDays(
+            HYBRID_DATE_FORMAT.get().parse(dateStr).getTime());
+      } catch (ParseException e) {
+        throw new IllegalArgumentException("Can't parse " + dateStr, e);
+      }
+    }
+    return hyrbid;
+  }
+
+  public static int convertDate(int original,
+                                boolean fromProleptic,
+                                boolean toProleptic) {
+    if (fromProleptic != toProleptic) {
+      return toProleptic
+                 ? convertDateToProleptic(original)
+                 : convertDateToHybrid(original);
+    } else {
+      return original;
+    }
+  }
+
+  public static long convertTime(long original,
+                                 boolean fromProleptic,
+                                 boolean toProleptic) {
+    if (fromProleptic != toProleptic) {
+      return toProleptic
+                 ? convertTimeToProleptic(original)
+                 : convertTimeToHybrid(original);
+    } else {
+      return original;
+    }
+  }
+  /**
+   * Convert epoch millis from the hybrid Julian/Gregorian calendar to the
+   * proleptic Gregorian.
+   * @param hybrid millis of epoch in the hybrid Julian/Gregorian
+   * @return millis of epoch in the proleptic Gregorian
+   */
+  public static long convertTimeToProleptic(long hybrid) {
+    long proleptic = hybrid;
+    if (hybrid < SWITCHOVER_MILLIS) {
+      String dateStr = HYBRID_TIME_FORMAT.get().format(new Date(hybrid));
+      try {
+        proleptic = PROLEPTIC_TIME_FORMAT.get().parse(dateStr).getTime();
+      } catch (ParseException e) {
+        throw new IllegalArgumentException("Can't parse " + dateStr, e);
+      }
+    }
+    return proleptic;
+  }
+
+  /**
+   * Convert epoch millis from the proleptic Gregorian calendar to the hybrid
+   * Julian/Gregorian.
+   * @param proleptic millis of epoch in the proleptic Gregorian
+   * @return millis of epoch in the hybrid Julian/Gregorian
+   */
+  public static long convertTimeToHybrid(long proleptic) {
+    long hybrid = proleptic;
+    if (proleptic < SWITCHOVER_MILLIS) {
+      String dateStr = PROLEPTIC_TIME_FORMAT.get().format(new Date(proleptic));
+      try {
+        hybrid = HYBRID_TIME_FORMAT.get().parse(dateStr).getTime();
+      } catch (ParseException e) {
+        throw new IllegalArgumentException("Can't parse " + dateStr, e);
+      }
+    }
+    return hybrid;
+  }
+
+  private DateUtils() {
+    throw new UnsupportedOperationException();
+  }
+}
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 09cfe82..5a86440 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -30,6 +30,7 @@ import java.util.function.Supplier;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.orc.CompressionKind;
 import org.apache.orc.FileMetadata;
+import org.apache.orc.OrcConf;
 import org.apache.orc.OrcFile;
 import org.apache.orc.OrcUtils;
 import org.apache.orc.Reader;
@@ -228,12 +229,12 @@ public class ReaderImpl implements Reader {
     return deserializeStats(schema, fileStats);
   }
 
-  public static ColumnStatistics[] deserializeStats(
+  public ColumnStatistics[] deserializeStats(
       TypeDescription schema,
       List<OrcProto.ColumnStatistics> fileStats) {
     ColumnStatistics[] result = new ColumnStatistics[fileStats.size()];
     for(int i=0; i < result.length; ++i) {
-      result[i] = ColumnStatisticsImpl.deserialize(schema, fileStats.get(i));
+      result[i] = ColumnStatisticsImpl.deserialize(schema, fileStats.get(i), this);
     }
     return result;
   }
@@ -645,6 +646,14 @@ public class ReaderImpl implements Reader {
   }
 
   @Override
+  public boolean writerUsedProlepticGregorian() {
+    OrcProto.Footer footer = tail.getFooter();
+    return footer.hasCalendar()
+               ? footer.getCalendar() == OrcProto.CalendarKind.PROLEPTIC_GREGORIAN
+               : OrcConf.PROLEPTIC_GREGORIAN_DEFAULT.getBoolean(conf);
+  }
+
+  @Override
   public Options options() {
     return new Options(conf);
   }
@@ -825,7 +834,7 @@ public class ReaderImpl implements Reader {
     }
     List<StripeStatistics> result = new ArrayList<>();
     for (OrcProto.StripeStatistics ss : stripeStats) {
-      result.add(new StripeStatistics(ss.getColStatsList()));
+      result.add(new StripeStatistics(ss.getColStatsList(), this));
     }
     return result;
   }
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 84e1b93..b07dbb2 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -215,7 +215,9 @@ public class RecordReaderImpl implements RecordReader {
           rowIndexStride,
           evolution,
           writerVersion,
-          fileReader.useUTCTimestamp);
+          fileReader.useUTCTimestamp,
+          fileReader.writerUsedProlepticGregorian(),
+          fileReader.options.getConvertToProlepticGregorian());
     } else {
       sargApp = null;
     }
@@ -269,7 +271,9 @@ public class RecordReaderImpl implements RecordReader {
           .setSchemaEvolution(evolution)
           .skipCorrupt(skipCorrupt)
           .fileFormat(fileReader.getFileVersion())
-          .useUTCTimestamp(fileReader.useUTCTimestamp);
+          .useUTCTimestamp(fileReader.useUTCTimestamp)
+          .setProlepticGregorian(fileReader.writerUsedProlepticGregorian(),
+                                 fileReader.options.getConvertToProlepticGregorian());
     reader = TreeReaderFactory.createTreeReader(evolution.getReaderSchema(),
         readerContext);
 
@@ -887,15 +891,21 @@ public class RecordReaderImpl implements RecordReader {
     private SchemaEvolution evolution;
     private final long[] exceptionCount;
     private final boolean useUTCTimestamp;
+    private final boolean writerUsedProlepticGregorian;
+    private final boolean convertToProlepticGregorian;
 
     public SargApplier(SearchArgument sarg,
                        long rowIndexStride,
                        SchemaEvolution evolution,
                        OrcFile.WriterVersion writerVersion,
-                       boolean useUTCTimestamp) {
+                       boolean useUTCTimestamp,
+                       boolean writerUsedProlepticGregorian,
+                       boolean convertToProlepticGregorian) {
       this.writerVersion = writerVersion;
       this.sarg = sarg;
       sargLeaves = sarg.getLeaves();
+      this.writerUsedProlepticGregorian = writerUsedProlepticGregorian;
+      this.convertToProlepticGregorian = convertToProlepticGregorian;
       filterColumns = mapSargColumnsToOrcInternalColIdx(sargLeaves,
                                                         evolution);
       this.rowIndexStride = rowIndexStride;
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index e8e189a..539a57a 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -30,6 +30,7 @@ import java.util.TimeZone;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -61,6 +62,9 @@ public class TreeReaderFactory {
     String getWriterTimezone();
 
     OrcFile.Version getFileFormat();
+    boolean useProlepticGregorian();
+
+    boolean fileUsedProlepticGregorian();
   }
 
   public static class ReaderContext implements Context {
@@ -69,6 +73,8 @@ public class TreeReaderFactory {
     private boolean useUTCTimestamp = false;
     private String writerTimezone;
     private OrcFile.Version fileFormat;
+    private boolean useProlepticGregorian;
+    private boolean fileUsedProlepticGregorian;
 
     public ReaderContext setSchemaEvolution(SchemaEvolution evolution) {
       this.evolution = evolution;
@@ -95,6 +101,13 @@ public class TreeReaderFactory {
       return this;
     }
 
+    public ReaderContext setProlepticGregorian(boolean file,
+                                               boolean reader) {
+      this.useProlepticGregorian = reader;
+      this.fileUsedProlepticGregorian = file;
+      return this;
+    }
+
     @Override
     public SchemaEvolution getSchemaEvolution() {
       return evolution;
@@ -119,6 +132,16 @@ public class TreeReaderFactory {
     public OrcFile.Version getFileFormat() {
       return fileFormat;
     }
+
+    @Override
+    public boolean useProlepticGregorian() {
+      return useProlepticGregorian;
+    }
+
+    @Override
+    public boolean fileUsedProlepticGregorian() {
+      return fileUsedProlepticGregorian;
+    }
   }
 
   public abstract static class TreeReader {
@@ -900,6 +923,8 @@ public class TreeReaderFactory {
     private TimeZone writerTimeZone;
     private boolean hasSameTZRules;
     private ThreadLocal<DateFormat> threadLocalDateFormat;
+    private final boolean useProleptic;
+    private final boolean fileUsesProleptic;
 
     TimestampTreeReader(int columnId, Context context) throws IOException {
       this(columnId, null, null, null, null, context);
@@ -936,6 +961,8 @@ public class TreeReaderFactory {
         }
         base_timestamp = getBaseTimestamp(context.getWriterTimezone());
       }
+      fileUsesProleptic = context.fileUsedProlepticGregorian();
+      useProleptic = context.useProlepticGregorian();
     }
 
     @Override
@@ -1004,6 +1031,7 @@ public class TreeReaderFactory {
                            boolean[] isNull,
                            final int batchSize) throws IOException {
       TimestampColumnVector result = (TimestampColumnVector) previousVector;
+      result.changeCalendar(fileUsesProleptic, false);
       super.nextVector(previousVector, isNull, batchSize);
 
       result.setIsUTC(context.getUseUTCTimestamp());
@@ -1041,6 +1069,7 @@ public class TreeReaderFactory {
           }
         }
       }
+      result.changeCalendar(useProleptic, true);
     }
 
     private static int parseNanos(long serialized) {
@@ -1064,6 +1093,9 @@ public class TreeReaderFactory {
 
   public static class DateTreeReader extends TreeReader {
     protected IntegerReader reader = null;
+    private final boolean needsDateColumnVector;
+    private final boolean useProleptic;
+    private final boolean fileUsesProleptic;
 
     DateTreeReader(int columnId, Context context) throws IOException {
       this(columnId, null, null, null, context);
@@ -1072,6 +1104,10 @@ public class TreeReaderFactory {
     protected DateTreeReader(int columnId, InStream present, InStream data,
         OrcProto.ColumnEncoding encoding, Context context) throws IOException {
       super(columnId, present, context);
+      useProleptic = context.useProlepticGregorian();
+      fileUsesProleptic = context.fileUsedProlepticGregorian();
+      // if either side is proleptic, we need a DateColumnVector
+      needsDateColumnVector = useProleptic || fileUsesProleptic;
       if (data != null && encoding != null) {
         checkEncoding(encoding);
         reader = createIntegerReader(encoding.getKind(), data, true, context);
@@ -1114,12 +1150,23 @@ public class TreeReaderFactory {
                            boolean[] isNull,
                            final int batchSize) throws IOException {
       final LongColumnVector result = (LongColumnVector) previousVector;
+      if (needsDateColumnVector) {
+        if (result instanceof DateColumnVector) {
+          ((DateColumnVector) result).changeCalendar(fileUsesProleptic, false);
+        } else {
+          throw new IllegalArgumentException("Can't use LongColumnVector to " +
+                                                 "read proleptic Gregorian dates.");
+        }
+      }
 
       // Read present/isNull stream
       super.nextVector(result, isNull, batchSize);
 
       // Read value entries based on isNull entries
       reader.nextVector(result, result.vector, batchSize);
+      if (needsDateColumnVector) {
+        ((DateColumnVector) result).changeCalendar(useProleptic, true);
+      }
     }
 
     @Override
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index c7e5818..7eea7f7 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -120,12 +120,14 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
   private final boolean useUTCTimeZone;
   private final double dictionaryKeySizeThreshold;
   private final boolean[] directEncodingColumns;
+  private final boolean useProlepticGregorian;
 
   public WriterImpl(FileSystem fs,
                     Path path,
                     OrcFile.WriterOptions opts) throws IOException {
     this.path = path;
     this.conf = opts.getConfiguration();
+    useProlepticGregorian = opts.getProlepticGregorian();
     this.callback = opts.getCallback();
     this.schema = opts.getSchema();
     this.writerVersion = opts.getWriterVersion();
@@ -438,6 +440,11 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
     public double getDictionaryKeySizeThreshold(int columnId) {
       return directEncodingColumns[columnId] ? 0.0 : dictionaryKeySizeThreshold;
     }
+
+    @Override
+    public boolean getProlepticGregorian() {
+      return useProlepticGregorian;
+    }
   }
 
 
@@ -536,6 +543,11 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
     rawDataSize = computeRawDataSize();
     // serialize the types
     writeTypes(builder, schema);
+    if (hasDateOrTime(schema)) {
+      builder.setCalendar(useProlepticGregorian
+                              ? OrcProto.CalendarKind.PROLEPTIC_GREGORIAN
+                              : OrcProto.CalendarKind.JULIAN_GREGORIAN);
+    }
     // add the stripe information
     for(OrcProto.StripeInformation stripe: stripes) {
       builder.addStripes(stripe);
@@ -643,8 +655,9 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
 
   @Override
   public void appendStripe(byte[] stripe, int offset, int length,
-      StripeInformation stripeInfo,
-      OrcProto.StripeStatistics stripeStatistics) throws IOException {
+                           StripeInformation stripeInfo,
+                           OrcProto.StripeStatistics stripeStatistics
+                           ) throws IOException {
     checkArgument(stripe != null, "Stripe must not be null");
     checkArgument(length <= stripe.length,
         "Specified length must not be greater specified array length");
@@ -691,7 +704,12 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
 
     // add the column statistics
     writeFileStatistics(builder, treeWriter);
-    return ReaderImpl.deserializeStats(schema, builder.getStatisticsList());
+    List<OrcProto.ColumnStatistics> fileStats = builder.getStatisticsList();
+    ColumnStatistics[] result = new ColumnStatistics[fileStats.size()];
+    for(int i=0; i < result.length; ++i) {
+      result[i] = ColumnStatisticsImpl.deserialize(schema, fileStats.get(i));
+    }
+    return result;
   }
 
   public CompressionCodec getCompressionCodec() {
@@ -712,4 +730,22 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
     }
     return false;
   }
+
+  private static boolean hasDateOrTime(TypeDescription schema) {
+    switch (schema.getCategory()) {
+    case TIMESTAMP:
+    case DATE:
+      return true;
+    default:
+    }
+    List<TypeDescription> children = schema.getChildren();
+    if (children != null) {
+      for(TypeDescription child: children) {
+        if (hasDateOrTime(child)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
 }
diff --git a/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java
index 209dd0e..4289b57 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java
@@ -19,6 +19,7 @@
 package org.apache.orc.impl.writer;
 
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.orc.OrcProto;
@@ -32,6 +33,7 @@ import java.io.IOException;
 public class DateTreeWriter extends TreeWriterBase {
   private final IntegerWriter writer;
   private final boolean isDirectV2;
+  private final boolean useProleptic;
 
   public DateTreeWriter(int columnId,
                         TypeDescription schema,
@@ -45,6 +47,7 @@ public class DateTreeWriter extends TreeWriterBase {
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
     }
+    useProleptic = writer.getProlepticGregorian();
   }
 
   @Override
@@ -52,6 +55,12 @@ public class DateTreeWriter extends TreeWriterBase {
                          int length) throws IOException {
     super.writeBatch(vector, offset, length);
     LongColumnVector vec = (LongColumnVector) vector;
+    if (vector instanceof  DateColumnVector) {
+      ((DateColumnVector) vec).changeCalendar(useProleptic, true);
+    } else if (useProleptic) {
+      throw new IllegalArgumentException("Can't use LongColumnVector to write" +
+                                             " proleptic dates");
+    }
     if (vector.isRepeating) {
       if (vector.noNulls || !vector.isNull[0]) {
         int value = (int) vec.vector[0];
diff --git a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
index 0f30d07..c7a751a 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
@@ -44,6 +44,7 @@ public class TimestampTreeWriter extends TreeWriterBase {
   private final TimeZone localTimezone;
   private final long baseEpochSecsLocalTz;
   private final long baseEpochSecsUTC;
+  private final boolean useProleptic;
 
   public TimestampTreeWriter(int columnId,
                              TypeDescription schema,
@@ -77,6 +78,7 @@ public class TimestampTreeWriter extends TreeWriterBase {
     } catch (ParseException e) {
       throw new IOException("Unable to create base timestamp tree writer", e);
     }
+    useProleptic = writer.getProlepticGregorian();
   }
 
   @Override
@@ -95,6 +97,7 @@ public class TimestampTreeWriter extends TreeWriterBase {
                          int length) throws IOException {
     super.writeBatch(vector, offset, length);
     TimestampColumnVector vec = (TimestampColumnVector) vector;
+    vec.changeCalendar(useProleptic, true);
     if (vector.isRepeating) {
       if (vector.noNulls || !vector.isNull[0]) {
         // ignore the bottom three digits from the vec.time field
diff --git a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
index 9ef3dda..0727d30 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
@@ -105,4 +105,10 @@ public interface WriterContext {
     boolean getUseUTCTimestamp();
 
     double getDictionaryKeySizeThreshold(int column);
+
+  /**
+   * Should we write the data using the proleptic Gregorian calendar?
+   * @return true if we should use the proleptic Gregorian calendar
+   */
+  boolean getProlepticGregorian();
 }
diff --git a/java/core/src/test/org/apache/orc/TestProlepticConversions.java b/java/core/src/test/org/apache/orc/TestProlepticConversions.java
new file mode 100644
index 0000000..4d18412
--- /dev/null
+++ b/java/core/src/test/org/apache/orc/TestProlepticConversions.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.File;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.List;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class TestProlepticConversions {
+
+  @Parameterized.Parameter
+  public boolean writerProlepticGregorian;
+
+  @Parameterized.Parameter(1)
+  public boolean readerProlepticGregorian;
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> getParameters() {
+    List<Object[]> result = new ArrayList<>();
+    final boolean[] BOOLEANS = new boolean[]{false, true};
+    for(Boolean writer: BOOLEANS) {
+      for (Boolean reader: BOOLEANS) {
+        result.add(new Object[]{writer, reader});
+      }
+    }
+    return result;
+  }
+
+  private Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  private final Configuration conf;
+  private final TimeZone UTC = TimeZone.getTimeZone("UTC");
+  private final GregorianCalendar PROLEPTIC = new GregorianCalendar();
+  private final GregorianCalendar HYBRID = new GregorianCalendar();
+  {
+    conf = new Configuration();
+    PROLEPTIC.setTimeZone(UTC);
+    PROLEPTIC.setGregorianChange(new Date(Long.MIN_VALUE));
+    HYBRID.setTimeZone(UTC);
+  }
+
+  private FileSystem fs;
+  private Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void setupPath() throws Exception {
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestProlepticConversion." +
+       testCaseName.getMethodName().replaceFirst("\\[[0-9]+]", "") + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  private SimpleDateFormat createParser(String format, GregorianCalendar calendar) {
+    SimpleDateFormat result = new SimpleDateFormat(format);
+    result.setCalendar(calendar);
+    return result;
+  }
+
+  @Test
+  public void testReadWrite() throws Exception {
+    TypeDescription schema = TypeDescription.fromString(
+        "struct<d:date,t:timestamp>");
+    try (Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .fileSystem(fs)
+            .setProlepticGregorian(writerProlepticGregorian))) {
+      VectorizedRowBatch batch = schema.createRowBatchV2();
+      batch.size = 1024;
+      DateColumnVector d = (DateColumnVector) batch.cols[0];
+      TimestampColumnVector t = (TimestampColumnVector) batch.cols[1];
+      d.changeCalendar(writerProlepticGregorian, false);
+      t.changeCalendar(writerProlepticGregorian, false);
+      GregorianCalendar cal = writerProlepticGregorian ? PROLEPTIC : HYBRID;
+      SimpleDateFormat dateFormat = createParser("yyyy-MM-dd", cal);
+      SimpleDateFormat timeFormat = createParser("yyyy-MM-dd HH:mm:ss", cal);
+      for(int r=0; r < batch.size; ++r) {
+        d.vector[r] = TimeUnit.MILLISECONDS.toDays(
+            dateFormat.parse(String.format("%04d-01-23", r * 2 + 1)).getTime());
+        Date val = timeFormat.parse(
+            String.format("%04d-03-21 %02d:12:34", 2 * r + 1, r % 24));
+        t.time[r] = val.getTime();
+        t.nanos[r] = 0;
+      }
+      writer.addRowBatch(batch);
+    }
+    try (Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf)
+            .filesystem(fs)
+            .convertToProlepticGregorian(readerProlepticGregorian));
+         RecordReader rows = reader.rows(reader.options())) {
+      assertEquals(writerProlepticGregorian, reader.writerUsedProlepticGregorian());
+      VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
+      DateColumnVector d = (DateColumnVector) batch.cols[0];
+      TimestampColumnVector t = (TimestampColumnVector) batch.cols[1];
+      GregorianCalendar cal = readerProlepticGregorian ? PROLEPTIC : HYBRID;
+      SimpleDateFormat dateFormat = createParser("yyyy-MM-dd", cal);
+      SimpleDateFormat timeFormat = createParser("yyyy-MM-dd HH:mm:ss", cal);
+
+      // Check the file statistics
+      ColumnStatistics[] colStats = reader.getStatistics();
+      DateColumnStatistics dStats = (DateColumnStatistics) colStats[1];
+      TimestampColumnStatistics tStats = (TimestampColumnStatistics) colStats[2];
+      assertEquals("0001-01-23", dateFormat.format(dStats.getMinimum()));
+      assertEquals("2047-01-23", dateFormat.format(dStats.getMaximum()));
+      assertEquals("0001-03-21 00:12:34", timeFormat.format(tStats.getMinimum()));
+      assertEquals("2047-03-21 15:12:34", timeFormat.format(tStats.getMaximum()));
+
+      // Check the stripe stats
+      List<StripeStatistics> stripeStats = reader.getStripeStatistics();
+      assertEquals(1, stripeStats.size());
+      colStats = stripeStats.get(0).getColumnStatistics();
+      dStats = (DateColumnStatistics) colStats[1];
+      tStats = (TimestampColumnStatistics) colStats[2];
+      assertEquals("0001-01-23", dateFormat.format(dStats.getMinimum()));
+      assertEquals("2047-01-23", dateFormat.format(dStats.getMaximum()));
+      assertEquals("0001-03-21 00:12:34", timeFormat.format(tStats.getMinimum()));
+      assertEquals("2047-03-21 15:12:34", timeFormat.format(tStats.getMaximum()));
+
+      // Check the data
+      assertTrue(rows.nextBatch(batch));
+      assertEquals(1024, batch.size);
+      // Ensure the column vectors are using the right calendar
+      assertEquals(readerProlepticGregorian, d.isUsingProlepticCalendar());
+      assertEquals(readerProlepticGregorian, t.usingProlepticCalendar());
+      for(int r=0; r < batch.size; ++r) {
+        String expectedD = String.format("%04d-01-23", r * 2 + 1);
+        String expectedT = String.format("%04d-03-21 %02d:12:34", 2 * r + 1, r % 24);
+        assertEquals("row " + r, expectedD, dateFormat.format(
+            new Date(TimeUnit.DAYS.toMillis(d.vector[r]))));
+        assertEquals("row " + r, expectedT, timeFormat.format(t.asScratchTimestamp(r)));
+      }
+    }
+  }
+}
diff --git a/java/core/src/test/org/apache/orc/TestStringDictionary.java b/java/core/src/test/org/apache/orc/TestStringDictionary.java
index 203f58e..91fff0b 100644
--- a/java/core/src/test/org/apache/orc/TestStringDictionary.java
+++ b/java/core/src/test/org/apache/orc/TestStringDictionary.java
@@ -249,6 +249,11 @@ public class TestStringDictionary {
     public double getDictionaryKeySizeThreshold(int column) {
       return OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf);
     }
+
+    @Override
+    public boolean getProlepticGregorian() {
+      return false;
+    }
   }
 
   @Test
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 72d2211..4c740c2 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -2023,7 +2023,7 @@ public class TestRecordReaderImpl {
             .end().build();
     RecordReaderImpl.SargApplier applier =
         new RecordReaderImpl.SargApplier(sarg, 1000, evolution,
-            OrcFile.WriterVersion.ORC_135, false);
+            OrcFile.WriterVersion.ORC_135, false, false, false);
     OrcProto.StripeInformation stripe =
         OrcProto.StripeInformation.newBuilder().setNumberOfRows(4000).build();
     OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[3];
@@ -2071,7 +2071,7 @@ public class TestRecordReaderImpl {
             .end().build();
     RecordReaderImpl.SargApplier applier =
         new RecordReaderImpl.SargApplier(sarg, 1000, evolution,
-            OrcFile.WriterVersion.ORC_135, false);
+            OrcFile.WriterVersion.ORC_135, false, false, false);
     OrcProto.StripeInformation stripe =
         OrcProto.StripeInformation.newBuilder().setNumberOfRows(3000).build();
     OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[3];
@@ -2103,6 +2103,50 @@ public class TestRecordReaderImpl {
   }
 
   @Test
+  public void testPositionalEvolutionAddColumnPPD() throws IOException {
+    Reader.Options opts = new Reader.Options();
+    opts.forcePositionalEvolution(true);
+
+    TypeDescription file = TypeDescription.fromString("struct<x:int>");
+    // new column added on reader side
+    TypeDescription read = TypeDescription.fromString("struct<x:int,y:boolean>");
+    opts.include(includeAll(read));
+
+    SchemaEvolution evo = new SchemaEvolution(file, read, opts);
+
+    SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd()
+        .equals("y", PredicateLeaf.Type.BOOLEAN, true).end().build();
+
+    RecordReaderImpl.SargApplier applier =
+        new RecordReaderImpl.SargApplier(sarg, 1000, evo,
+            OrcFile.WriterVersion.ORC_135, false, false, false);
+
+    OrcProto.StripeInformation stripe =
+        OrcProto.StripeInformation.newBuilder().setNumberOfRows(2000).build();
+
+    OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[3];
+    indexes[1] = OrcProto.RowIndex.newBuilder() // index for original x column
+        .addEntry(createIndexEntry(0L, 10L))
+        .addEntry(createIndexEntry(100L, 200L))
+        .build();
+    indexes[2] = null; // no-op, just for clarifying that new reader column doesn't have an index
+
+    List<OrcProto.ColumnEncoding> encodings = new ArrayList<>();
+    encodings.add(OrcProto.ColumnEncoding.newBuilder().setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+
+    boolean[] rows = applier.pickRowGroups(new ReaderImpl.StripeInformationImpl(stripe),
+        indexes, null, encodings, null, false);
+    assertEquals(RecordReaderImpl.SargApplier.READ_ALL_RGS, rows); //cannot filter for new column, return all rows
+  }
+
+  private boolean[] includeAll(TypeDescription readerType) {
+    int numColumns = readerType.getMaximumId() + 1;
+    boolean[] result = new boolean[numColumns];
+    Arrays.fill(result, true);
+    return result;
+  }
+
+  @Test
   public void testSkipDataReaderOpen() throws Exception {
     IOException ioe = new IOException("Don't open when there is no stripe");
 
diff --git a/java/pom.xml b/java/pom.xml
index 5dadc0c..6d1c696 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -70,7 +70,7 @@
 
     <min.hadoop.version>2.2.0</min.hadoop.version>
     <hadoop.version>2.7.3</hadoop.version>
-    <storage-api.version>2.6.0</storage-api.version>
+    <storage-api.version>2.7.1</storage-api.version>
     <zookeeper.version>3.4.6</zookeeper.version>
   </properties>
 
diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java b/java/tools/src/java/org/apache/orc/tools/FileDump.java
index 3eae30d..a536f55 100644
--- a/java/tools/src/java/org/apache/orc/tools/FileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.orc.impl.ReaderImpl;
 import org.apache.orc.util.BloomFilter;
 import org.apache.orc.util.BloomFilterIO;
 import org.apache.orc.ColumnStatistics;
@@ -47,7 +48,6 @@ import org.apache.orc.OrcFile;
 import org.apache.orc.Reader;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.Writer;
-import org.apache.orc.impl.AcidStats;
 import org.apache.orc.impl.ColumnStatisticsImpl;
 import org.apache.orc.impl.OrcAcidUtils;
 import org.apache.orc.impl.OrcIndex;
@@ -312,6 +312,9 @@ public final class FileDump {
     if (reader.getCompressionKind() != CompressionKind.NONE) {
       System.out.println("Compression size: " + reader.getCompressionSize());
     }
+    System.out.println("Calendar: " + (reader.writerUsedProlepticGregorian()
+                           ? "Proleptic Gregorian"
+                           : "Julian/Gregorian"));
     System.out.println("Type: " + reader.getSchema().toString());
     System.out.println("\nStripe Statistics:");
     List<StripeStatistics> stripeStats = reader.getStripeStatistics();
@@ -385,7 +388,7 @@ public final class FileDump {
         for (int col : rowIndexCols) {
           StringBuilder buf = new StringBuilder();
           String rowIdxString = getFormattedRowIndices(col,
-              indices.getRowGroupIndex(), schema);
+              indices.getRowGroupIndex(), schema, (ReaderImpl) reader);
           buf.append(rowIdxString);
           String bloomFilString = getFormattedBloomFilters(col, indices,
               reader.getWriterVersion(),
@@ -664,7 +667,8 @@ public final class FileDump {
 
   private static String getFormattedRowIndices(int col,
                                                OrcProto.RowIndex[] rowGroupIndex,
-                                               TypeDescription schema) {
+                                               TypeDescription schema,
+                                               ReaderImpl reader) {
     StringBuilder buf = new StringBuilder();
     OrcProto.RowIndex index;
     buf.append("    Row group indices for column ").append(col).append(":");
@@ -687,7 +691,7 @@ public final class FileDump {
         buf.append("no stats at ");
       } else {
         ColumnStatistics cs =
-            ColumnStatisticsImpl.deserialize(colSchema, colStats);
+            ColumnStatisticsImpl.deserialize(colSchema, colStats, reader);
         buf.append(cs.toString());
       }
       buf.append(" positions: ");
diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index c02ff20..e1d6301 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -30,6 +30,7 @@ import org.apache.orc.Reader;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.impl.AcidStats;
 import org.apache.orc.impl.OrcAcidUtils;
+import org.apache.orc.impl.ReaderImpl;
 import org.apache.orc.impl.RecordReaderImpl;
 import org.apache.orc.util.BloomFilter;
 import org.codehaus.jettison.json.JSONArray;
@@ -52,16 +53,12 @@ import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
 import org.codehaus.jettison.json.JSONStringer;
 import org.codehaus.jettison.json.JSONWriter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * File dump tool with json formatted output.
  */
 public class JsonFileDump {
 
-  private static final Logger LOG = LoggerFactory.getLogger(JsonFileDump.class);
-
   public static void printJsonMetaData(List<String> files,
       Configuration conf,
       List<Integer> rowIndexCols, boolean prettyPrint, boolean printTimeZone)
@@ -101,6 +98,9 @@ public class JsonFileDump {
         writeSchema(writer, reader.getTypes());
         writer.endArray();
 
+        writer.key("calendar").value(reader.writerUsedProlepticGregorian()
+                                         ? "proleptic Gregorian"
+                                         : "Julian/Gregorian");
         writer.key("stripeStatistics").array();
         List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
         for (int n = 0; n < stripeStatistics.size(); n++) {
@@ -191,7 +191,7 @@ public class JsonFileDump {
               writer.object();
               writer.key("columnId").value(col);
               writeRowGroupIndexes(writer, col, indices.getRowGroupIndex(),
-                  reader.getSchema());
+                  reader.getSchema(), (ReaderImpl) reader);
               writeBloomFilterIndexes(writer, col, indices,
                   reader.getWriterVersion(),
                   reader.getSchema().findSubtype(col).getCategory(),
@@ -399,9 +399,9 @@ public class JsonFileDump {
   }
 
   private static void writeRowGroupIndexes(JSONWriter writer, int col,
-      OrcProto.RowIndex[] rowGroupIndex, TypeDescription schema)
-      throws JSONException {
-
+                                           OrcProto.RowIndex[] rowGroupIndex,
+                                           TypeDescription schema,
+                                           ReaderImpl reader) throws JSONException {
     OrcProto.RowIndex index;
     if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
         ((index = rowGroupIndex[col]) == null)) {
@@ -418,7 +418,7 @@ public class JsonFileDump {
       }
       OrcProto.ColumnStatistics colStats = entry.getStatistics();
       writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(
-          schema.findSubtype(col), colStats));
+          schema.findSubtype(col), colStats, reader));
       writer.key("positions").array();
       for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
         writer.value(entry.getPositions(posIx));
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
index a519efe..ddcf385 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
@@ -3,6 +3,7 @@ File Version: 0.12 with ORC_517
 Rows: 21000
 Compression: ZLIB
 Compression size: 4096
+Calendar: Julian/Gregorian
 Type: struct<i:int,l:bigint,s:string>
 
 Stripe Statistics:
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
index 5e1925c..6e55f1e 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -3,6 +3,7 @@ File Version: 0.12 with ORC_517
 Rows: 21000
 Compression: ZLIB
 Compression size: 4096
+Calendar: Julian/Gregorian
 Type: struct<i:int,l:bigint,s:string>
 
 Stripe Statistics:
diff --git a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
index 5befc78..64dcefc 100644
--- a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -3,6 +3,7 @@ File Version: 0.12 with ORC_517
 Rows: 21000
 Compression: ZLIB
 Compression size: 4096
+Calendar: Julian/Gregorian
 Type: struct<i:int,l:bigint,s:string>
 
 Stripe Statistics:
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index 4f756c9..0db1ccf 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -34,6 +34,7 @@
       "columnType": "STRING"
     }
   ],
+  "calendar": "Julian\/Gregorian",
   "stripeStatistics": [
     {
       "stripeNumber": 1,
diff --git a/java/tools/src/test/resources/orc-file-dump.out b/java/tools/src/test/resources/orc-file-dump.out
index 6d2a912..f601edd 100644
--- a/java/tools/src/test/resources/orc-file-dump.out
+++ b/java/tools/src/test/resources/orc-file-dump.out
@@ -3,6 +3,7 @@ File Version: 0.12 with ORC_517
 Rows: 21000
 Compression: ZLIB
 Compression size: 4096
+Calendar: Julian/Gregorian
 Type: struct<i:int,l:bigint,s:string>
 
 Stripe Statistics:
diff --git a/java/tools/src/test/resources/orc-file-has-null.out b/java/tools/src/test/resources/orc-file-has-null.out
index ee8fb12..70e2081 100644
--- a/java/tools/src/test/resources/orc-file-has-null.out
+++ b/java/tools/src/test/resources/orc-file-has-null.out
@@ -3,6 +3,7 @@ File Version: 0.12 with ORC_517
 Rows: 20000
 Compression: ZLIB
 Compression size: 4096
+Calendar: Julian/Gregorian
 Type: struct<bytes1:binary,string1:string>
 
 Stripe Statistics:
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index 27820b4..24a62a4 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -194,6 +194,15 @@ message Metadata {
   repeated StripeStatistics stripeStats = 1;
 }
 
+enum CalendarKind {
+  UNKNOWN_CALENDAR = 0;
+  // The Java default calendar is a hybrid that switches from Julian to
+  // Gregorian at the cutover on October 15, 1582.
+  JULIAN_GREGORIAN = 1;
+  // A calendar that extends the Gregorian calendar back forever.
+  PROLEPTIC_GREGORIAN = 2;
+}
+
 message Footer {
   optional uint64 headerLength = 1;
   optional uint64 contentLength = 2;
@@ -210,6 +219,8 @@ message Footer {
   // 2 = Presto
   // 3 = Scritchley Go from https://github.com/scritchley/orc
   optional uint32 writer = 9;
+  // field number 10 is set aside for encryption, so calendar uses 11
+  optional CalendarKind calendar = 11;
 }
 
 enum CompressionKind {