Posted to commits@orc.apache.org by om...@apache.org on 2016/06/30 22:00:14 UTC
[1/7] orc git commit: HIVE-14012. Some ColumnVector subclasses are missing ensureSize.
Repository: orc
Updated Branches:
refs/heads/master d9d529bcf -> 98c0992ed
HIVE-14012. Some ColumnVector subclasses are missing ensureSize.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/e8c0eb54
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/e8c0eb54
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/e8c0eb54
Branch: refs/heads/master
Commit: e8c0eb541c31de500ca0428815347f4595c8e4c1
Parents: 17e14f6
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:23:26 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:57 2016 -0700
----------------------------------------------------------------------
.../exec/vector/IntervalDayTimeColumnVector.java | 19 +++++++++++++++++++
.../ql/exec/vector/TimestampColumnVector.java | 19 +++++++++++++++++++
2 files changed, 38 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/e8c0eb54/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
index 39ccea8..c4a6c0f 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
@@ -345,4 +345,23 @@ public class IntervalDayTimeColumnVector extends ColumnVector {
buffer.append("null");
}
}
+
+ @Override
+ public void ensureSize(int size, boolean preserveData) {
+ super.ensureSize(size, preserveData);
+ if (size <= totalSeconds.length) return;
+ long[] oldTime = totalSeconds;
+ int[] oldNanos = nanos;
+ totalSeconds = new long[size];
+ nanos = new int[size];
+ if (preserveData) {
+ if (isRepeating) {
+ totalSeconds[0] = oldTime[0];
+ nanos[0] = oldNanos[0];
+ } else {
+ System.arraycopy(oldTime, 0, totalSeconds, 0, oldTime.length);
+ System.arraycopy(oldNanos, 0, nanos, 0, oldNanos.length);
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/orc/blob/e8c0eb54/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
index 228461a..0948d2d 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -397,4 +397,23 @@ public class TimestampColumnVector extends ColumnVector {
buffer.append("null");
}
}
+
+ @Override
+ public void ensureSize(int size, boolean preserveData) {
+ super.ensureSize(size, preserveData);
+ if (size <= time.length) return;
+ long[] oldTime = time;
+ int[] oldNanos = nanos;
+ time = new long[size];
+ nanos = new int[size];
+ if (preserveData) {
+ if (isRepeating) {
+ time[0] = oldTime[0];
+ nanos[0] = oldNanos[0];
+ } else {
+ System.arraycopy(oldTime, 0, time, 0, oldTime.length);
+ System.arraycopy(oldNanos, 0, nanos, 0, oldNanos.length);
+ }
+ }
+ }
}
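
Both overrides follow the same pattern: call super.ensureSize so the shared isNull array grows, then, if the request exceeds the current capacity, reallocate the type-specific arrays and copy the old values (only element 0 when the vector is repeating). A minimal caller-side sketch of why the override matters, assuming the storage-api classes above and their default capacity of 1024; the target size of 4096 is only illustrative:

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class EnsureSizeExample {
  public static void main(String[] args) {
    // The column starts with the default capacity (1024 entries).
    TimestampColumnVector col = new TimestampColumnVector();

    int needed = 4096;  // a batch larger than the default capacity
    // Without the override above, the inherited ColumnVector.ensureSize only
    // resizes isNull; time[] and nanos[] keep their old length, and the write
    // below would throw ArrayIndexOutOfBoundsException.
    col.ensureSize(needed, /* preserveData = */ true);

    // With the override, time[] and nanos[] are reallocated and the old
    // contents copied (because preserveData is true), so this write is safe.
    col.set(needed - 1, new Timestamp(0L));
    System.out.println("capacity = " + col.time.length);
  }
}

TreeReaderFactory itself passes preserveData = false (see the HIVE-13872 hunk below), since it resets and refills every column before handing the batch back.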
[7/7] orc git commit: HIVE-13872. Fix cross-product reduce sink serialization.
Posted by om...@apache.org.
HIVE-13872. Fix cross-product reduce sink serialization.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/98c0992e
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/98c0992e
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/98c0992e
Branch: refs/heads/master
Commit: 98c0992ed5fb3c2dde962b405b8fbb71b6d6be85
Parents: 13ee0b3
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:41:48 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:58 2016 -0700
----------------------------------------------------------------------
.../src/java/org/apache/orc/impl/TreeReaderFactory.java | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/98c0992e/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 5901c8c..c4a2093 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -1732,9 +1732,12 @@ public class TreeReaderFactory {
int batchSize) throws IOException {
for(int i=0; i < fields.length &&
(vectorColumnCount == -1 || i < vectorColumnCount); ++i) {
- batch.cols[i].reset();
- batch.cols[i].ensureSize((int) batchSize, false);
- fields[i].nextVector(batch.cols[i], null, batchSize);
+ ColumnVector colVector = batch.cols[i];
+ if (colVector != null) {
+ colVector.reset();
+ colVector.ensureSize((int) batchSize, false);
+ fields[i].nextVector(colVector, null, batchSize);
+ }
}
}
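
The fix stops assuming that every slot up to vectorColumnCount is populated: a batch handed to the reader can carry unmaterialized column slots, and those entries are simply null. A minimal sketch of the same defensive pattern outside the reader, assuming the storage-api vector classes; prepare() is a hypothetical helper, not part of the patch:

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class NullColumnGuard {
  // Mirrors the guarded loop in TreeReaderFactory: skip slots that were never
  // materialized instead of dereferencing null.
  static void prepare(VectorizedRowBatch batch, int batchSize) {
    for (int i = 0; i < batch.cols.length; ++i) {
      ColumnVector col = batch.cols[i];
      if (col != null) {
        col.reset();
        col.ensureSize(batchSize, false);
      }
    }
  }

  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    batch.cols[0] = new LongColumnVector();
    // batch.cols[1] stays null, e.g. a column the consumer never reads.
    prepare(batch, 2048);  // no NullPointerException on the empty slot
    System.out.println("prepared " + batch.cols.length + " slots");
  }
}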
[6/7] orc git commit: HIVE-13985. ORC improvements for reducing the file system calls in the task side.
Posted by om...@apache.org.
HIVE-13985. ORC improvements for reducing the file system calls in the task side.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/13ee0b3c
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/13ee0b3c
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/13ee0b3c
Branch: refs/heads/master
Commit: 13ee0b3cdb10585b8a3c0799f8e7685472d8458e
Parents: 047265c
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:38:32 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:58 2016 -0700
----------------------------------------------------------------------
.../src/java/org/apache/orc/FileMetaInfo.java | 64 -----
java/core/src/java/org/apache/orc/OrcFile.java | 32 +--
java/core/src/java/org/apache/orc/OrcUtils.java | 11 +
java/core/src/java/org/apache/orc/Reader.java | 9 +-
.../src/java/org/apache/orc/impl/OrcTail.java | 140 +++++++++++
.../java/org/apache/orc/impl/ReaderImpl.java | 240 ++++++++++---------
proto/orc_proto.proto | 1 +
7 files changed, 304 insertions(+), 193 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/FileMetaInfo.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/FileMetaInfo.java b/java/core/src/java/org/apache/orc/FileMetaInfo.java
deleted file mode 100644
index d3cac3b..0000000
--- a/java/core/src/java/org/apache/orc/FileMetaInfo.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.nio.ByteBuffer;
-import java.util.List;
-
-/**
- * FileMetaInfo - represents file metadata stored in footer and postscript sections of the file
- * that is useful for Reader implementation
- *
- */
-public class FileMetaInfo {
- public ByteBuffer footerMetaAndPsBuffer;
- public final String compressionType;
- public final int bufferSize;
- public final int metadataSize;
- public final ByteBuffer footerBuffer;
- public final List<Integer> versionList;
- public final OrcFile.WriterVersion writerVersion;
-
-
- /** Ctor used when reading splits - no version list or full footer buffer. */
- public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
- this(compressionType, bufferSize, metadataSize, footerBuffer, null,
- writerVersion, null);
- }
-
- /** Ctor used when creating file info during init and when getting a new one. */
- public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer, List<Integer> versionList,
- OrcFile.WriterVersion writerVersion,
- ByteBuffer fullFooterBuffer) {
- this.compressionType = compressionType;
- this.bufferSize = bufferSize;
- this.metadataSize = metadataSize;
- this.footerBuffer = footerBuffer;
- this.versionList = versionList;
- this.writerVersion = writerVersion;
- this.footerMetaAndPsBuffer = fullFooterBuffer;
- }
-
- public OrcFile.WriterVersion getWriterVersion() {
- return writerVersion;
- }
-
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index 7dd7333..ddfa9f7 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.orc.impl.MemoryManager;
+import org.apache.orc.impl.OrcTail;
import org.apache.orc.impl.ReaderImpl;
import org.apache.orc.impl.WriterImpl;
@@ -160,19 +161,17 @@ public class OrcFile {
public static class ReaderOptions {
private final Configuration conf;
private FileSystem filesystem;
- private FileMetaInfo fileMetaInfo; // TODO: this comes from some place.
private long maxLength = Long.MAX_VALUE;
- private FileMetadata fullFileMetadata; // Propagate from LLAP cache.
+ private OrcTail orcTail;
+ // TODO: We can generalize the FileMetadata interface. Make OrcTail implement FileMetadata interface
+ // and remove this class altogether. Both footer caching and LLAP caching just need OrcTail.
+ // For now, keeping this around to avoid complex surgery.
+ private FileMetadata fileMetadata;
public ReaderOptions(Configuration conf) {
this.conf = conf;
}
- public ReaderOptions fileMetaInfo(FileMetaInfo info) {
- fileMetaInfo = info;
- return this;
- }
-
public ReaderOptions filesystem(FileSystem fs) {
this.filesystem = fs;
return this;
@@ -183,8 +182,8 @@ public class OrcFile {
return this;
}
- public ReaderOptions fileMetadata(FileMetadata metadata) {
- this.fullFileMetadata = metadata;
+ public ReaderOptions orcTail(OrcTail tail) {
+ this.orcTail = tail;
return this;
}
@@ -196,16 +195,21 @@ public class OrcFile {
return filesystem;
}
- public FileMetaInfo getFileMetaInfo() {
- return fileMetaInfo;
- }
-
public long getMaxLength() {
return maxLength;
}
+ public OrcTail getOrcTail() {
+ return orcTail;
+ }
+
+ public ReaderOptions fileMetadata(final FileMetadata metadata) {
+ fileMetadata = metadata;
+ return this;
+ }
+
public FileMetadata getFileMetadata() {
- return fullFileMetadata;
+ return fileMetadata;
}
}
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java
index 9dd7504..94493b3 100644
--- a/java/core/src/java/org/apache/orc/OrcUtils.java
+++ b/java/core/src/java/org/apache/orc/OrcUtils.java
@@ -17,6 +17,8 @@
*/
package org.apache.orc;
+import org.apache.orc.impl.ReaderImpl;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -525,4 +527,13 @@ public class OrcUtils {
}
throw new IllegalArgumentException("Unknown ORC type " + type.getKind());
}
+
+ public static List<StripeInformation> convertProtoStripesToStripes(
+ List<OrcProto.StripeInformation> stripes) {
+ List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
+ for (OrcProto.StripeInformation info : stripes) {
+ result.add(new ReaderImpl.StripeInformationImpl(info));
+ }
+ return result;
+ }
}
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/Reader.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 87f3293..c2d5235 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -138,6 +138,13 @@ public interface Reader {
OrcFile.WriterVersion getWriterVersion();
/**
+ * Get the file tail (footer + postscript)
+ *
+ * @return - file tail
+ */
+ OrcProto.FileTail getFileTail();
+
+ /**
* Options for creating a RecordReader.
*/
public static class Options {
@@ -354,7 +361,7 @@ public interface Reader {
/**
* @return Stripe statistics.
*/
- List<StripeStatistics> getStripeStatistics();
+ List<StripeStatistics> getStripeStatistics() throws IOException;
/**
* @return File statistics, in original protobuf form.
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/impl/OrcTail.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/OrcTail.java b/java/core/src/java/org/apache/orc/impl/OrcTail.java
new file mode 100644
index 0000000..b5f85fb
--- /dev/null
+++ b/java/core/src/java/org/apache/orc/impl/OrcTail.java
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import static org.apache.orc.impl.ReaderImpl.extractMetadata;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcProto;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.StripeStatistics;
+
+// TODO: Make OrcTail implement FileMetadata or Reader interface
+public final class OrcTail {
+ // postscript + footer - Serialized in OrcSplit
+ private final OrcProto.FileTail fileTail;
+ // serialized representation of metadata, footer and postscript
+ private final ByteBuffer serializedTail;
+ // used to invalidate cache entries
+ private final long fileModificationTime;
+ // lazily deserialized
+ private OrcProto.Metadata metadata;
+
+ public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail) {
+ this(fileTail, serializedTail, -1);
+ }
+
+ public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail, long fileModificationTime) {
+ this.fileTail = fileTail;
+ this.serializedTail = serializedTail;
+ this.fileModificationTime = fileModificationTime;
+ this.metadata = null;
+ }
+
+ public ByteBuffer getSerializedTail() {
+ return serializedTail;
+ }
+
+ public long getFileModificationTime() {
+ return fileModificationTime;
+ }
+
+ public OrcProto.Footer getFooter() {
+ return fileTail.getFooter();
+ }
+
+ public OrcProto.PostScript getPostScript() {
+ return fileTail.getPostscript();
+ }
+
+ public OrcFile.WriterVersion getWriterVersion() {
+ OrcProto.PostScript ps = fileTail.getPostscript();
+ return (ps.hasWriterVersion()
+ ? OrcFile.WriterVersion.from(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
+ }
+
+ public List<StripeInformation> getStripes() {
+ List<StripeInformation> result = new ArrayList<>(fileTail.getFooter().getStripesCount());
+ for (OrcProto.StripeInformation stripeProto : fileTail.getFooter().getStripesList()) {
+ result.add(new ReaderImpl.StripeInformationImpl(stripeProto));
+ }
+ return result;
+ }
+
+ public CompressionKind getCompressionKind() {
+ return CompressionKind.valueOf(fileTail.getPostscript().getCompression().name());
+ }
+
+ public CompressionCodec getCompressionCodec() {
+ return WriterImpl.createCodec(getCompressionKind());
+ }
+
+ public int getCompressionBufferSize() {
+ return (int) fileTail.getPostscript().getCompressionBlockSize();
+ }
+
+ public List<StripeStatistics> getStripeStatistics() throws IOException {
+ List<StripeStatistics> result = new ArrayList<>();
+ List<OrcProto.StripeStatistics> ssProto = getStripeStatisticsProto();
+ if (ssProto != null) {
+ for (OrcProto.StripeStatistics ss : ssProto) {
+ result.add(new StripeStatistics(ss.getColStatsList()));
+ }
+ }
+ return result;
+ }
+
+ public List<OrcProto.StripeStatistics> getStripeStatisticsProto() throws IOException {
+ if (serializedTail == null) return null;
+ if (metadata == null) {
+ metadata = extractMetadata(serializedTail, 0,
+ (int) fileTail.getPostscript().getMetadataLength(),
+ getCompressionCodec(), getCompressionBufferSize());
+ // clear does not clear the contents but sets position to 0 and limit = capacity
+ serializedTail.clear();
+ }
+ return metadata.getStripeStatsList();
+ }
+
+ public int getMetadataSize() {
+ return (int) getPostScript().getMetadataLength();
+ }
+
+ public List<OrcProto.Type> getTypes() {
+ return getFooter().getTypesList();
+ }
+
+ public OrcProto.FileTail getFileTail() {
+ return fileTail;
+ }
+
+ public OrcProto.FileTail getMinimalFileTail() {
+ OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(fileTail);
+ OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter());
+ footerBuilder.clearStatistics();
+ fileTailBuilder.setFooter(footerBuilder.build());
+ OrcProto.FileTail result = fileTailBuilder.build();
+ return result;
+ }
+}
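
OrcTail bundles the postscript and footer into one FileTail message, keeps the raw tail bytes for lazy decoding of the stripe statistics, and records the file modification time for cache invalidation. The point of the change is that the tail can be read once, cached or shipped in a split, and reused, so the task side does not repeat the footer read. A minimal sketch of the consumer-facing pieces, assuming the existing OrcFile.createReader/readerOptions entry points; the file path is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcProto;
import org.apache.orc.Reader;

public class FileTailPeek {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);  // an existing ORC file

    // One tail read happens here; the reader keeps the parsed tail around.
    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));

    // The new accessor exposes footer + postscript as a single proto message,
    // which is what an OrcSplit or a footer cache can carry.
    OrcProto.FileTail tail = reader.getFileTail();
    System.out.println("rows        = " + tail.getFooter().getNumberOfRows());
    System.out.println("compression = " + tail.getPostscript().getCompression());
    System.out.println("file length = " + tail.getFileLength());
  }
}

A caller that already holds an OrcTail, for example one rebuilt from cached tail bytes via ReaderImpl.extractFileTail(ByteBuffer), can pass it through OrcFile.readerOptions(conf).orcTail(cachedTail); ReaderImpl then takes the tail as given and never touches the filesystem for the footer.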
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 7625d4a..a18f922 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -27,6 +27,9 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.FileMetadata;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcUtils;
import org.apache.orc.Reader;
@@ -35,8 +38,6 @@ import org.apache.orc.TypeDescription;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.CompressionCodec;
import org.apache.orc.FileFormatException;
-import org.apache.orc.FileMetaInfo;
-import org.apache.orc.FileMetadata;
import org.apache.orc.StripeInformation;
import org.apache.orc.StripeStatistics;
import org.slf4j.Logger;
@@ -62,27 +63,25 @@ public class ReaderImpl implements Reader {
private final long maxLength;
protected final Path path;
protected final org.apache.orc.CompressionKind compressionKind;
- protected final CompressionCodec codec;
- protected final int bufferSize;
- private final List<OrcProto.StripeStatistics> stripeStats;
+ protected CompressionCodec codec;
+ protected int bufferSize;
+ protected OrcProto.Metadata metadata;
+ private List<OrcProto.StripeStatistics> stripeStats;
private final int metadataSize;
protected final List<OrcProto.Type> types;
- private final TypeDescription schema;
+ private TypeDescription schema;
private final List<OrcProto.UserMetadataItem> userMetadata;
private final List<OrcProto.ColumnStatistics> fileStats;
private final List<StripeInformation> stripes;
protected final int rowIndexStride;
private final long contentLength, numberOfRows;
-
private long deserializedSize = -1;
protected final Configuration conf;
private final List<Integer> versionList;
private final OrcFile.WriterVersion writerVersion;
- // Same for metastore cache - maintains the same background buffer, but includes postscript.
- // This will only be set if the file footer/metadata was read from disk.
- private final ByteBuffer footerMetaAndPsBuffer;
+ protected OrcTail tail;
public static class StripeInformationImpl
implements StripeInformation {
@@ -206,6 +205,11 @@ public class ReaderImpl implements Reader {
}
@Override
+ public OrcProto.FileTail getFileTail() {
+ return tail.getFileTail();
+ }
+
+ @Override
public int getRowIndexStride() {
return rowIndexStride;
}
@@ -260,6 +264,32 @@ public class ReaderImpl implements Reader {
}
/**
+ * Ensure this is an ORC file to prevent users from trying to read text
+ * files or RC files as ORC files.
+ * @param psLen the postscript length
+ * @param buffer the tail of the file
+ * @throws IOException
+ */
+ protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException {
+ int magicLength = OrcFile.MAGIC.length();
+ int fullLength = magicLength + 1;
+ if (psLen < fullLength || buffer.remaining() < fullLength) {
+ throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
+ }
+
+ int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
+ byte[] array = buffer.array();
+ // now look for the magic string at the end of the postscript.
+ if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
+ // if it isn't there, this may be 0.11.0 version of the ORC file.
+ // Read the first 3 bytes from the buffer to check for the header
+ if (!Text.decode(buffer.array(), 0, magicLength).equals(OrcFile.MAGIC)) {
+ throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
+ }
+ }
+ }
+
+ /**
* Build a version string out of an array.
* @param version the version number as a list
* @return the human readable form of the version string
@@ -315,7 +345,6 @@ public class ReaderImpl implements Reader {
this.path = path;
this.conf = options.getConfiguration();
this.maxLength = options.getMaxLength();
-
FileMetadata fileMetadata = options.getFileMetadata();
if (fileMetadata != null) {
this.compressionKind = fileMetadata.getCompressionKind();
@@ -333,38 +362,28 @@ public class ReaderImpl implements Reader {
this.fileStats = fileMetadata.getFileStats();
this.stripes = fileMetadata.getStripes();
this.userMetadata = null; // not cached and not needed here
- this.footerMetaAndPsBuffer = null;
} else {
- FileMetaInfo footerMetaData;
- if (options.getFileMetaInfo() != null) {
- footerMetaData = options.getFileMetaInfo();
- this.footerMetaAndPsBuffer = null;
+ OrcTail orcTail = options.getOrcTail();
+ if (orcTail == null) {
+ tail = extractFileTail(fs, path, options.getMaxLength());
+ options.orcTail(tail);
} else {
- footerMetaData = extractMetaInfoFromFooter(fs, path,
- options.getMaxLength());
- this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
+ tail = orcTail;
}
- options.fileMetaInfo(footerMetaData);
- MetaInfoObjExtractor rInfo =
- new MetaInfoObjExtractor(footerMetaData.compressionType,
- footerMetaData.bufferSize,
- footerMetaData.metadataSize,
- footerMetaData.footerBuffer
- );
- this.compressionKind = rInfo.compressionKind;
- this.codec = rInfo.codec;
- this.bufferSize = rInfo.bufferSize;
- this.metadataSize = rInfo.metadataSize;
- this.stripeStats = rInfo.metadata.getStripeStatsList();
- this.types = rInfo.footer.getTypesList();
- this.rowIndexStride = rInfo.footer.getRowIndexStride();
- this.contentLength = rInfo.footer.getContentLength();
- this.numberOfRows = rInfo.footer.getNumberOfRows();
- this.userMetadata = rInfo.footer.getMetadataList();
- this.fileStats = rInfo.footer.getStatisticsList();
- this.versionList = footerMetaData.versionList;
- this.writerVersion = footerMetaData.writerVersion;
- this.stripes = convertProtoStripesToStripes(rInfo.footer.getStripesList());
+ this.compressionKind = tail.getCompressionKind();
+ this.codec = tail.getCompressionCodec();
+ this.bufferSize = tail.getCompressionBufferSize();
+ this.metadataSize = tail.getMetadataSize();
+ this.versionList = tail.getPostScript().getVersionList();
+ this.types = tail.getFooter().getTypesList();
+ this.rowIndexStride = tail.getFooter().getRowIndexStride();
+ this.contentLength = tail.getFooter().getContentLength();
+ this.numberOfRows = tail.getFooter().getNumberOfRows();
+ this.userMetadata = tail.getFooter().getMetadataList();
+ this.fileStats = tail.getFooter().getStatisticsList();
+ this.writerVersion = tail.getWriterVersion();
+ this.stripes = tail.getStripes();
+ this.stripeStats = tail.getStripeStatisticsProto();
}
this.schema = OrcUtils.convertTypeFromProtobuf(this.types, 0);
}
@@ -397,7 +416,7 @@ public class ReaderImpl implements Reader {
singleton(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
}
- private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
+ public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(metadataAbsPos);
bb.limit(metadataAbsPos + metadataSize);
@@ -430,22 +449,55 @@ public class ReaderImpl implements Reader {
return ps;
}
- private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
- Path path,
- long maxFileLength
- ) throws IOException {
+ public static OrcTail extractFileTail(ByteBuffer buffer)
+ throws IOException {
+ return extractFileTail(buffer, -1, -1);
+ }
+
+ public static OrcTail extractFileTail(ByteBuffer buffer, long fileLength, long modificationTime)
+ throws IOException {
+ int readSize = buffer.limit();
+ int psLen = buffer.get(readSize - 1) & 0xff;
+ int psOffset = readSize - 1 - psLen;
+ ensureOrcFooter(buffer, psLen);
+ byte[] psBuffer = new byte[psLen];
+ System.arraycopy(buffer.array(), psOffset, psBuffer, 0, psLen);
+ OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(psBuffer);
+ int footerSize = (int) ps.getFooterLength();
+ CompressionCodec codec = WriterImpl
+ .createCodec(CompressionKind.valueOf(ps.getCompression().name()));
+ OrcProto.Footer footer = extractFooter(buffer,
+ (int) (buffer.position() + ps.getMetadataLength()),
+ footerSize, codec, (int) ps.getCompressionBlockSize());
+ OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder()
+ .setPostscriptLength(psLen)
+ .setPostscript(ps)
+ .setFooter(footer)
+ .setFileLength(fileLength);
+ // clear does not clear the contents but sets position to 0 and limit = capacity
+ buffer.clear();
+ return new OrcTail(fileTailBuilder.build(), buffer.slice(), modificationTime);
+ }
+
+ protected OrcTail extractFileTail(FileSystem fs, Path path,
+ long maxFileLength) throws IOException {
FSDataInputStream file = fs.open(path);
- ByteBuffer buffer = null, fullFooterBuffer = null;
- OrcProto.PostScript ps = null;
- OrcFile.WriterVersion writerVersion = null;
+ ByteBuffer buffer;
+ OrcProto.PostScript ps;
+ OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
+ long modificationTime;
try {
// figure out the size of the file using the option or filesystem
long size;
if (maxFileLength == Long.MAX_VALUE) {
- size = fs.getFileStatus(path).getLen();
+ FileStatus fileStatus = fs.getFileStatus(path);
+ size = fileStatus.getLen();
+ modificationTime = fileStatus.getModificationTime();
} else {
size = maxFileLength;
+ modificationTime = -1;
}
+ fileTailBuilder.setFileLength(size);
//read last bytes into buffer to get PostScript
int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
@@ -461,13 +513,16 @@ public class ReaderImpl implements Reader {
ensureOrcFooter(file, path, psLen, buffer);
int psOffset = readSize - 1 - psLen;
ps = extractPostScript(buffer, path, psLen, psOffset);
+ bufferSize = (int) ps.getCompressionBlockSize();
+ codec = WriterImpl.createCodec(CompressionKind.valueOf(ps.getCompression().name()));
+ fileTailBuilder.setPostscriptLength(psLen).setPostscript(ps);
int footerSize = (int) ps.getFooterLength();
int metadataSize = (int) ps.getMetadataLength();
- writerVersion = extractWriterVersion(ps);
//check if extra bytes need to be read
int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
+ int tailSize = 1 + psLen + footerSize + metadataSize;
if (extra > 0) {
//more bytes need to be read, seek back to the right place and read extra bytes
ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
@@ -478,17 +533,23 @@ public class ReaderImpl implements Reader {
extraBuf.put(buffer);
buffer = extraBuf;
buffer.position(0);
- fullFooterBuffer = buffer.slice();
- buffer.limit(footerSize + metadataSize);
+ buffer.limit(tailSize);
+ readSize += extra;
+ psOffset = readSize - 1 - psLen;
} else {
//footer is already in the bytes in buffer, just adjust position, length
buffer.position(psOffset - footerSize - metadataSize);
- fullFooterBuffer = buffer.slice();
- buffer.limit(psOffset);
+ buffer.limit(buffer.position() + tailSize);
}
- // remember position for later TODO: what later? this comment is useless
buffer.mark();
+ int footerOffset = psOffset - footerSize;
+ buffer.position(footerOffset);
+ ByteBuffer footerBuffer = buffer.slice();
+ buffer.reset();
+ OrcProto.Footer footer = extractFooter(footerBuffer, 0, footerSize,
+ codec, bufferSize);
+ fileTailBuilder.setFooter(footer);
} finally {
try {
file.close();
@@ -497,68 +558,15 @@ public class ReaderImpl implements Reader {
}
}
- return new FileMetaInfo(
- ps.getCompression().toString(),
- (int) ps.getCompressionBlockSize(),
- (int) ps.getMetadataLength(),
- buffer,
- ps.getVersionList(),
- writerVersion,
- fullFooterBuffer
- );
- }
-
- protected static OrcFile.WriterVersion extractWriterVersion(OrcProto.PostScript ps) {
- return (ps.hasWriterVersion()
- ? getWriterVersion(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
- }
-
- protected static List<StripeInformation> convertProtoStripesToStripes(
- List<OrcProto.StripeInformation> stripes) {
- List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
- for (OrcProto.StripeInformation info : stripes) {
- result.add(new StripeInformationImpl(info));
- }
- return result;
- }
-
- /**
- * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl
- * from serialized fields.
- * As the fields are final, the fields need to be initialized in the constructor and
- * can't be done in some helper function. So this helper class is used instead.
- *
- */
- private static class MetaInfoObjExtractor{
- final org.apache.orc.CompressionKind compressionKind;
- final CompressionCodec codec;
- final int bufferSize;
- final int metadataSize;
- final OrcProto.Metadata metadata;
- final OrcProto.Footer footer;
-
- MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer) throws IOException {
-
- this.compressionKind = org.apache.orc.CompressionKind.valueOf(codecStr.toUpperCase());
- this.bufferSize = bufferSize;
- this.codec = WriterImpl.createCodec(compressionKind);
- this.metadataSize = metadataSize;
-
- int position = footerBuffer.position();
- int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
-
- this.metadata = extractMetadata(footerBuffer, position, metadataSize, codec, bufferSize);
- this.footer = extractFooter(
- footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize);
-
- footerBuffer.position(position);
- }
+ ByteBuffer serializedTail = ByteBuffer.allocate(buffer.remaining());
+ serializedTail.put(buffer.slice());
+ serializedTail.rewind();
+ return new OrcTail(fileTailBuilder.build(), serializedTail, modificationTime);
}
@Override
public ByteBuffer getSerializedFileFooter() {
- return footerMetaAndPsBuffer;
+ return tail.getSerializedTail();
}
@Override
@@ -727,7 +735,11 @@ public class ReaderImpl implements Reader {
}
@Override
- public List<StripeStatistics> getStripeStatistics() {
+ public List<StripeStatistics> getStripeStatistics() throws IOException {
+ if (stripeStats == null && metadata == null) {
+ metadata = extractMetadata(tail.getSerializedTail(), 0, metadataSize, codec, bufferSize);
+ stripeStats = metadata.getStripeStatsList();
+ }
List<StripeStatistics> result = new ArrayList<>();
for (OrcProto.StripeStatistics ss : stripeStats) {
result.add(new StripeStatistics(ss.getColStatsList()));
http://git-wip-us.apache.org/repos/asf/orc/blob/13ee0b3c/proto/orc_proto.proto
----------------------------------------------------------------------
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index 6b7e597..dbc34ab 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -224,6 +224,7 @@ message PostScript {
}
// The contents of the file tail that must be serialized.
+// This gets serialized as part of OrcSplit, also used by footer cache.
message FileTail {
optional PostScript postscript = 1;
optional Footer footer = 2;
[3/7] orc git commit: Updating the versions of storage api for merging changes from Hive.
Posted by om...@apache.org.
Updating the versions of storage api for merging changes from Hive.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/cbedf88d
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/cbedf88d
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/cbedf88d
Branch: refs/heads/master
Commit: cbedf88d19951f6d93d0edcb333ec79231cd8dc9
Parents: d9d529b
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 11:02:06 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:57 2016 -0700
----------------------------------------------------------------------
java/CMakeLists.txt | 2 +-
java/pom.xml | 2 +-
java/storage-api/pom.xml | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/cbedf88d/java/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt
index b1be1dc..e8ca7ff 100644
--- a/java/CMakeLists.txt
+++ b/java/CMakeLists.txt
@@ -15,7 +15,7 @@ execute_process(COMMAND mvn versions:set -DnewVersion=${ORC_VERSION}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
set(ORC_JARS
- ${CMAKE_CURRENT_BINARY_DIR}/storage-api/hive-storage-api-2.1.0-pre-orc.jar
+ ${CMAKE_CURRENT_BINARY_DIR}/storage-api/hive-storage-api-2.1.1-pre-orc.jar
${CMAKE_CURRENT_BINARY_DIR}/core/orc-core-${ORC_VERSION}.jar
${CMAKE_CURRENT_BINARY_DIR}/mapreduce/orc-mapreduce-${ORC_VERSION}.jar
)
http://git-wip-us.apache.org/repos/asf/orc/blob/cbedf88d/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index f707cb6..01fdf04 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -69,7 +69,7 @@
<test.tmp.dir>${project.build.directory}/testing-tmp</test.tmp.dir>
<hadoop.version>2.6.0</hadoop.version>
- <storage-api.version>2.1.0-pre-orc</storage-api.version>
+ <storage-api.version>2.1.1-pre-orc</storage-api.version>
</properties>
<build>
http://git-wip-us.apache.org/repos/asf/orc/blob/cbedf88d/java/storage-api/pom.xml
----------------------------------------------------------------------
diff --git a/java/storage-api/pom.xml b/java/storage-api/pom.xml
index 86463e6..fae7e9a 100644
--- a/java/storage-api/pom.xml
+++ b/java/storage-api/pom.xml
@@ -25,8 +25,8 @@
<groupId>org.apache.hive</groupId>
<artifactId>hive-storage-api</artifactId>
<!-- remove our custom version of storage-api once we get the changes
- released as hive 2.1.0 -->
- <version>2.1.0-pre-orc</version>
+ released as hive 2.1.1 -->
+ <version>2.1.1-pre-orc</version>
<packaging>jar</packaging>
<name>Hive Storage API</name>
[2/7] orc git commit: HIVE-13948 Incorrect timezone handling in Writable.
Posted by om...@apache.org.
HIVE-13948 Incorrect timezone handling in Writable.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/68cdbd40
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/68cdbd40
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/68cdbd40
Branch: refs/heads/master
Commit: 68cdbd402e8bc81e176bb8175c1fb1173a4b3c23
Parents: cbedf88
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:19:30 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:57 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/serde2/io/DateWritable.java | 68 ++++++++++++++++----
1 file changed, 57 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/68cdbd40/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
index dd2b1d9..637720a 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
@@ -22,6 +22,7 @@ import java.io.DataOutput;
import java.io.IOException;
import java.sql.Date;
import java.util.Calendar;
+import java.util.GregorianCalendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
@@ -41,7 +42,7 @@ public class DateWritable implements WritableComparable<DateWritable> {
private static final long MILLIS_PER_DAY = TimeUnit.DAYS.toMillis(1);
- // Local time zone.
+ // Local time zone. Store separately because Calendar would clone it.
// Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
private static final ThreadLocal<TimeZone> LOCAL_TIMEZONE = new ThreadLocal<TimeZone>() {
@Override
@@ -50,6 +51,19 @@ public class DateWritable implements WritableComparable<DateWritable> {
}
};
+ private static final ThreadLocal<Calendar> UTC_CALENDAR = new ThreadLocal<Calendar>() {
+ @Override
+ protected Calendar initialValue() {
+ return new GregorianCalendar(TimeZone.getTimeZone("UTC"));
+ }
+ };
+ private static final ThreadLocal<Calendar> LOCAL_CALENDAR = new ThreadLocal<Calendar>() {
+ @Override
+ protected Calendar initialValue() {
+ return Calendar.getInstance();
+ }
+ };
+
// Internal representation is an integer representing day offset from our epoch value 1970-01-01
private int daysSinceEpoch = 0;
@@ -95,11 +109,16 @@ public class DateWritable implements WritableComparable<DateWritable> {
}
/**
- *
* @return Date value corresponding to the date in the local time zone
*/
public Date get() {
- return new Date(daysToMillis(daysSinceEpoch));
+ return get(true);
+ }
+
+ // TODO: we should call this more often. In theory, for DATE type, time should never matter, but
+ // it's hard to tell w/some code paths like UDFs/OIs etc. that are used in many places.
+ public Date get(boolean doesTimeMatter) {
+ return new Date(daysToMillis(daysSinceEpoch, doesTimeMatter));
}
public int getDays() {
@@ -119,21 +138,47 @@ public class DateWritable implements WritableComparable<DateWritable> {
}
public static long daysToMillis(int d) {
- // Convert from day offset to ms in UTC, then apply local timezone offset.
- long millisUtc = d * MILLIS_PER_DAY;
- long tmp = millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc);
- // Between millisUtc and tmp, the time zone offset may have changed due to DST.
- // Look up the offset again.
- return millisUtc - LOCAL_TIMEZONE.get().getOffset(tmp);
+ return daysToMillis(d, true);
+ }
+
+ public static long daysToMillis(int d, boolean doesTimeMatter) {
+ // What we are trying to get is the equivalent of new Date(ymd).getTime() in the local tz,
+ // where ymd is whatever d represents. How it "works" is this.
+ // First we get the UTC midnight for that day (which always exists, a small island of sanity).
+ long utcMidnight = d * MILLIS_PER_DAY;
+ // Now we take a local TZ offset at midnight UTC. Say we are in -4; that means (surprise
+ // surprise) that at midnight UTC it was 20:00 in local. So far we are on firm ground.
+ long utcMidnightOffset = LOCAL_TIMEZONE.get().getOffset(utcMidnight);
+ // And now we wander straight into the swamp, when instead of adding, we subtract it from UTC
+ // midnight to supposedly get local midnight (in the above case, 4:00 UTC). Of course, given
+ // all the insane DST variations, where we actually end up is anyone's guess.
+ long hopefullyMidnight = utcMidnight - utcMidnightOffset;
+ // Then we determine the local TZ offset at that magical time.
+ long offsetAtHM = LOCAL_TIMEZONE.get().getOffset(hopefullyMidnight);
+ // If the offsets are the same, we assume our initial jump did not cross any DST boundaries,
+ // and is thus valid. Both times flowed at the same pace. We congratulate ourselves and bail.
+ if (utcMidnightOffset == offsetAtHM) return hopefullyMidnight;
+ // Alas, we crossed some DST boundary. If the time of day doesn't matter to the caller, we'll
+ // simply get the next day and go back half a day. This is not ideal but seems to work.
+ if (!doesTimeMatter) return daysToMillis(d + 1) - (MILLIS_PER_DAY >> 1);
+ // Now, we could get previous and next day, figure our how many hours were inserted or removed,
+ // and from which of the days, etc. But at this point our gun is pointing straight at our foot,
+ // so let's just go the safe, expensive way.
+ Calendar utc = UTC_CALENDAR.get(), local = LOCAL_CALENDAR.get();
+ utc.setTimeInMillis(utcMidnight);
+ local.set(utc.get(Calendar.YEAR), utc.get(Calendar.MONTH), utc.get(Calendar.DAY_OF_MONTH));
+ return local.getTimeInMillis();
}
public static int millisToDays(long millisLocal) {
+ // We assume millisLocal is midnight of some date. What we are basically trying to do
+ // here is go from local-midnight to UTC-midnight (or whatever time that happens to be).
long millisUtc = millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal);
int days;
if (millisUtc >= 0L) {
days = (int) (millisUtc / MILLIS_PER_DAY);
} else {
- days = (int) ((millisUtc - 86399999) / MILLIS_PER_DAY);
+ days = (int) ((millisUtc - 86399999 /*(MILLIS_PER_DAY - 1)*/) / MILLIS_PER_DAY);
}
return days;
}
@@ -169,7 +214,8 @@ public class DateWritable implements WritableComparable<DateWritable> {
@Override
public String toString() {
- return get().toString();
+ // For toString, the time does not matter
+ return get(false).toString();
}
@Override
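
The rewritten daysToMillis spells out its reasoning in the comments above: convert the day offset to UTC midnight, subtract the local offset to guess local midnight, accept the guess if no DST boundary was crossed, and otherwise fall back to either a half-day adjustment (when the time of day does not matter) or an explicit Calendar conversion. A small round-trip example, assuming the storage-api DateWritable; the zone and date are only illustrative:

import java.sql.Date;
import java.util.TimeZone;
import org.apache.hadoop.hive.serde2.io.DateWritable;

public class DateRoundTrip {
  public static void main(String[] args) {
    // Pin the JVM default zone so the output is deterministic; DateWritable
    // derives all of its offsets from the default time zone.
    TimeZone.setDefault(TimeZone.getTimeZone("America/New_York"));

    // 2016-03-13 is a US spring-forward day. The offsets at UTC midnight and
    // at the computed local midnight still agree, so daysToMillis takes its
    // fast path and the round trip is exact.
    int days = DateWritable.millisToDays(Date.valueOf("2016-03-13").getTime());
    long millis = DateWritable.daysToMillis(days);

    System.out.println(new Date(millis));        // 2016-03-13
    System.out.println(new DateWritable(days));  // toString() now ignores the time of day
  }
}

Zones that shift the clock exactly at local midnight, so that midnight does not exist on the transition day, are the ones that reach the new Calendar-based fallback at the end of daysToMillis.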
[4/7] orc git commit: HIVE-14000 Changing a numeric type column causes values other than NULL.
Posted by om...@apache.org.
HIVE-14000 Changing a numeric type column causes values other than NULL.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/17e14f62
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/17e14f62
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/17e14f62
Branch: refs/heads/master
Commit: 17e14f62d2315c5149e4e491ccf922913b13cc7e
Parents: 68cdbd4
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:21:24 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:57 2016 -0700
----------------------------------------------------------------------
.../orc/impl/ConvertTreeReaderFactory.java | 305 ++++++++++---------
.../hadoop/hive/ql/util/TimestampUtils.java | 70 +++--
2 files changed, 206 insertions(+), 169 deletions(-)
----------------------------------------------------------------------
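
The common thread in the ConvertTreeReaderFactory changes below is that a value which cannot be represented in the narrower reader type now becomes NULL instead of a silently wrapped number: doubles and decimals are range-checked before the cast to long, and downCastAnyInteger nulls the entry when the cast to byte/short/int loses data. A minimal sketch of the range check, which mirrors the bounds borrowed from Guava's DoubleMath.roundToLong; the sample values are only illustrative:

public class DownCastCheck {
  // Long.MIN_VALUE is exactly representable as a double; Long.MAX_VALUE is not,
  // so the patch stores Long.MAX_VALUE + 1 (== 2^63) and offsets the comparison.
  private static final double MIN_LONG_AS_DOUBLE = -0x1p63;
  private static final double MAX_LONG_AS_DOUBLE_PLUS_ONE = 0x1p63;

  static boolean doubleCanFitInLong(double d) {
    return (MIN_LONG_AS_DOUBLE - d < 1.0) && (d < MAX_LONG_AS_DOUBLE_PLUS_ONE);
  }

  public static void main(String[] args) {
    System.out.println(doubleCanFitInLong(123.0));  // true  -> down-cast proceeds
    System.out.println(doubleCanFitInLong(1e19));   // false -> entry is set to NULL

    // After the long-range check, downCastAnyInteger also nulls the entry when
    // the value does not survive the cast to the narrower integer type.
    long input = 300;
    byte narrowed = (byte) input;                   // 44 -> data loss
    System.out.println(narrowed != input);          // true -> would become NULL
  }
}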
http://git-wip-us.apache.org/repos/asf/orc/blob/17e14f62/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 3ba56f7..753e5bc 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -35,8 +35,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.ql.util.TimestampUtils;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.FloatWritable;
import org.apache.orc.OrcProto;
import org.apache.orc.TypeDescription;
import org.apache.orc.TypeDescription.Category;
@@ -263,6 +261,22 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return string;
}
+ private static final double MIN_LONG_AS_DOUBLE = -0x1p63;
+ /*
+ * We cannot store Long.MAX_VALUE as a double without losing precision. Instead, we store
+ * Long.MAX_VALUE + 1 == -Long.MIN_VALUE, and then offset all comparisons by 1.
+ */
+ private static final double MAX_LONG_AS_DOUBLE_PLUS_ONE = 0x1p63;
+
+ public boolean doubleCanFitInLong(double doubleValue) {
+
+ // Borrowed from Guava DoubleMath.roundToLong except do not want dependency on Guava and we
+ // don't want to catch an exception.
+
+ return ((MIN_LONG_AS_DOUBLE - doubleValue < 1.0) &&
+ (doubleValue < MAX_LONG_AS_DOUBLE_PLUS_ONE));
+ }
+
@Override
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
// Pass-thru.
@@ -336,20 +350,44 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
}
}
- public long downCastAnyInteger(long input, TypeDescription readerType) {
- switch (readerType.getCategory()) {
+ public void downCastAnyInteger(LongColumnVector longColVector, int elementNum,
+ TypeDescription readerType) {
+ downCastAnyInteger(longColVector, elementNum, longColVector.vector[elementNum], readerType);
+ }
+
+ public void downCastAnyInteger(LongColumnVector longColVector, int elementNum, long inputLong,
+ TypeDescription readerType) {
+ long[] vector = longColVector.vector;
+ long outputLong;
+ Category readerCategory = readerType.getCategory();
+ switch (readerCategory) {
case BOOLEAN:
- return input == 0 ? 0 : 1;
+ // No data loss for boolean.
+ vector[elementNum] = inputLong == 0 ? 0 : 1;
+ return;
case BYTE:
- return (byte) input;
+ outputLong = (byte) inputLong;
+ break;
case SHORT:
- return (short) input;
+ outputLong = (short) inputLong;
+ break;
case INT:
- return (int) input;
+ outputLong = (int) inputLong;
+ break;
case LONG:
- return input;
+ // No data loss for long.
+ vector[elementNum] = inputLong;
+ return;
default:
- throw new RuntimeException("Unexpected type kind " + readerType.getCategory().name());
+ throw new RuntimeException("Unexpected type kind " + readerCategory.name());
+ }
+
+ if (outputLong != inputLong) {
+ // Data loss.
+ longColVector.isNull[elementNum] = true;
+ longColVector.noNulls = false;
+ } else {
+ vector[elementNum] = outputLong;
}
}
@@ -439,25 +477,22 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
anyIntegerAsLongTreeReader.nextVector(previousVector, isNull, batchSize);
LongColumnVector resultColVector = (LongColumnVector) previousVector;
if (downCastNeeded) {
- long[] resultVector = resultColVector.vector;
if (resultColVector.isRepeating) {
if (resultColVector.noNulls || !resultColVector.isNull[0]) {
- resultVector[0] = downCastAnyInteger(resultVector[0], readerType);
+ downCastAnyInteger(resultColVector, 0, readerType);
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[0] = true;
+ // Result remains null.
}
} else if (resultColVector.noNulls){
for (int i = 0; i < batchSize; i++) {
- resultVector[i] = downCastAnyInteger(resultVector[i], readerType);
+ downCastAnyInteger(resultColVector, i, readerType);
}
} else {
for (int i = 0; i < batchSize; i++) {
if (!resultColVector.isNull[i]) {
- resultVector[i] = downCastAnyInteger(resultVector[i], readerType);
+ downCastAnyInteger(resultColVector, i, readerType);
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[i] = true;
+ // Result remains null.
}
}
}
@@ -470,7 +505,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
private final TypeDescription readerType;
- private FloatWritable floatResult;
private DoubleColumnVector doubleColVector;
private LongColumnVector longColVector;
@@ -480,15 +514,19 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
public void setConvertVectorElement(int elementNum) throws IOException {
- float floatValue = (float) doubleColVector.vector[elementNum];
- longColVector.vector[elementNum] =
- downCastAnyInteger(
- (long) floatValue, readerType);
+ double doubleValue = doubleColVector.vector[elementNum];
+ if (!doubleCanFitInLong(doubleValue)) {
+ longColVector.isNull[elementNum] = true;
+ longColVector.noNulls = false;
+ } else {
+ // UNDONE: Does the overflow check above using double really work here for float?
+ float floatValue = (float) doubleValue;
+ downCastAnyInteger(longColVector, elementNum, (long) floatValue, readerType);
+ }
}
@Override
@@ -525,9 +563,13 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
@Override
public void setConvertVectorElement(int elementNum) throws IOException {
- longColVector.vector[elementNum] =
- downCastAnyInteger(
- (long) doubleColVector.vector[elementNum], readerType);
+ double doubleValue = doubleColVector.vector[elementNum];
+ if (!doubleCanFitInLong(doubleValue)) {
+ longColVector.isNull[elementNum] = true;
+ longColVector.noNulls = false;
+ } else {
+ downCastAnyInteger(longColVector, elementNum, (long) doubleValue, readerType);
+ }
}
@Override
@@ -553,7 +595,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final int precision;
private final int scale;
private final TypeDescription readerType;
- private HiveDecimalWritable hiveDecimalResult;
private DecimalColumnVector decimalColVector;
private LongColumnVector longColVector;
@@ -565,15 +606,21 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
setConvertTreeReader(decimalTreeReader);
- hiveDecimalResult = new HiveDecimalWritable();
}
+ private static HiveDecimal DECIMAL_MAX_LONG = HiveDecimal.create(Long.MAX_VALUE);
+ private static HiveDecimal DECIMAL_MIN_LONG = HiveDecimal.create(Long.MIN_VALUE);
+
@Override
public void setConvertVectorElement(int elementNum) throws IOException {
- longColVector.vector[elementNum] =
- downCastAnyInteger(
- decimalColVector.vector[elementNum].getHiveDecimal().longValue(),
- readerType);
+ HiveDecimal decimalValue = decimalColVector.vector[elementNum].getHiveDecimal();
+ if (decimalValue.compareTo(DECIMAL_MAX_LONG) > 0 ||
+ decimalValue.compareTo(DECIMAL_MIN_LONG) < 0) {
+ longColVector.isNull[elementNum] = true;
+ longColVector.noNulls = false;
+ } else {
+ downCastAnyInteger(longColVector, elementNum, decimalValue.longValue(), readerType);
+ }
}
@Override
@@ -596,7 +643,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private final TypeDescription readerType;
private BytesColumnVector bytesColVector;
private LongColumnVector longColVector;
@@ -604,7 +650,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
AnyIntegerFromStringGroupTreeReader(int columnId, TypeDescription fileType,
TypeDescription readerType) throws IOException {
super(columnId);
- this.fileType = fileType;
this.readerType = readerType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
@@ -615,8 +660,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
long longValue = parseLongFromString(string);
if (!getIsParseError()) {
- longColVector.vector[elementNum] =
- downCastAnyInteger(longValue, readerType);
+ downCastAnyInteger(longColVector, elementNum, longValue, readerType);
} else {
longColVector.noNulls = false;
longColVector.isNull[elementNum] = true;
@@ -660,8 +704,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Use TimestampWritable's getSeconds.
long longValue = TimestampUtils.millisToSeconds(
timestampColVector.asScratchTimestamp(elementNum).getTime());
- longColVector.vector[elementNum] =
- downCastAnyInteger(longValue, readerType);
+ downCastAnyInteger(longColVector, elementNum, longValue, readerType);
}
@Override
@@ -745,8 +788,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
if (resultColVector.noNulls || !resultColVector.isNull[0]) {
resultVector[0] = (float) resultVector[0];
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[0] = true;
+ // Remains null.
}
} else if (resultColVector.noNulls){
for (int i = 0; i < batchSize; i++) {
@@ -757,8 +799,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
if (!resultColVector.isNull[i]) {
resultVector[i] = (float) resultVector[i];
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[i] = true;
+ // Remains null.
}
}
}
@@ -771,8 +812,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final int precision;
private final int scale;
- private final TypeDescription readerType;
- private HiveDecimalWritable hiveDecimalResult;
private DecimalColumnVector decimalColVector;
private DoubleColumnVector doubleColVector;
@@ -781,10 +820,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
super(columnId);
this.precision = fileType.getPrecision();
this.scale = fileType.getScale();
- this.readerType = readerType;
decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
setConvertTreeReader(decimalTreeReader);
- hiveDecimalResult = new HiveDecimalWritable();
}
@Override
@@ -813,14 +850,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private DoubleColumnVector doubleColVector;
FloatFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -858,14 +893,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TimestampTreeReader timestampTreeReader;
- private final TypeDescription readerType;
private TimestampColumnVector timestampColVector;
private DoubleColumnVector doubleColVector;
- FloatFromTimestampTreeReader(int columnId, TypeDescription readerType,
- boolean skipCorrupt) throws IOException {
+ FloatFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
super(columnId);
- this.readerType = readerType;
timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
setConvertTreeReader(timestampTreeReader);
}
@@ -940,13 +972,10 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
- private FloatWritable floatResult;
-
DoubleFromFloatTreeReader(int columnId) throws IOException {
super(columnId);
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
@@ -964,20 +993,15 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final int precision;
private final int scale;
- private final TypeDescription readerType;
- private HiveDecimalWritable hiveDecimalResult;
private DecimalColumnVector decimalColVector;
private DoubleColumnVector doubleColVector;
- DoubleFromDecimalTreeReader(int columnId, TypeDescription fileType,
- TypeDescription readerType) throws IOException {
+ DoubleFromDecimalTreeReader(int columnId, TypeDescription fileType) throws IOException {
super(columnId);
this.precision = fileType.getPrecision();
this.scale = fileType.getScale();
- this.readerType = readerType;
decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
setConvertTreeReader(decimalTreeReader);
- hiveDecimalResult = new HiveDecimalWritable();
}
@Override
@@ -1006,14 +1030,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private DoubleColumnVector doubleColVector;
DoubleFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -1050,14 +1072,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TimestampTreeReader timestampTreeReader;
- private final TypeDescription readerType;
private TimestampColumnVector timestampColVector;
private DoubleColumnVector doubleColVector;
- DoubleFromTimestampTreeReader(int columnId, TypeDescription readerType,
- boolean skipCorrupt) throws IOException {
+ DoubleFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
super(columnId);
- this.readerType = readerType;
timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
setConvertTreeReader(timestampTreeReader);
}
@@ -1088,16 +1107,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
- private int precision;
- private int scale;
private LongColumnVector longColVector;
private DecimalColumnVector decimalColVector;
- DecimalFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
- TypeDescription readerType, boolean skipCorrupt) throws IOException {
+ DecimalFromAnyIntegerTreeReader(int columnId, TypeDescription fileType, boolean skipCorrupt)
+ throws IOException {
super(columnId);
- this.precision = readerType.getPrecision();
- this.scale = readerType.getScale();
anyIntegerAsLongTreeReader =
new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
setConvertTreeReader(anyIntegerAsLongTreeReader);
@@ -1106,8 +1121,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
@Override
public void setConvertVectorElement(int elementNum) {
long longValue = longColVector.vector[elementNum];
- HiveDecimalWritable hiveDecimalWritable =
- new HiveDecimalWritable(longValue);
+ HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(longValue);
+ // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
decimalColVector.set(elementNum, hiveDecimalWritable);
}
@@ -1131,30 +1146,25 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
- private int precision;
- private int scale;
- private FloatWritable floatResult;
private DoubleColumnVector doubleColVector;
private DecimalColumnVector decimalColVector;
DecimalFromFloatTreeReader(int columnId, TypeDescription readerType)
throws IOException {
super(columnId);
- this.precision = readerType.getPrecision();
- this.scale = readerType.getScale();
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
public void setConvertVectorElement(int elementNum) throws IOException {
float floatValue = (float) doubleColVector.vector[elementNum];
if (!Float.isNaN(floatValue)) {
- HiveDecimal value =
+ HiveDecimal decimalValue =
HiveDecimal.create(Float.toString(floatValue));
- if (value != null) {
- decimalColVector.set(elementNum, value);
+ if (decimalValue != null) {
+ // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
+ decimalColVector.set(elementNum, decimalValue);
} else {
decimalColVector.noNulls = false;
decimalColVector.isNull[elementNum] = true;
@@ -1227,14 +1237,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private DecimalColumnVector decimalColVector;
DecimalFromStringGroupTreeReader(int columnId, TypeDescription fileType,
TypeDescription readerType) throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -1244,6 +1252,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
HiveDecimal value = parseDecimalFromString(string);
if (value != null) {
+ // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
decimalColVector.set(elementNum, value);
} else {
decimalColVector.noNulls = false;
@@ -1271,18 +1280,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TimestampTreeReader timestampTreeReader;
- private final TypeDescription readerType;
private TimestampColumnVector timestampColVector;
- private int precision;
- private int scale;
private DecimalColumnVector decimalColVector;
- DecimalFromTimestampTreeReader(int columnId, TypeDescription readerType,
- boolean skipCorrupt) throws IOException {
+ DecimalFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
super(columnId);
- this.readerType = readerType;
- this.precision = readerType.getPrecision();
- this.scale = readerType.getScale();
timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
setConvertTreeReader(timestampTreeReader);
}
@@ -1293,6 +1295,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
timestampColVector.asScratchTimestamp(elementNum));
HiveDecimal value = HiveDecimal.create(Double.toString(doubleValue));
if (value != null) {
+ // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
decimalColVector.set(elementNum, value);
} else {
decimalColVector.noNulls = false;
@@ -1316,11 +1319,61 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
}
}
+ public static class DecimalFromDecimalTreeReader extends ConvertTreeReader {
+
+ private DecimalTreeReader decimalTreeReader;
+
+ private DecimalColumnVector fileDecimalColVector;
+ private int filePrecision;
+ private int fileScale;
+ private int readerPrecision;
+ private int readerScale;
+ private DecimalColumnVector decimalColVector;
+
+ DecimalFromDecimalTreeReader(int columnId, TypeDescription fileType, TypeDescription readerType)
+ throws IOException {
+ super(columnId);
+ filePrecision = fileType.getPrecision();
+ fileScale = fileType.getScale();
+ readerPrecision = readerType.getPrecision();
+ readerScale = readerType.getScale();
+ decimalTreeReader = new DecimalTreeReader(columnId, filePrecision, fileScale);
+ setConvertTreeReader(decimalTreeReader);
+ }
+
+ @Override
+ public void setConvertVectorElement(int elementNum) throws IOException {
+
+ HiveDecimalWritable valueWritable = HiveDecimalWritable.enforcePrecisionScale(
+ fileDecimalColVector.vector[elementNum], readerPrecision, readerScale);
+ if (valueWritable != null) {
+ decimalColVector.set(elementNum, valueWritable);
+ } else {
+ decimalColVector.noNulls = false;
+ decimalColVector.isNull[elementNum] = true;
+ }
+ }
+
+ @Override
+ public void nextVector(ColumnVector previousVector,
+ boolean[] isNull,
+ final int batchSize) throws IOException {
+ if (fileDecimalColVector == null) {
+ // Allocate column vector for file; cast column vector for reader.
+ fileDecimalColVector = new DecimalColumnVector(filePrecision, fileScale);
+ decimalColVector = (DecimalColumnVector) previousVector;
+ }
+ // Read present/isNull stream
+ decimalTreeReader.nextVector(fileDecimalColVector, isNull, batchSize);
+
+ convertVector(fileDecimalColVector, decimalColVector, batchSize);
+ }
+ }
+
public static class StringGroupFromAnyIntegerTreeReader extends ConvertTreeReader {
private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
- private final TypeDescription fileType;
private final TypeDescription readerType;
private LongColumnVector longColVector;
private BytesColumnVector bytesColVector;
@@ -1328,7 +1381,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
StringGroupFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
TypeDescription readerType, boolean skipCorrupt) throws IOException {
super(columnId);
- this.fileType = fileType;
this.readerType = readerType;
anyIntegerAsLongTreeReader =
new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
@@ -1364,7 +1416,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
private final TypeDescription readerType;
- private FloatWritable floatResult;
private DoubleColumnVector doubleColVector;
private BytesColumnVector bytesColVector;
@@ -1375,7 +1426,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
@@ -1544,7 +1594,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final TypeDescription readerType;
private LongColumnVector longColVector;
private BytesColumnVector bytesColVector;
- private DateWritable dateWritableResult;
private Date date;
StringGroupFromDateTreeReader(int columnId, TypeDescription readerType,
@@ -1553,7 +1602,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
dateTreeReader = new DateTreeReader(columnId);
setConvertTreeReader(dateTreeReader);
- dateWritableResult = new DateWritable();
date = new Date(0);
}
@@ -1585,13 +1633,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private final TypeDescription readerType;
StringGroupFromStringGroupTreeReader(int columnId, TypeDescription fileType,
TypeDescription readerType) throws IOException {
super(columnId);
- this.fileType = fileType;
this.readerType = readerType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
@@ -1609,8 +1655,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
if (resultColVector.noNulls || !resultColVector.isNull[0]) {
convertStringGroupVectorElement(resultColVector, 0, readerType);
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[0] = true;
+ // Remains null.
}
} else if (resultColVector.noNulls){
for (int i = 0; i < batchSize; i++) {
@@ -1621,8 +1666,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
if (!resultColVector.isNull[i]) {
convertStringGroupVectorElement(resultColVector, i, readerType);
} else {
- resultColVector.noNulls = false;
- resultColVector.isNull[i] = true;
+ // Remains null.
}
}
}
@@ -1634,7 +1678,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private BinaryTreeReader binaryTreeReader;
private final TypeDescription readerType;
- private BytesWritable binaryWritableResult;
private BytesColumnVector inBytesColVector;
private BytesColumnVector outBytesColVector;
@@ -1644,7 +1687,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.readerType = readerType;
binaryTreeReader = new BinaryTreeReader(columnId);
setConvertTreeReader(binaryTreeReader);
- binaryWritableResult = new BytesWritable();
}
@Override
@@ -1725,7 +1767,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private FloatTreeReader floatTreeReader;
- private FloatWritable floatResult;
private DoubleColumnVector doubleColVector;
private TimestampColumnVector timestampColVector;
@@ -1734,14 +1775,14 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
super(columnId);
floatTreeReader = new FloatTreeReader(columnId);
setConvertTreeReader(floatTreeReader);
- floatResult = new FloatWritable();
}
@Override
public void setConvertVectorElement(int elementNum) {
float floatValue = (float) doubleColVector.vector[elementNum];
- timestampColVector.set(elementNum,
- TimestampUtils.doubleToTimestamp(floatValue));
+ Timestamp timestampValue = TimestampUtils.doubleToTimestamp(floatValue);
+ // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
+ timestampColVector.set(elementNum, timestampValue);
}
@Override
@@ -1777,8 +1818,9 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
@Override
public void setConvertVectorElement(int elementNum) {
double doubleValue = doubleColVector.vector[elementNum];
- timestampColVector.set(elementNum,
- TimestampUtils.doubleToTimestamp(doubleValue));
+ Timestamp timestampValue = TimestampUtils.doubleToTimestamp(doubleValue);
+ // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
+ timestampColVector.set(elementNum, timestampValue);
}
@Override
@@ -1803,7 +1845,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private final int precision;
private final int scale;
- private HiveDecimalWritable hiveDecimalResult;
private DecimalColumnVector decimalColVector;
private TimestampColumnVector timestampColVector;
@@ -1814,14 +1855,14 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
this.scale = fileType.getScale();
decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
setConvertTreeReader(decimalTreeReader);
- hiveDecimalResult = new HiveDecimalWritable();
}
@Override
public void setConvertVectorElement(int elementNum) {
Timestamp timestampValue =
- TimestampUtils.decimalToTimestamp(
- decimalColVector.vector[elementNum].getHiveDecimal());
+ TimestampUtils.decimalToTimestamp(
+ decimalColVector.vector[elementNum].getHiveDecimal());
+ // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
timestampColVector.set(elementNum, timestampValue);
}
@@ -1845,14 +1886,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private TimestampColumnVector timestampColVector;
TimestampFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -1890,7 +1929,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private DateTreeReader dateTreeReader;
- private DateWritable doubleResult;
private LongColumnVector longColVector;
private TimestampColumnVector timestampColVector;
@@ -1899,7 +1937,6 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
super(columnId);
dateTreeReader = new DateTreeReader(columnId);
setConvertTreeReader(dateTreeReader);
- doubleResult = new DateWritable();
}
@Override
@@ -1929,14 +1966,12 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
private BytesColumnVector bytesColVector;
private LongColumnVector longColVector;
DateFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -1974,14 +2009,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TimestampTreeReader timestampTreeReader;
- private final TypeDescription readerType;
private TimestampColumnVector timestampColVector;
private LongColumnVector longColVector;
- DateFromTimestampTreeReader(int columnId, TypeDescription readerType,
- boolean skipCorrupt) throws IOException {
+ DateFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
super(columnId);
- this.readerType = readerType;
timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
setConvertTreeReader(timestampTreeReader);
}
@@ -2014,12 +2046,9 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private TreeReader stringGroupTreeReader;
- private final TypeDescription fileType;
-
BinaryFromStringGroupTreeReader(int columnId, TypeDescription fileType)
throws IOException {
super(columnId);
- this.fileType = fileType;
stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
setConvertTreeReader(stringGroupTreeReader);
}
@@ -2064,7 +2093,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
skipCorrupt);
case DECIMAL:
- return new DecimalFromAnyIntegerTreeReader(columnId, fileType, readerType, skipCorrupt);
+ return new DecimalFromAnyIntegerTreeReader(columnId, fileType, skipCorrupt);
case STRING:
case CHAR:
@@ -2208,7 +2237,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new FloatFromDecimalTreeReader(columnId, fileType, readerType);
case DOUBLE:
- return new DoubleFromDecimalTreeReader(columnId, fileType, readerType);
+ return new DoubleFromDecimalTreeReader(columnId, fileType);
case STRING:
case CHAR:
@@ -2424,13 +2453,13 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new AnyIntegerFromTimestampTreeReader(columnId, readerType, skipCorrupt);
case FLOAT:
- return new FloatFromTimestampTreeReader(columnId, readerType, skipCorrupt);
+ return new FloatFromTimestampTreeReader(columnId, skipCorrupt);
case DOUBLE:
- return new DoubleFromTimestampTreeReader(columnId, readerType, skipCorrupt);
+ return new DoubleFromTimestampTreeReader(columnId, skipCorrupt);
case DECIMAL:
- return new DecimalFromTimestampTreeReader(columnId, readerType, skipCorrupt);
+ return new DecimalFromTimestampTreeReader(columnId, skipCorrupt);
case STRING:
case CHAR:
@@ -2442,7 +2471,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
readerType.getCategory() + " to self needed");
case DATE:
- return new DateFromTimestampTreeReader(columnId, readerType, skipCorrupt);
+ return new DateFromTimestampTreeReader(columnId, skipCorrupt);
// Not currently supported conversion(s):
case BINARY:
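The comments added in several setConvertVectorElement() bodies above rely on DecimalColumnVector.set() nulling out any value that does not fit the vector's declared precision and scale. A minimal, self-contained sketch of that behavior (illustrative only; the driver class and values below are made up and not part of this commit):

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;

public class DecimalSetNullSketch {
  public static void main(String[] args) {
    // A vector with precision 5 and scale 2 can hold at most 999.99.
    DecimalColumnVector col = new DecimalColumnVector(1024, 5, 2);

    // Fits the declared precision/scale: stored as-is.
    col.set(0, HiveDecimal.create("123.45"));

    // Does not fit decimal(5,2): instead of throwing, the vector marks the
    // entry null and clears noNulls, which is what the converters count on.
    col.set(1, HiveDecimal.create("123456.78"));

    System.out.println(col.isNull[1]);   // expected: true
    System.out.println(col.noNulls);     // expected: false
  }
}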
http://git-wip-us.apache.org/repos/asf/orc/blob/17e14f62/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
index 189ead5..41db9ca 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
@@ -39,45 +39,53 @@ public class TimestampUtils {
}
public static Timestamp doubleToTimestamp(double f) {
- long seconds = (long) f;
-
- // We must ensure the exactness of the double's fractional portion.
- // 0.6 as the fraction part will be converted to 0.59999... and
- // significantly reduce the savings from binary serialization
- BigDecimal bd;
try {
- bd = new BigDecimal(String.valueOf(f));
+ long seconds = (long) f;
+
+ // We must ensure the exactness of the double's fractional portion.
+ // 0.6 as the fraction part will be converted to 0.59999... and
+ // significantly reduce the savings from binary serialization
+ BigDecimal bd = new BigDecimal(String.valueOf(f));
+
+ bd = bd.subtract(new BigDecimal(seconds)).multiply(new BigDecimal(1000000000));
+ int nanos = bd.intValue();
+
+ // Convert to millis
+ long millis = seconds * 1000;
+ if (nanos < 0) {
+ millis -= 1000;
+ nanos += 1000000000;
+ }
+ Timestamp t = new Timestamp(millis);
+
+ // Set remaining fractional portion to nanos
+ t.setNanos(nanos);
+ return t;
} catch (NumberFormatException nfe) {
return null;
+ } catch (IllegalArgumentException iae) {
+ return null;
}
- bd = bd.subtract(new BigDecimal(seconds)).multiply(new BigDecimal(1000000000));
- int nanos = bd.intValue();
-
- // Convert to millis
- long millis = seconds * 1000;
- if (nanos < 0) {
- millis -= 1000;
- nanos += 1000000000;
- }
- Timestamp t = new Timestamp(millis);
-
- // Set remaining fractional portion to nanos
- t.setNanos(nanos);
- return t;
}
public static Timestamp decimalToTimestamp(HiveDecimal d) {
- BigDecimal nanoInstant = d.bigDecimalValue().multiply(BILLION_BIG_DECIMAL);
- int nanos = nanoInstant.remainder(BILLION_BIG_DECIMAL).intValue();
- if (nanos < 0) {
- nanos += 1000000000;
- }
- long seconds =
- nanoInstant.subtract(new BigDecimal(nanos)).divide(BILLION_BIG_DECIMAL).longValue();
- Timestamp t = new Timestamp(seconds * 1000);
- t.setNanos(nanos);
+ try {
+ BigDecimal nanoInstant = d.bigDecimalValue().multiply(BILLION_BIG_DECIMAL);
+ int nanos = nanoInstant.remainder(BILLION_BIG_DECIMAL).intValue();
+ if (nanos < 0) {
+ nanos += 1000000000;
+ }
+ long seconds =
+ nanoInstant.subtract(new BigDecimal(nanos)).divide(BILLION_BIG_DECIMAL).longValue();
+ Timestamp t = new Timestamp(seconds * 1000);
+ t.setNanos(nanos);
- return t;
+ return t;
+ } catch (NumberFormatException nfe) {
+ return null;
+ } catch (IllegalArgumentException iae) {
+ return null;
+ }
}
/**
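With the whole conversion wrapped in try/catch, inputs that cannot be represented come back as null instead of surfacing a NumberFormatException or IllegalArgumentException to the tree readers, which then null the vector entry. A small sketch of calling the patched method (expected outputs are noted in comments; the driver class is illustrative, not part of the commit):

import java.sql.Timestamp;
import org.apache.hadoop.hive.ql.util.TimestampUtils;

public class DoubleToTimestampSketch {
  public static void main(String[] args) {
    // 1.5 seconds since epoch: one whole second plus a 0.5 s fraction,
    // i.e. getTime() == 1500 ms and getNanos() == 500000000.
    Timestamp ok = TimestampUtils.doubleToTimestamp(1.5d);
    System.out.println(ok.getTime() + " / " + ok.getNanos());

    // NaN (and infinity) cannot be parsed into a BigDecimal, and values that
    // previously made Timestamp.setNanos() throw are now caught as well;
    // both cases return null so the caller can null out the column entry.
    Timestamp bad = TimestampUtils.doubleToTimestamp(Double.NaN);
    System.out.println(bad);   // expected: null
  }
}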
[5/7] orc git commit: HIVE-13648. ORC schema evolution doesn't
support the same type conversions for varchar, char,
or decimal when max length, precision, or scale are different.
Posted by om...@apache.org.
HIVE-13648. ORC schema evolution doesn't support the same type conversions for
varchar, char, or decimal when max length, precision, or scale are different.
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/047265cd
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/047265cd
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/047265cd
Branch: refs/heads/master
Commit: 047265cd167cbb1d17b596a77ba5b3b8f868cdb2
Parents: e8c0eb5
Author: Owen O'Malley <om...@apache.org>
Authored: Thu Jun 30 10:26:24 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Jun 30 14:33:58 2016 -0700
----------------------------------------------------------------------
.../orc/impl/ConvertTreeReaderFactory.java | 28 +++++++++++---------
.../org/apache/orc/impl/SchemaEvolution.java | 6 ++---
.../org/apache/orc/impl/TreeReaderFactory.java | 2 +-
3 files changed, 18 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
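The effect is that a reader schema may now change CHAR/VARCHAR maxLength or DECIMAL precision/scale and the values are converted, with anything that does not fit coming back as null, instead of the maxLength mismatch being rejected and the precision/scale difference being silently ignored. A hedged sketch of exercising this through the public reader API (the path and both schemas below are illustrative, not part of this commit):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;

public class EvolvedDecimalRead {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assume the file was written with struct<d:decimal(10,2),name:varchar(10)>.
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(conf));

    // The reader schema widens both the decimal and the varchar.
    TypeDescription readerSchema =
        TypeDescription.fromString("struct<d:decimal(12,4),name:varchar(20)>");

    RecordReader rows = reader.rows(new Reader.Options().schema(readerSchema));
    VectorizedRowBatch batch = readerSchema.createRowBatch();
    while (rows.nextBatch(batch)) {
      // batch.cols[0] is a DecimalColumnVector with the reader's (12,4);
      // DecimalFromDecimalTreeReader nulls any value that cannot be
      // represented at that precision and scale.
    }
    rows.close();
  }
}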
http://git-wip-us.apache.org/repos/asf/orc/blob/047265cd/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 753e5bc..eda47d3 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -257,7 +257,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
bytesColVector.vector[elementNum],
bytesColVector.start[elementNum], bytesColVector.length[elementNum],
StandardCharsets.UTF_8);
-
+
return string;
}
@@ -1323,6 +1323,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
private DecimalTreeReader decimalTreeReader;
+ private final TypeDescription fileType;
+ private final TypeDescription readerType;
private DecimalColumnVector fileDecimalColVector;
private int filePrecision;
private int fileScale;
@@ -1333,8 +1335,10 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
DecimalFromDecimalTreeReader(int columnId, TypeDescription fileType, TypeDescription readerType)
throws IOException {
super(columnId);
+ this.fileType = fileType;
filePrecision = fileType.getPrecision();
fileScale = fileType.getScale();
+ this.readerType = readerType;
readerPrecision = readerType.getPrecision();
readerScale = readerType.getScale();
decimalTreeReader = new DecimalTreeReader(columnId, filePrecision, fileScale);
@@ -2248,7 +2252,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new TimestampFromDecimalTreeReader(columnId, fileType, skipCorrupt);
case DECIMAL:
- // UNDONE: Decimal to Decimal conversion????
+ return new DecimalFromDecimalTreeReader(columnId, fileType, readerType);
// Not currently supported conversion(s):
case BINARY:
@@ -2354,8 +2358,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
case CHAR:
- throw new IllegalArgumentException("No conversion of type " +
- readerType.getCategory() + " to self needed");
+ return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
case BINARY:
return new BinaryFromStringGroupTreeReader(columnId, fileType);
@@ -2411,8 +2414,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
case VARCHAR:
- throw new IllegalArgumentException("No conversion of type " +
- readerType.getCategory() + " to self needed");
+ return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
case BINARY:
return new BinaryFromStringGroupTreeReader(columnId, fileType);
@@ -2628,11 +2630,11 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
* StringGroupFromFloatTreeReader (written)
* StringGroupFromDoubleTreeReader (written)
* StringGroupFromDecimalTreeReader (written)
- *
+ *
* String from Char/Varchar conversion
* Char from String/Varchar conversion
* Varchar from String/Char conversion
- *
+ *
* StringGroupFromTimestampTreeReader (written)
* StringGroupFromDateTreeReader (written)
* StringGroupFromBinaryTreeReader *****
@@ -2650,7 +2652,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
* TimestampFromDecimalTreeReader (written)
* TimestampFromStringGroupTreeReader (written)
* TimestampFromDateTreeReader
- *
+ *
*
* To DATE:
* Convert from (STRING, CHAR, VARCHAR) using string conversion.
@@ -2780,7 +2782,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Fall through.
}
- // Now look for the few cases we don't convert from
+ // Now look for the few cases we don't convert from
switch (fileType.getCategory()) {
case BOOLEAN:
@@ -2799,8 +2801,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
default:
return true;
}
-
-
+
+
case STRING:
case CHAR:
case VARCHAR:
@@ -2836,7 +2838,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
default:
return true;
}
-
+
case BINARY:
switch (readerType.getCategory()) {
// Not currently supported conversion(s):
http://git-wip-us.apache.org/repos/asf/orc/blob/047265cd/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 68000d6..07b527d 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -100,12 +100,10 @@ public class SchemaEvolution {
break;
case CHAR:
case VARCHAR:
- // HIVE-13648: Look at ORC data type conversion edge cases (CHAR, VARCHAR, DECIMAL)
- isOk = fileType.getMaxLength() == readerType.getMaxLength();
+ // We convert when the CHAR/VARCHAR type is the same but the maxLength differs.
break;
case DECIMAL:
- // HIVE-13648: Look at ORC data type conversion edge cases (CHAR, VARCHAR, DECIMAL)
- // TODO we don't enforce scale and precision checks, but probably should
+ // We convert when the DECIMAL type is the same but the precision/scale differ.
break;
case UNION:
case MAP:
http://git-wip-us.apache.org/repos/asf/orc/blob/047265cd/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 6c8ecfd..5901c8c 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -2034,7 +2034,7 @@ public class TreeReaderFactory {
return new NullTreeReader(0);
}
TypeDescription.Category readerTypeCategory = readerType.getCategory();
- if (!fileType.getCategory().equals(readerTypeCategory) &&
+ if (!fileType.equals(readerType) &&
(readerTypeCategory != TypeDescription.Category.STRUCT &&
readerTypeCategory != TypeDescription.Category.MAP &&
readerTypeCategory != TypeDescription.Category.LIST &&
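The switch from comparing categories to full TypeDescription equality is what routes same-category but differently parameterized types (decimal precision/scale, char/varchar maxLength) through ConvertTreeReaderFactory. A small illustrative sketch of the distinction (the driver class is made up, not part of the commit):

import org.apache.orc.TypeDescription;

public class SchemaEqualsSketch {
  public static void main(String[] args) {
    TypeDescription fileType = TypeDescription.fromString("decimal(10,2)");
    TypeDescription readerType = TypeDescription.fromString("decimal(12,4)");

    // Same category, so the old category-only check saw nothing to convert.
    System.out.println(fileType.getCategory() == readerType.getCategory()); // true

    // equals() also compares precision and scale (and maxLength for
    // char/varchar), so the new !fileType.equals(readerType) check creates
    // a convert tree reader for this pair.
    System.out.println(fileType.equals(readerType)); // false
  }
}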