Posted to commits@hive.apache.org by se...@apache.org on 2017/11/20 20:20:54 UTC

hive git commit: HIVE-17631 : upgrade orc to 1.4.1 (Sergey Shelukhin, reviewed by Prasanth Jayachandran)

Repository: hive
Updated Branches:
  refs/heads/master 39d46e8af -> 21008897c


HIVE-17631 : upgrade orc to 1.4.1 (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/21008897
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/21008897
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/21008897

Branch: refs/heads/master
Commit: 21008897c5b7e79aff253967cff3dd308fb901ce
Parents: 39d46e8
Author: sergey <se...@apache.org>
Authored: Mon Nov 20 12:20:39 2017 -0800
Committer: sergey <se...@apache.org>
Committed: Mon Nov 20 12:20:39 2017 -0800

----------------------------------------------------------------------
 .../hive/llap/io/encoded/OrcEncodedDataReader.java | 17 +++++++++++++++--
 .../llap/io/encoded/SerDeEncodedDataReader.java    |  6 +++++-
 .../cache/TestIncrementalObjectSizeEstimator.java  |  6 ++++++
 pom.xml                                            |  2 +-
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java      | 12 +++++++++++-
 .../apache/hadoop/hive/ql/io/orc/ReaderImpl.java   |  3 ---
 .../hive/ql/io/orc/encoded/EncodedReaderImpl.java  | 16 ++++++++++------
 .../hadoop/hive/ql/io/orc/encoded/Reader.java      |  3 ---
 .../hadoop/hive/ql/io/orc/encoded/ReaderImpl.java  |  8 +-------
 .../apache/hadoop/hive/ql/plan/DynamicValue.java   | 16 ++++++++++++++--
 10 files changed, 63 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index c32f79f..a1ff360 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -36,6 +36,7 @@ import org.apache.orc.TypeDescription;
 import org.apache.orc.impl.BufferChunk;
 import org.apache.orc.impl.DataReaderProperties;
 import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.OrcCodecPool;
 import org.apache.orc.impl.OrcIndex;
 import org.apache.orc.impl.OrcTail;
 import org.apache.orc.impl.ReaderImpl;
@@ -742,8 +743,15 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
       metadataCache.decRefBuffer(cacheBuf); // We don't use this one.
     }
     ByteBuffer bb = footerRange.getData();
-    return buildStripeFooter(Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
-        bb.remaining(), orcReader.getCodec(), orcReader.getCompressionSize());
+
+    CompressionKind kind = orcReader.getCompressionKind();
+    CompressionCodec codec = OrcCodecPool.getCodec(kind);
+    try {
+      return buildStripeFooter(Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
+          bb.remaining(), codec, orcReader.getCompressionSize());
+    } finally {
+      OrcCodecPool.returnCodec(kind, codec);
+    }
   }
 
   private void ensureRawDataReader(boolean isOpen) throws IOException {
@@ -924,6 +932,11 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
     }
 
     @Override
+    public CompressionCodec getCompressionCodec() {
+      return orcDataReader.getCompressionCodec();
+    }
+
+    @Override
     public DiskRangeList getFileData(Object fileKey, DiskRangeList range,
         long baseOffset, DiskRangeListFactory factory, BooleanRef gotAllData) {
       DiskRangeList result = lowLevelCache.getFileData(

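For context, the core pattern in this change is checking a codec out of the new
OrcCodecPool and returning it in a finally block, rather than holding one codec
per reader. A minimal sketch of that acquire/use/return pattern, assuming
orc-core 1.4.1 on the classpath (the class name and the doWorkWith() placeholder
are illustrative, not part of the patch):

import java.io.IOException;

import org.apache.orc.CompressionCodec;
import org.apache.orc.CompressionKind;
import org.apache.orc.impl.OrcCodecPool;

public class PooledCodecSketch {
  // Placeholder for any codec-consuming call, e.g. the buildStripeFooter()
  // call in the hunk above.
  static void doWorkWith(CompressionCodec codec) throws IOException {
  }

  public static void main(String[] args) throws IOException {
    CompressionKind kind = CompressionKind.ZLIB;
    CompressionCodec codec = OrcCodecPool.getCodec(kind); // checked out from the pool
    try {
      doWorkWith(codec);
    } finally {
      // Return rather than close: the pool caches codecs for reuse,
      // which is the point of this change.
      OrcCodecPool.returnCodec(kind, codec);
    }
  }
}
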
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index 599b519..4b0a1ce 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -387,7 +387,6 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
           }
         }
       }
-
     }
 
     private String throwIncludesMismatchError(boolean[] translated) throws IOException {
@@ -554,6 +553,11 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
       currentStripe.lastRowStart = lastStartOffset;
       currentStripe.lastRowEnd = currentFileOffset;
     }
+
+    @Override
+    public CompressionCodec getCompressionCodec() {
+      return null;
+    }
   }
 
   private interface CacheOutput {

http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java
----------------------------------------------------------------------
diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java
index 1ec7020..406dd0d 100644
--- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java
+++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java
@@ -28,6 +28,7 @@ import java.util.ArrayList;
 import java.util.LinkedHashSet;
 
 import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.orc.CompressionCodec;
 import org.apache.orc.DataReader;
 import org.apache.orc.OrcFile;
 import org.apache.orc.TypeDescription;
@@ -159,6 +160,11 @@ public class TestIncrementalObjectSizeEstimator {
     @Override
     public void close() throws IOException {
     }
+
+    @Override
+    public CompressionCodec getCompressionCodec() {
+      return null;
+    }
   }
 /*
   @Test

http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 04fb7c3..edb38c9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -182,7 +182,7 @@
     <libthrift.version>0.9.3</libthrift.version>
     <log4j2.version>2.6.2</log4j2.version>
     <opencsv.version>2.3</opencsv.version>
-    <orc.version>1.3.3</orc.version>
+    <orc.version>1.4.1</orc.version>
     <mockito-all.version>1.10.19</mockito-all.version>
     <mina.version>2.0.0-M5</mina.version>
     <netty.version>4.0.52.Final</netty.version>

http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 1e5b841..dda9f93 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
+import org.apache.hadoop.hive.ql.plan.DynamicValue.NoDynamicValuesException;
+
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 
 import java.io.IOException;
@@ -80,6 +82,7 @@ import org.apache.hadoop.hive.ql.io.SyntheticFileId;
 import org.apache.hadoop.hive.ql.io.orc.ExternalCache.ExternalFooterCachesByConf;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -2115,7 +2118,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
         } else {
           // column statistics at index 0 contains only the number of rows
           ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
-          truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
+          PredicateLeaf leaf = predLeaves.get(pred);
+          try {
+            truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, leaf, null);
+          } catch (NoDynamicValuesException dve) {
+            LOG.debug("Dynamic values are not available here {}", dve.getMessage());
+            boolean hasNulls = stats.hasNull() || leaf.getOperator() != Operator.NULL_SAFE_EQUALS;
+            truthValues[pred] = hasNulls ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+          }
         }
       } else {
 

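The fallback above makes stripe elimination tolerant of contexts where dynamic
values cannot be resolved: the leaf cannot be evaluated, so it is reported as
possibly matching and the stripe is kept. Extracted as a standalone sketch
(class and method names are illustrative; the logic mirrors the hunk above):

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.orc.ColumnStatistics;

public class PredicateFallbackSketch {
  // Without the dynamic value we cannot evaluate the leaf, so report
  // "may or may not match". NULL_SAFE_EQUALS never evaluates to NULL,
  // so only other operators (or columns whose stats report nulls) need
  // the *_NULL variant.
  static TruthValue fallbackTruthValue(ColumnStatistics stats, PredicateLeaf leaf) {
    boolean hasNulls = stats.hasNull() || leaf.getOperator() != Operator.NULL_SAFE_EQUALS;
    return hasNulls ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
  }
}
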
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index cbbbb15..9ac02e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -20,13 +20,10 @@ package org.apache.hadoop.hive.ql.io.orc;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.util.Arrays;
 
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.orc.TypeDescription;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
index 80b7be8..29cef30 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
@@ -48,6 +48,7 @@ import org.apache.orc.OrcProto.Stream;
 import org.apache.orc.OrcProto.Stream.Kind;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.OrcCodecPool;
 import org.apache.orc.impl.OrcIndex;
 import org.apache.orc.impl.OutStream;
 import org.apache.orc.impl.RecordReaderUtils;
@@ -125,6 +126,8 @@ class EncodedReaderImpl implements EncodedReader {
   private final DataReader dataReader;
   private boolean isDataReaderOpen = false;
   private final CompressionCodec codec;
+  private final boolean isCompressed;
+  private final org.apache.orc.CompressionKind compressionKind;
   private final int bufferSize;
   private final List<OrcProto.Type> types;
   private final long rowIndexStride;
@@ -135,11 +138,13 @@ class EncodedReaderImpl implements EncodedReader {
   private final WriterVersion version;
 
   public EncodedReaderImpl(Object fileKey, List<OrcProto.Type> types,
-      TypeDescription fileSchema, CompressionCodec codec, WriterVersion version,
+      TypeDescription fileSchema, org.apache.orc.CompressionKind kind, WriterVersion version,
       int bufferSize, long strideRate, DataCache cacheWrapper, DataReader dataReader,
       PoolFactory pf, IoTrace trace) throws IOException {
     this.fileKey = fileKey;
-    this.codec = codec;
+    this.compressionKind = kind;
+    this.isCompressed = kind != org.apache.orc.CompressionKind.NONE;
+    this.codec = OrcCodecPool.getCodec(kind);
     this.types = types;
     this.fileSchema = fileSchema; // Note: this is redundant with types
     this.version = version;
@@ -299,7 +304,6 @@ class EncodedReaderImpl implements EncodedReader {
       }
       trace.logColumnRead(i, colRgIx, enc.getKind());
     }
-    boolean isCompressed = (codec != null);
     CreateHelper listToRead = new CreateHelper();
     boolean hasIndexOnlyCols = false;
     for (OrcProto.Stream stream : streamList) {
@@ -336,7 +340,7 @@ class EncodedReaderImpl implements EncodedReader {
       } else {
         trace.logAddStream(colIx, streamKind, offset, length, indexIx, false);
         RecordReaderUtils.addRgFilteredStreamToRanges(stream, rgs,
-            codec != null, indexes[colIx], encodings.get(colIx), types.get(colIx),
+            isCompressed, indexes[colIx], encodings.get(colIx), types.get(colIx),
             bufferSize, hasNull[colIx], offset, length, listToRead, true);
       }
       offset += length;
@@ -657,6 +661,7 @@ class EncodedReaderImpl implements EncodedReader {
 
   @Override
   public void close() throws IOException {
+    OrcCodecPool.returnCodec(compressionKind, codec);
     dataReader.close();
   }
 
@@ -776,7 +781,6 @@ class EncodedReaderImpl implements EncodedReader {
       csd.getCacheBuffers().clear();
     }
     if (cOffset == endCOffset) return null;
-    boolean isCompressed = codec != null;
     List<ProcCacheChunk> toDecompress = null;
     List<IncompleteCb> badEstimates = null;
     List<ByteBuffer> toReleaseCopies = null;
@@ -1896,7 +1900,7 @@ class EncodedReaderImpl implements EncodedReader {
 
   private DiskRangeList preReadUncompressedStreams(long stripeOffset, ReadContext[] colCtxs,
       MutateHelper toRead, IdentityHashMap<ByteBuffer, Boolean> toRelease) throws IOException {
-    if (codec != null) return toRead.next;
+    if (isCompressed) return toRead.next;
     DiskRangeList iter = toRead.next;  // Keep "toRead" list for future use, don't extract().
     boolean hasError = true;
     try {

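The lifecycle change in this file: EncodedReaderImpl now takes a
CompressionKind instead of a codec, checks a codec out of the pool for its own
lifetime, and returns it in close(); isCompressed is derived from the kind
rather than from codec nullability. A minimal sketch of that ownership pattern,
assuming orc-core 1.4.1 (the wrapper class is illustrative, not from the patch):

import java.io.IOException;

import org.apache.orc.CompressionCodec;
import org.apache.orc.CompressionKind;
import org.apache.orc.impl.OrcCodecPool;

class PooledCodecLifetime implements AutoCloseable {
  private final CompressionKind kind;
  private final CompressionCodec codec;
  private final boolean isCompressed;

  PooledCodecLifetime(CompressionKind kind) {
    this.kind = kind;
    // Compressed-ness now comes from the kind, not from codec != null.
    this.isCompressed = kind != CompressionKind.NONE;
    // Checked out for this object's lifetime, as the new constructor does.
    this.codec = OrcCodecPool.getCodec(kind);
  }

  boolean isCompressed() {
    return isCompressed;
  }

  @Override
  public void close() throws IOException {
    // Returned to the pool on close(), matching the patched close() above.
    OrcCodecPool.returnCodec(kind, codec);
  }
}
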
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
index d2bb641..df536ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
@@ -48,9 +48,6 @@ public interface Reader extends org.apache.hadoop.hive.ql.io.orc.Reader {
   EncodedReader encodedReader(Object fileKey, DataCache dataCache, DataReader dataReader,
       PoolFactory pf, IoTrace trace) throws IOException;
 
-  /** Gets the compression codec for the underlying ORC file. */
-  CompressionCodec getCodec();
-
   /** The factory that can create (or return) the pools used by encoded reader. */
   public interface PoolFactory {
     <T> Pool<T> createPool(int size, PoolObjectHelper<T> helper);

http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
index a916d58..203ef69 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
@@ -36,13 +36,7 @@ class ReaderImpl extends org.apache.hadoop.hive.ql.io.orc.ReaderImpl implements
   @Override
   public EncodedReader encodedReader(Object fileKey, DataCache dataCache, DataReader dataReader,
       PoolFactory pf, IoTrace trace) throws IOException {
-    return new EncodedReaderImpl(fileKey, types, getSchema(), codec, getWriterVersion(),
+    return new EncodedReaderImpl(fileKey, types, getSchema(), compressionKind, getWriterVersion(),
         bufferSize, rowIndexStride, dataCache, dataReader, pf, trace);
   }
-
-  @Override
-  public CompressionCodec getCodec() {
-    return codec;
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java
index a20328c..5ea2521 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java
@@ -101,13 +101,23 @@ public class DynamicValue implements LiteralDelegate, Serializable {
     return objectInspector.getPrimitiveWritableObject(getValue());
   }
 
+  /**
+   * An exception that indicates that the dynamic values are (intentionally)
+   * not available in this context.
+   */
+  public static class NoDynamicValuesException extends RuntimeException {
+    public NoDynamicValuesException(String message) {
+      super(message);
+    }
+  }
+
   public Object getValue() {
     if (initialized) {
       return val;
     }
 
     if (conf == null) {
-      throw new IllegalStateException("Cannot retrieve dynamic value " + id + " - no conf set");
+      throw new NoDynamicValuesException("Cannot retrieve dynamic value " + id + " - no conf set");
     }
 
     try {
@@ -122,10 +132,12 @@ public class DynamicValue implements LiteralDelegate, Serializable {
       // Get the registry
       DynamicValueRegistry valueRegistry = cache.retrieve(DYNAMIC_VALUE_REGISTRY_CACHE_KEY);
       if (valueRegistry == null) {
-        throw new IllegalStateException("DynamicValueRegistry not available");
+        throw new NoDynamicValuesException("DynamicValueRegistry not available");
       }
       val = valueRegistry.getValue(id);
       initialized = true;
+    } catch (NoDynamicValuesException err) {
+      throw err;
     } catch (Exception err) {
       throw new IllegalStateException("Failed to retrieve dynamic value for " + id, err);
     }