Posted to commits@hive.apache.org by se...@apache.org on 2017/11/20 20:20:54 UTC
hive git commit: HIVE-17631 : upgrade orc to 1.4.1 (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Repository: hive
Updated Branches:
refs/heads/master 39d46e8af -> 21008897c
HIVE-17631 : upgrade orc to 1.4.1 (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/21008897
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/21008897
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/21008897
Branch: refs/heads/master
Commit: 21008897c5b7e79aff253967cff3dd308fb901ce
Parents: 39d46e8
Author: sergey <se...@apache.org>
Authored: Mon Nov 20 12:20:39 2017 -0800
Committer: sergey <se...@apache.org>
Committed: Mon Nov 20 12:20:39 2017 -0800
----------------------------------------------------------------------
.../hive/llap/io/encoded/OrcEncodedDataReader.java | 17 +++++++++++++++--
.../llap/io/encoded/SerDeEncodedDataReader.java | 6 +++++-
.../cache/TestIncrementalObjectSizeEstimator.java | 6 ++++++
pom.xml | 2 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 12 +++++++++++-
.../apache/hadoop/hive/ql/io/orc/ReaderImpl.java | 3 ---
.../hive/ql/io/orc/encoded/EncodedReaderImpl.java | 16 ++++++++++------
.../hadoop/hive/ql/io/orc/encoded/Reader.java | 3 ---
.../hadoop/hive/ql/io/orc/encoded/ReaderImpl.java | 8 +-------
.../apache/hadoop/hive/ql/plan/DynamicValue.java | 16 ++++++++++++++--
10 files changed, 63 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index c32f79f..a1ff360 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -36,6 +36,7 @@ import org.apache.orc.TypeDescription;
import org.apache.orc.impl.BufferChunk;
import org.apache.orc.impl.DataReaderProperties;
import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.OrcCodecPool;
import org.apache.orc.impl.OrcIndex;
import org.apache.orc.impl.OrcTail;
import org.apache.orc.impl.ReaderImpl;
@@ -742,8 +743,15 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
metadataCache.decRefBuffer(cacheBuf); // We don't use this one.
}
ByteBuffer bb = footerRange.getData();
- return buildStripeFooter(Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
- bb.remaining(), orcReader.getCodec(), orcReader.getCompressionSize());
+
+ CompressionKind kind = orcReader.getCompressionKind();
+ CompressionCodec codec = OrcCodecPool.getCodec(kind);
+ try {
+ return buildStripeFooter(Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
+ bb.remaining(), codec, orcReader.getCompressionSize());
+ } finally {
+ OrcCodecPool.returnCodec(kind, codec);
+ }
}
private void ensureRawDataReader(boolean isOpen) throws IOException {
@@ -924,6 +932,11 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
}
@Override
+ public CompressionCodec getCompressionCodec() {
+ return orcDataReader.getCompressionCodec();
+ }
+
+ @Override
public DiskRangeList getFileData(Object fileKey, DiskRangeList range,
long baseOffset, DiskRangeListFactory factory, BooleanRef gotAllData) {
DiskRangeList result = lowLevelCache.getFileData(
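
The hunk above is the heart of the upgrade: instead of holding on to a codec exposed by the reader (the Reader.getCodec() accessor is removed further down in this patch), a codec is borrowed from ORC 1.4's OrcCodecPool for the duration of the stripe-footer parse and returned in a finally block, so a failure while parsing cannot leak the pooled instance. A minimal, self-contained sketch of that borrow/use/return pattern follows; the CompressionKind value and the work inside the try are placeholders, not the Hive code:

    import org.apache.orc.CompressionCodec;
    import org.apache.orc.CompressionKind;
    import org.apache.orc.impl.OrcCodecPool;

    public class CodecPoolSketch {
      // Borrow a codec for a short-lived operation and always return it,
      // even when the work throws; returning in finally prevents leaks
      // and lets other readers reuse the pooled codec instance.
      static void withPooledCodec(CompressionKind kind) {
        CompressionCodec codec = OrcCodecPool.getCodec(kind); // null for NONE
        try {
          // placeholder: decompress a stripe-footer buffer with 'codec'
        } finally {
          OrcCodecPool.returnCodec(kind, codec);
        }
      }

      public static void main(String[] args) {
        withPooledCodec(CompressionKind.ZLIB);
      }
    }
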
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index 599b519..4b0a1ce 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -387,7 +387,6 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
}
}
}
-
}
private String throwIncludesMismatchError(boolean[] translated) throws IOException {
@@ -554,6 +553,11 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
currentStripe.lastRowStart = lastStartOffset;
currentStripe.lastRowEnd = currentFileOffset;
}
+
+ @Override
+ public CompressionCodec getCompressionCodec() {
+ return null;
+ }
}
private interface CacheOutput {
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java
----------------------------------------------------------------------
diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java
index 1ec7020..406dd0d 100644
--- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java
+++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java
@@ -28,6 +28,7 @@ import java.util.ArrayList;
import java.util.LinkedHashSet;
import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.orc.CompressionCodec;
import org.apache.orc.DataReader;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
@@ -159,6 +160,11 @@ public class TestIncrementalObjectSizeEstimator {
@Override
public void close() throws IOException {
}
+
+ @Override
+ public CompressionCodec getCompressionCodec() {
+ return null;
+ }
}
/*
@Test
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 04fb7c3..edb38c9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -182,7 +182,7 @@
<libthrift.version>0.9.3</libthrift.version>
<log4j2.version>2.6.2</log4j2.version>
<opencsv.version>2.3</opencsv.version>
- <orc.version>1.3.3</orc.version>
+ <orc.version>1.4.1</orc.version>
<mockito-all.version>1.10.19</mockito-all.version>
<mina.version>2.0.0-M5</mina.version>
<netty.version>4.0.52.Final</netty.version>
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 1e5b841..dda9f93 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.io.orc;
+import org.apache.hadoop.hive.ql.plan.DynamicValue.NoDynamicValuesException;
+
import org.apache.hadoop.hdfs.DistributedFileSystem;
import java.io.IOException;
@@ -80,6 +82,7 @@ import org.apache.hadoop.hive.ql.io.SyntheticFileId;
import org.apache.hadoop.hive.ql.io.orc.ExternalCache.ExternalFooterCachesByConf;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -2115,7 +2118,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
} else {
// column statistics at index 0 contains only the number of rows
ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
- truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
+ PredicateLeaf leaf = predLeaves.get(pred);
+ try {
+ truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, leaf, null);
+ } catch (NoDynamicValuesException dve) {
+ LOG.debug("Dynamic values are not available here {}", dve.getMessage());
+ boolean hasNulls = stats.hasNull() || leaf.getOperator() != Operator.NULL_SAFE_EQUALS;
+ truthValues[pred] = hasNulls ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+ }
}
} else {
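
This hunk makes stripe elimination degrade gracefully: a predicate that references a DynamicValue (for example a semijoin reduction value that only exists at runtime) now surfaces as NoDynamicValuesException during split generation, and the code falls back to the widest truth value that is still correct, so the stripe is kept rather than wrongly eliminated. NULL is excluded from the fallback only when the stats show no nulls and the operator is NULL_SAFE_EQUALS, the one operator that never evaluates to NULL. A small sketch of just that decision, with a placeholder boolean standing in for stats.hasNull():

    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;

    public class FallbackSketch {
      // Pick the most conservative answer when the predicate cannot be
      // evaluated: YES_NO_NULL ("anything is possible") unless NULL is
      // provably impossible for this operator and these stats.
      static TruthValue fallback(boolean statsHaveNull, PredicateLeaf.Operator op) {
        boolean mayBeNull =
            statsHaveNull || op != PredicateLeaf.Operator.NULL_SAFE_EQUALS;
        return mayBeNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
      }
    }
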
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index cbbbb15..9ac02e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -20,13 +20,10 @@ package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.Arrays;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.orc.TypeDescription;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
index 80b7be8..29cef30 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
@@ -48,6 +48,7 @@ import org.apache.orc.OrcProto.Stream;
import org.apache.orc.OrcProto.Stream.Kind;
import org.apache.orc.TypeDescription;
import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.OrcCodecPool;
import org.apache.orc.impl.OrcIndex;
import org.apache.orc.impl.OutStream;
import org.apache.orc.impl.RecordReaderUtils;
@@ -125,6 +126,8 @@ class EncodedReaderImpl implements EncodedReader {
private final DataReader dataReader;
private boolean isDataReaderOpen = false;
private final CompressionCodec codec;
+ private final boolean isCompressed;
+ private final org.apache.orc.CompressionKind compressionKind;
private final int bufferSize;
private final List<OrcProto.Type> types;
private final long rowIndexStride;
@@ -135,11 +138,13 @@ class EncodedReaderImpl implements EncodedReader {
private final WriterVersion version;
public EncodedReaderImpl(Object fileKey, List<OrcProto.Type> types,
- TypeDescription fileSchema, CompressionCodec codec, WriterVersion version,
+ TypeDescription fileSchema, org.apache.orc.CompressionKind kind, WriterVersion version,
int bufferSize, long strideRate, DataCache cacheWrapper, DataReader dataReader,
PoolFactory pf, IoTrace trace) throws IOException {
this.fileKey = fileKey;
- this.codec = codec;
+ this.compressionKind = kind;
+ this.isCompressed = kind != org.apache.orc.CompressionKind.NONE;
+ this.codec = OrcCodecPool.getCodec(kind);
this.types = types;
this.fileSchema = fileSchema; // Note: this is redundant with types
this.version = version;
@@ -299,7 +304,6 @@ class EncodedReaderImpl implements EncodedReader {
}
trace.logColumnRead(i, colRgIx, enc.getKind());
}
- boolean isCompressed = (codec != null);
CreateHelper listToRead = new CreateHelper();
boolean hasIndexOnlyCols = false;
for (OrcProto.Stream stream : streamList) {
@@ -336,7 +340,7 @@ class EncodedReaderImpl implements EncodedReader {
} else {
trace.logAddStream(colIx, streamKind, offset, length, indexIx, false);
RecordReaderUtils.addRgFilteredStreamToRanges(stream, rgs,
- codec != null, indexes[colIx], encodings.get(colIx), types.get(colIx),
+ isCompressed, indexes[colIx], encodings.get(colIx), types.get(colIx),
bufferSize, hasNull[colIx], offset, length, listToRead, true);
}
offset += length;
@@ -657,6 +661,7 @@ class EncodedReaderImpl implements EncodedReader {
@Override
public void close() throws IOException {
+ OrcCodecPool.returnCodec(compressionKind, codec);
dataReader.close();
}
@@ -776,7 +781,6 @@ class EncodedReaderImpl implements EncodedReader {
csd.getCacheBuffers().clear();
}
if (cOffset == endCOffset) return null;
- boolean isCompressed = codec != null;
List<ProcCacheChunk> toDecompress = null;
List<IncompleteCb> badEstimates = null;
List<ByteBuffer> toReleaseCopies = null;
@@ -1896,7 +1900,7 @@ class EncodedReaderImpl implements EncodedReader {
private DiskRangeList preReadUncompressedStreams(long stripeOffset, ReadContext[] colCtxs,
MutateHelper toRead, IdentityHashMap<ByteBuffer, Boolean> toRelease) throws IOException {
- if (codec != null) return toRead.next;
+ if (isCompressed) return toRead.next;
DiskRangeList iter = toRead.next; // Keep "toRead" list for future use, don't extract().
boolean hasError = true;
try {
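
Here the codec's lifetime is tied to the reader's: the constructor now takes a CompressionKind instead of a CompressionCodec, borrows an instance from OrcCodecPool, and close() returns it; compression checks switch from "codec != null" to an isCompressed flag derived from the kind, so they no longer depend on the borrowed instance. A minimal sketch of that acquire-in-constructor / release-in-close shape, using a placeholder holder class rather than the Hive reader:

    import java.io.Closeable;
    import java.io.IOException;

    import org.apache.orc.CompressionCodec;
    import org.apache.orc.CompressionKind;
    import org.apache.orc.impl.OrcCodecPool;

    // Placeholder class illustrating the lifecycle, not the Hive reader.
    public class PooledCodecHolder implements Closeable {
      private final CompressionKind kind;
      private final CompressionCodec codec;  // borrowed, not owned
      private final boolean isCompressed;    // derived from kind, stable

      public PooledCodecHolder(CompressionKind kind) {
        this.kind = kind;
        this.isCompressed = kind != CompressionKind.NONE;
        this.codec = OrcCodecPool.getCodec(kind); // null when NONE
      }

      boolean isCompressed() {
        return isCompressed;
      }

      @Override
      public void close() throws IOException {
        OrcCodecPool.returnCodec(kind, codec); // give the codec back first
      }
    }
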
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
index d2bb641..df536ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/Reader.java
@@ -48,9 +48,6 @@ public interface Reader extends org.apache.hadoop.hive.ql.io.orc.Reader {
EncodedReader encodedReader(Object fileKey, DataCache dataCache, DataReader dataReader,
PoolFactory pf, IoTrace trace) throws IOException;
- /** Gets the compression codec for the underlying ORC file. */
- CompressionCodec getCodec();
-
/** The factory that can create (or return) the pools used by encoded reader. */
public interface PoolFactory {
<T> Pool<T> createPool(int size, PoolObjectHelper<T> helper);
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
index a916d58..203ef69 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/ReaderImpl.java
@@ -36,13 +36,7 @@ class ReaderImpl extends org.apache.hadoop.hive.ql.io.orc.ReaderImpl implements
@Override
public EncodedReader encodedReader(Object fileKey, DataCache dataCache, DataReader dataReader,
PoolFactory pf, IoTrace trace) throws IOException {
- return new EncodedReaderImpl(fileKey, types, getSchema(), codec, getWriterVersion(),
+ return new EncodedReaderImpl(fileKey, types, getSchema(), compressionKind, getWriterVersion(),
bufferSize, rowIndexStride, dataCache, dataReader, pf, trace);
}
-
- @Override
- public CompressionCodec getCodec() {
- return codec;
- }
-
}
http://git-wip-us.apache.org/repos/asf/hive/blob/21008897/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java
index a20328c..5ea2521 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicValue.java
@@ -101,13 +101,23 @@ public class DynamicValue implements LiteralDelegate, Serializable {
return objectInspector.getPrimitiveWritableObject(getValue());
}
+ /**
+ * An exception that indicates that the dynamic values are (intentionally)
+ * not available in this context.
+ */
+ public static class NoDynamicValuesException extends RuntimeException {
+ public NoDynamicValuesException(String message) {
+ super(message);
+ }
+ }
+
public Object getValue() {
if (initialized) {
return val;
}
if (conf == null) {
- throw new IllegalStateException("Cannot retrieve dynamic value " + id + " - no conf set");
+ throw new NoDynamicValuesException("Cannot retrieve dynamic value " + id + " - no conf set");
}
try {
@@ -122,10 +132,12 @@ public class DynamicValue implements LiteralDelegate, Serializable {
// Get the registry
DynamicValueRegistry valueRegistry = cache.retrieve(DYNAMIC_VALUE_REGISTRY_CACHE_KEY);
if (valueRegistry == null) {
- throw new IllegalStateException("DynamicValueRegistry not available");
+ throw new NoDynamicValuesException("DynamicValueRegistry not available");
}
val = valueRegistry.getValue(id);
initialized = true;
+ } catch (NoDynamicValuesException err) {
+ throw err;
} catch (Exception err) {
throw new IllegalStateException("Failed to retrieve dynamic value for " + id, err);
}