You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/08/01 19:36:23 UTC
[2/2] hive git commit: HIVE-14377 : LLAP IO: issue with how estimate
cache removes unneeded buffers (Sergey Shelukhin, reviewed by Prasanth J)
HIVE-14377 : LLAP IO: issue with how estimate cache removes unneeded buffers (Sergey Shelukhin, reviewed by Prasanth J)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0829581e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0829581e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0829581e
Branch: refs/heads/branch-2.1
Commit: 0829581e4a0158aefe77a40928b147140e31112b
Parents: 113cfb0
Author: Sergey Shelukhin <se...@apache.org>
Authored: Mon Aug 1 12:01:14 2016 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Mon Aug 1 12:34:12 2016 -0700
----------------------------------------------------------------------
.../hive/llap/cache/LowLevelCacheImpl.java | 2 +-
.../llap/io/encoded/OrcEncodedDataReader.java | 5 +++
.../llap/io/metadata/OrcFileEstimateErrors.java | 6 +++-
.../ql/io/orc/encoded/EncodedReaderImpl.java | 16 +++------
.../hive/ql/io/orc/encoded/IncompleteCb.java | 37 ++++++++++++++++++++
5 files changed, 53 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
index 038c3ed..8bc675d 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
@@ -171,7 +171,7 @@ public class LowLevelCacheImpl implements LowLevelCache, BufferUsageManager, Lla
metrics.incrCacheHitBytes(Math.min(requestedLength, currentCached.getLength()));
}
if (currentNotCached != null) {
- assert !currentNotCached.hasData();
+ assert !currentNotCached.hasData(); // Assumes no ranges passed to cache to read have data.
if (gotAllData != null) {
gotAllData.value = false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index 8b6fc8b..e9794bd 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -833,6 +833,11 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
long baseOffset, DiskRangeListFactory factory, BooleanRef gotAllData) {
DiskRangeList result = (lowLevelCache == null) ? range
: lowLevelCache.getFileData(fileKey, range, baseOffset, factory, counters, gotAllData);
+ if (LlapIoImpl.ORC_LOGGER.isTraceEnabled()) {
+ LlapIoImpl.ORC_LOGGER.trace("Disk ranges after data cache (file " + fileKey
+ + ", base offset " + baseOffset + "): "
+ + RecordReaderUtils.stringifyDiskRanges(range.next));
+ }
if (gotAllData.value) return result;
return (metadataCache == null) ? range
: metadataCache.getIncompleteCbs(fileKey, range, baseOffset, factory, gotAllData);
http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java
index ad88b98..20af0d0 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimato
import org.apache.hadoop.hive.llap.cache.EvictionDispatcher;
import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer;
import org.apache.hadoop.hive.ql.io.SyntheticFileId;
+import org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb;
public class OrcFileEstimateErrors extends LlapCacheableBuffer {
private final Object fileKey;
@@ -67,6 +68,7 @@ public class OrcFileEstimateErrors extends LlapCacheableBuffer {
prev = new MutateHelper(ranges);
}
DiskRangeList current = ranges;
+ gotAllData.value = true; // Assume by default that we would find everything.
while (current != null) {
// We assume ranges in "ranges" are non-overlapping; thus, we will save next in advance.
DiskRangeList check = current;
@@ -77,7 +79,9 @@ public class OrcFileEstimateErrors extends LlapCacheableBuffer {
gotAllData.value = false;
continue;
}
- check.removeSelf();
+ // We could just remove here and handle the missing tail during read, but that can be
+ // dangerous; let's explicitly add an incomplete CB.
+ check.replaceSelfWith(new IncompleteCb(check.getOffset(), check.getEnd()));
}
return prev.next;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
index dad35e3..bcb54d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
@@ -710,6 +710,11 @@ class EncodedReaderImpl implements EncodedReader {
ponderReleaseInitialRefcount(unlockUntilCOffset, streamOffset, cc);
lastUncompressed = cc;
next = current.next;
+ if (next != null && (endCOffset >= 0 && currentOffset < endCOffset)
+ && next.getOffset() >= endCOffset) {
+ throw new IOException("Expected data at " + currentOffset + " (reading until "
+ + endCOffset + "), but the next buffer starts at " + next.getOffset());
+ }
} else if (current instanceof IncompleteCb) {
// 2b. This is a known incomplete CB caused by ORC CB end boundaries being estimates.
if (isTracingEnabled) {
@@ -1123,17 +1128,6 @@ class EncodedReaderImpl implements EncodedReader {
return ranges;
}
- private static class IncompleteCb extends DiskRangeList {
- public IncompleteCb(long offset, long end) {
- super(offset, end);
- }
-
- @Override
- public String toString() {
- return "incomplete CB start: " + offset + " end: " + end;
- }
- }
-
/**
* Reads one compression block from the source; handles compression blocks read from
* multiple ranges (usually, that would only happen with zcr).
http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java
new file mode 100644
index 0000000..e3671ab
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc.encoded;
+
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+
+public class IncompleteCb extends DiskRangeList {
+ public IncompleteCb(long offset, long end) {
+ super(offset, end);
+ }
+
+ @Override
+ public String toString() {
+ return "incomplete CB start: " + offset + " end: " + end;
+ }
+
+ @Override
+ public boolean hasData() {
+ return true; // Should not be treated like it needs data.
+ }
+}
\ No newline at end of file