You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/08/01 19:36:23 UTC

[2/2] hive git commit: HIVE-14377 : LLAP IO: issue with how estimate cache removes unneeded buffers (Sergey Shelukhin, reviewed by Prasanth J)

HIVE-14377 : LLAP IO: issue with how estimate cache removes unneeded buffers (Sergey Shelukhin, reviewed by Prasanth J)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0829581e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0829581e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0829581e

Branch: refs/heads/branch-2.1
Commit: 0829581e4a0158aefe77a40928b147140e31112b
Parents: 113cfb0
Author: Sergey Shelukhin <se...@apache.org>
Authored: Mon Aug 1 12:01:14 2016 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Mon Aug 1 12:34:12 2016 -0700

----------------------------------------------------------------------
 .../hive/llap/cache/LowLevelCacheImpl.java      |  2 +-
 .../llap/io/encoded/OrcEncodedDataReader.java   |  5 +++
 .../llap/io/metadata/OrcFileEstimateErrors.java |  6 +++-
 .../ql/io/orc/encoded/EncodedReaderImpl.java    | 16 +++------
 .../hive/ql/io/orc/encoded/IncompleteCb.java    | 37 ++++++++++++++++++++
 5 files changed, 53 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
index 038c3ed..8bc675d 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
@@ -171,7 +171,7 @@ public class LowLevelCacheImpl implements LowLevelCache, BufferUsageManager, Lla
       metrics.incrCacheHitBytes(Math.min(requestedLength, currentCached.getLength()));
     }
     if (currentNotCached != null) {
-      assert !currentNotCached.hasData();
+      assert !currentNotCached.hasData(); // Assumes no ranges passed to cache to read have data.
       if (gotAllData != null) {
         gotAllData.value = false;
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index 8b6fc8b..e9794bd 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -833,6 +833,11 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
         long baseOffset, DiskRangeListFactory factory, BooleanRef gotAllData) {
       DiskRangeList result = (lowLevelCache == null) ? range
           : lowLevelCache.getFileData(fileKey, range, baseOffset, factory, counters, gotAllData);
+      if (LlapIoImpl.ORC_LOGGER.isTraceEnabled()) {
+        LlapIoImpl.ORC_LOGGER.trace("Disk ranges after data cache (file " + fileKey
+            + ", base offset " + baseOffset + "): "
+            + RecordReaderUtils.stringifyDiskRanges(range.next));
+      }
       if (gotAllData.value) return result;
       return (metadataCache == null) ? range
           : metadataCache.getIncompleteCbs(fileKey, range, baseOffset, factory, gotAllData);

http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java
index ad88b98..20af0d0 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimato
 import org.apache.hadoop.hive.llap.cache.EvictionDispatcher;
 import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer;
 import org.apache.hadoop.hive.ql.io.SyntheticFileId;
+import org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb;
 
 public class OrcFileEstimateErrors extends LlapCacheableBuffer {
   private final Object fileKey;
@@ -67,6 +68,7 @@ public class OrcFileEstimateErrors extends LlapCacheableBuffer {
       prev = new MutateHelper(ranges);
     }
     DiskRangeList current = ranges;
+    gotAllData.value = true; // Assume by default that we would find everything.
     while (current != null) {
       // We assume ranges in "ranges" are non-overlapping; thus, we will save next in advance.
       DiskRangeList check = current;
@@ -77,7 +79,9 @@ public class OrcFileEstimateErrors extends LlapCacheableBuffer {
         gotAllData.value = false;
         continue;
       }
-      check.removeSelf();
+      // We could just remove here and handle the missing tail during read, but that can be
+      // dangerous; let's explicitly add an incomplete CB.
+      check.replaceSelfWith(new IncompleteCb(check.getOffset(), check.getEnd()));
     }
     return prev.next;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
index dad35e3..bcb54d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
@@ -710,6 +710,11 @@ class EncodedReaderImpl implements EncodedReader {
         ponderReleaseInitialRefcount(unlockUntilCOffset, streamOffset, cc);
         lastUncompressed = cc;
         next = current.next;
+        if (next != null && (endCOffset >= 0 && currentOffset < endCOffset)
+            && next.getOffset() >= endCOffset) {
+          throw new IOException("Expected data at " + currentOffset + " (reading until "
+            + endCOffset + "), but the next buffer starts at " + next.getOffset());
+        }
       } else if (current instanceof IncompleteCb)  {
         // 2b. This is a known incomplete CB caused by ORC CB end boundaries being estimates.
         if (isTracingEnabled) {
@@ -1123,17 +1128,6 @@ class EncodedReaderImpl implements EncodedReader {
     return ranges;
   }
 
-  private static class IncompleteCb extends DiskRangeList {
-    public IncompleteCb(long offset, long end) {
-      super(offset, end);
-    }
-
-    @Override
-    public String toString() {
-      return "incomplete CB start: " + offset + " end: " + end;
-    }
-  }
-
   /**
    * Reads one compression block from the source; handles compression blocks read from
    * multiple ranges (usually, that would only happen with zcr).

http://git-wip-us.apache.org/repos/asf/hive/blob/0829581e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java
new file mode 100644
index 0000000..e3671ab
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/IncompleteCb.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc.encoded;
+
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+
+public class IncompleteCb extends DiskRangeList {
+  public IncompleteCb(long offset, long end) {
+    super(offset, end);
+  }
+
+  @Override
+  public String toString() {
+    return "incomplete CB start: " + offset + " end: " + end;
+  }
+
+  @Override
+  public boolean hasData() {
+    return true; // Should not be treated like it needs data.
+  }
+}
\ No newline at end of file