You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/08/19 20:41:11 UTC

hive git commit: HIVE-11592: ORC metadata section can sometimes exceed protobuf message size limit (Prasanth Jayachandran reviewed by Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/master 2688b6800 -> f8b02610d


HIVE-11592: ORC metadata section can sometimes exceed protobuf message size limit (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f8b02610
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f8b02610
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f8b02610

Branch: refs/heads/master
Commit: f8b02610d745e63e3d596d7532e84e49eedbd62e
Parents: 2688b68
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Aug 19 11:40:52 2015 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Aug 19 11:40:52 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/ReaderImpl.java       | 34 +++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f8b02610/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index a6448b6..c990d85 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -46,12 +46,15 @@ import org.apache.hadoop.io.Text;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.InvalidProtocolBufferException;
 
 public class ReaderImpl implements Reader {
 
   private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
 
   private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
+  private static final int DEFAULT_PROTOBUF_MESSAGE_LIMIT = 64 << 20;  // 64MB
+  private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
 
   protected final FileSystem fileSystem;
   protected final Path path;
@@ -468,7 +471,36 @@ public class ReaderImpl implements Reader {
 
       InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
           new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
-      this.metadata = OrcProto.Metadata.parseFrom(instream);
+      CodedInputStream in = CodedInputStream.newInstance(instream);
+      int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
+      OrcProto.Metadata meta = null;
+      do {
+        try {
+          in.setSizeLimit(msgLimit);
+          meta = OrcProto.Metadata.parseFrom(in);
+        } catch (InvalidProtocolBufferException e) {
+          if (e.getMessage().contains("Protocol message was too large")) {
+            LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
+                " size of the coded input stream." );
+
+            msgLimit = msgLimit << 1;
+            if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
+              LOG.error("Metadata section exceeds max protobuf message size of " +
+                  PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
+              throw e;
+            }
+
+            // Parsing failed partway through instream, which cannot be reset, so rebuild the
+            // stream from footerBuffer before retrying with the larger size limit.
+            instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
+                new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
+            in = CodedInputStream.newInstance(instream);
+          } else {
+            throw e;
+          }
+        }
+      } while (meta == null);
+      this.metadata = meta;
 
       footerBuffer.position(position + metadataSize);
       footerBuffer.limit(position + metadataSize + footerBufferSize);