You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/08/19 20:41:11 UTC
hive git commit: HIVE-11592: ORC metadata section can sometimes exceed protobuf message size limit (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive
Updated Branches:
refs/heads/master 2688b6800 -> f8b02610d
HIVE-11592: ORC metadata section can sometimes exceed protobuf message size limit (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f8b02610
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f8b02610
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f8b02610
Branch: refs/heads/master
Commit: f8b02610d745e63e3d596d7532e84e49eedbd62e
Parents: 2688b68
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Aug 19 11:40:52 2015 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Aug 19 11:40:52 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 34 +++++++++++++++++++-
1 file changed, 33 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b02610/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index a6448b6..c990d85 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -46,12 +46,15 @@ import org.apache.hadoop.io.Text;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.InvalidProtocolBufferException;
public class ReaderImpl implements Reader {
private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
+ private static final int DEFAULT_PROTOBUF_MESSAGE_LIMIT = 64 << 20; // 64MB
+ private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
protected final FileSystem fileSystem;
protected final Path path;
@@ -468,7 +471,36 @@ public class ReaderImpl implements Reader {
InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
- this.metadata = OrcProto.Metadata.parseFrom(instream);
+ CodedInputStream in = CodedInputStream.newInstance(instream);
+ int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
+ OrcProto.Metadata meta = null;
+ do {
+ try {
+ in.setSizeLimit(msgLimit);
+ meta = OrcProto.Metadata.parseFrom(in);
+ } catch (InvalidProtocolBufferException e) {
+ if (e.getMessage().contains("Protocol message was too large")) {
+ LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
+ " size of the coded input stream." );
+
+ msgLimit = msgLimit << 1;
+ if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
+ LOG.error("Metadata section exceeds max protobuf message size of " +
+ PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
+ throw e;
+ }
+
+ // the failed parse has already consumed part of instream, and instream cannot be
+ // reset, so recreate it (and the coded stream) before retrying with the larger limit
+ instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
+ new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
+ in = CodedInputStream.newInstance(instream);
+ } else {
+ throw e;
+ }
+ }
+ } while (meta == null);
+ this.metadata = meta;
footerBuffer.position(position + metadataSize);
footerBuffer.limit(position + metadataSize + footerBufferSize);