You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2016/03/17 23:47:42 UTC
[38/51] [abbrv] hive git commit: HIVE-13185 :
orc.ReaderImpl.ensureOrcFooter() method fails on small text files with
IndexOutOfBoundsException (Illya Yalovyy, reviewed by Sergey Shelukhin)
HIVE-13185 : orc.ReaderImpl.ensureOrcFooter() method fails on small text files with IndexOutOfBoundsException (Illya Yalovyy, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f07fdfbc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f07fdfbc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f07fdfbc
Branch: refs/heads/llap
Commit: f07fdfbc7226364cdb21784b308bd2adfe114309
Parents: b6af012
Author: Sergey Shelukhin <se...@apache.org>
Authored: Mon Mar 14 16:57:57 2016 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Mon Mar 14 16:57:57 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 15 +-
.../hadoop/hive/ql/io/orc/TestReaderImpl.java | 151 +++++++++++++++++++
2 files changed, 159 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f07fdfbc/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 773c2b1..a031a92 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -256,21 +256,22 @@ public class ReaderImpl implements Reader {
Path path,
int psLen,
ByteBuffer buffer) throws IOException {
- int len = OrcFile.MAGIC.length();
- if (psLen < len + 1) {
+ int magicLength = OrcFile.MAGIC.length();
+ int fullLength = magicLength + 1;
+ if (psLen < fullLength || buffer.remaining() < fullLength) {
throw new FileFormatException("Malformed ORC file " + path +
". Invalid postscript length " + psLen);
}
- int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1 - len;
+ int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
byte[] array = buffer.array();
// now look for the magic string at the end of the postscript.
- if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
+ if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
// If it isn't there, this may be the 0.11.0 version of ORC.
// Read the first 3 bytes of the file to check for the header
- byte[] header = new byte[len];
- in.readFully(0, header, 0, len);
+ byte[] header = new byte[magicLength];
+ in.readFully(0, header, 0, magicLength);
// if it isn't there, this isn't an ORC file
- if (!Text.decode(header, 0 , len).equals(OrcFile.MAGIC)) {
+ if (!Text.decode(header, 0 , magicLength).equals(OrcFile.MAGIC)) {
throw new FileFormatException("Malformed ORC file " + path +
". Invalid postscript.");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f07fdfbc/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestReaderImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestReaderImpl.java
new file mode 100644
index 0000000..e0199d6
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestReaderImpl.java
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2016 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.hive.ql.io.FileFormatException;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.rules.ExpectedException;
+
+/**
+ * Tests for {@code ReaderImpl.ensureOrcFooter} driven by small in-memory byte arrays.
+ *
+ * <p>Each test builds the content of a pretend file, wraps it both as the tail
+ * {@link ByteBuffer} and as a seekable {@link FSDataInputStream}, and then checks whether
+ * {@code ensureOrcFooter} accepts it or rejects it with a {@link FileFormatException}.
+ */
+public class TestReaderImpl {
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  private final Path path = new Path("test-file.orc");
+  private FSDataInputStream in;
+  private int psLen;
+  private ByteBuffer buffer;
+
+  /** Clears the stream reference before every test. */
+  @Before
+  public void setup() {
+    in = null;
+  }
+
+  /** A one-byte text file must be rejected with a {@link FileFormatException}. */
+  @Test
+  public void testEnsureOrcFooterSmallTextFile() throws IOException {
+    prepareTestCase(utf8("1"));
+    thrown.expect(FileFormatException.class);
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  /** A longer text file without the ORC magic must be rejected as well. */
+  @Test
+  public void testEnsureOrcFooterLargeTextFile() throws IOException {
+    prepareTestCase(utf8("This is Some Text File"));
+    thrown.expect(FileFormatException.class);
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  /** Content that starts with the ORC magic but does not end with it is accepted. */
+  @Test
+  public void testEnsureOrcFooter011ORCFile() throws IOException {
+    prepareTestCase(composeContent(OrcFile.MAGIC, "FOOTER"));
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  /** Content that ends with the ORC magic followed by the length byte is accepted. */
+  @Test
+  public void testEnsureOrcFooterCorrectORCFooter() throws IOException {
+    prepareTestCase(composeContent("", OrcFile.MAGIC));
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  /**
+   * Encodes test text as UTF-8 so the bytes do not depend on the platform default charset.
+   *
+   * @param text the text to encode
+   * @return the UTF-8 bytes of {@code text}
+   */
+  private static byte[] utf8(String text) {
+    return text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+  }
+
+  /**
+   * Populates {@link #buffer}, {@link #psLen} and {@link #in} from the given file content.
+   *
+   * @param bytes the complete content of the pretend file; must not be empty, because the
+   *              last byte is read (as an unsigned value) to obtain the postscript length
+   */
+  private void prepareTestCase(byte[] bytes) {
+    buffer = ByteBuffer.wrap(bytes);
+    // Mask with 0xff so the last byte is interpreted as an unsigned length.
+    psLen = buffer.get(bytes.length - 1) & 0xff;
+    in = new FSDataInputStream(new SeekableByteArrayInputStream(bytes));
+  }
+
+  /**
+   * Builds file content laid out as: header bytes, footer bytes, one trailing length byte.
+   *
+   * @param headerStr text placed at the start of the content
+   * @param footerStr text placed immediately before the trailing length byte
+   * @return the composed bytes; the final byte holds the encoded footer length plus one
+   * @throws CharacterCodingException if either string cannot be encoded
+   */
+  private byte[] composeContent(String headerStr, String footerStr) throws CharacterCodingException {
+    ByteBuffer header = Text.encode(headerStr);
+    ByteBuffer footer = Text.encode(footerStr);
+    int headerLen = header.remaining();
+    // The extra byte accounts for the trailing length byte written below.
+    int footerLen = footer.remaining() + 1;
+
+    ByteBuffer buf = ByteBuffer.allocate(headerLen + footerLen);
+
+    buf.put(header);
+    buf.put(footer);
+    buf.put((byte) footerLen);
+    return buf.array();
+  }
+
+  /**
+   * A {@link ByteArrayInputStream} that also implements {@link Seekable} and
+   * {@link PositionedReadable}, so it can back an {@link FSDataInputStream} in tests.
+   */
+  private static final class SeekableByteArrayInputStream extends ByteArrayInputStream
+      implements Seekable, PositionedReadable {
+
+    public SeekableByteArrayInputStream(byte[] buf) {
+      super(buf);
+    }
+
+    /**
+     * Moves the read position to the given absolute offset.
+     *
+     * <p>The position is assigned directly instead of calling {@code reset()} and
+     * {@code skip()}: {@code reset()} rewinds to the most recent mark rather than to
+     * offset 0, so an earlier {@code mark()} call would shift every later seek.
+     * Offsets past the end are clamped to the end of the data, so a following read
+     * reports end of stream.
+     *
+     * @param pos absolute offset to move to
+     * @throws EOFException if {@code pos} is negative
+     */
+    @Override
+    public void seek(long pos) throws IOException {
+      if (pos < 0) {
+        throw new EOFException("Cannot seek to a negative offset: " + pos);
+      }
+      this.pos = (int) Math.min(pos, (long) count);
+    }
+
+    /** Returns the current read position within the backing array. */
+    @Override
+    public long getPos() throws IOException {
+      return pos;
+    }
+
+    /** There is only one source for an in-memory stream, so this always returns false. */
+    @Override
+    public boolean seekToNewSource(long targetPos) throws IOException {
+      return false;
+    }
+
+    /**
+     * Reads up to {@code length} bytes starting at {@code position}, then restores the
+     * stream position that was current before the call.
+     *
+     * @return the number of bytes read, or -1 if {@code position} is at or past the end
+     */
+    @Override
+    public int read(long position, byte[] buffer, int offset, int length)
+        throws IOException {
+      long oldPos = getPos();
+      int nread = -1;
+      try {
+        seek(position);
+        nread = read(buffer, offset, length);
+      } finally {
+        // Positional reads must leave the stream position untouched.
+        seek(oldPos);
+      }
+      return nread;
+    }
+
+    /**
+     * Reads exactly {@code length} bytes starting at {@code position}.
+     *
+     * @throws EOFException if the data ends before {@code length} bytes have been read
+     */
+    @Override
+    public void readFully(long position, byte[] buffer, int offset, int length)
+        throws IOException {
+      int nread = 0;
+      while (nread < length) {
+        int nbytes = read(position + nread, buffer, offset + nread, length - nread);
+        if (nbytes < 0) {
+          throw new EOFException("End of file reached before reading fully.");
+        }
+        nread += nbytes;
+      }
+    }
+
+    /** Fills the whole of {@code buffer} with bytes starting at {@code position}. */
+    @Override
+    public void readFully(long position, byte[] buffer)
+        throws IOException {
+      readFully(position, buffer, 0, buffer.length);
+    }
+  }
+}