You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2016/05/20 21:22:45 UTC
[07/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
deleted file mode 100644
index da2c681..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-
-import org.junit.Test;
-
-public class TestOrcWideTable {
-
-  @Test
-  public void testBufferSizeFor1Col() throws IOException {
-    assertEquals(128 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
-        1, 128*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor50Col() throws IOException {
-    assertEquals(256 * 1024, WriterImpl.getEstimatedBufferSize(256 * 1024 * 1024,
-        50, 256*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor1000Col() throws IOException {
-    assertEquals(32 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
-        1000, 128*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor2000Col() throws IOException {
-    assertEquals(16 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
-        2000, 256*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor4000Col() throws IOException {
-    assertEquals(8 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
-        4000, 256*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor25000Col() throws IOException {
-    assertEquals(4 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
-        25000, 256*1024));
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
deleted file mode 100644
index 1a3559e..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
+++ /dev/null
@@ -1,297 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.PrintStream;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-public class TestRLEv2 {
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-  Path testFilePath;
-  Configuration conf;
-  FileSystem fs;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem () throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestRLEv2." +
-        testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testFixedDeltaZero() throws Exception {
-    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
-        Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .inspector(inspector)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-
-    for (int i = 0; i < 5120; ++i) {
-      w.addRow(123);
-    }
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123,
-    // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testFixedDeltaOne() throws Exception {
-    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
-        Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .inspector(inspector)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-
-    for (int i = 0; i < 5120; ++i) {
-      w.addRow(i % 512);
-    }
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
-    // and 1 byte delta (delta = 1). In total, 4 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testFixedDeltaOneDescending() throws Exception {
-    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
-        Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .inspector(inspector)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-
-    for (int i = 0; i < 5120; ++i) {
-      w.addRow(512 - (i % 512));
-    }
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
-    // and 1 byte delta (delta = 1). In total, 5 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testFixedDeltaLarge() throws Exception {
-    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
-        Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .inspector(inspector)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-
-    for (int i = 0; i < 5120; ++i) {
-      w.addRow(i % 512 + ((i % 512 ) * 100));
-    }
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
-    // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 5 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testFixedDeltaLargeDescending() throws Exception {
-    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
-        Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .inspector(inspector)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-
-    for (int i = 0; i < 5120; ++i) {
-      w.addRow((512 - i % 512) + ((i % 512 ) * 100));
-    }
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
-    // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 6 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testShortRepeat() throws Exception {
-    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
-        Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .inspector(inspector)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-
-    for (int i = 0; i < 5; ++i) {
-      w.addRow(10);
-    }
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 1 byte header + 1 byte value
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testDeltaUnknownSign() throws Exception {
-    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
-        Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .inspector(inspector)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-
-    w.addRow(0);
-    for (int i = 0; i < 511; ++i) {
-      w.addRow(i);
-    }
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding
-    // will be used. 2 bytes for header and 640 bytes for data (512 values with fixed bit of 10 bits
-    // each, 5120/8 = 640). Total bytes 642
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testPatchedBase() throws Exception {
-    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
-        Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .inspector(inspector)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-
-    Random rand = new Random(123);
-    w.addRow(10000000);
-    for (int i = 0; i < 511; ++i) {
-      w.addRow(rand.nextInt(i+1));
-    }
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // use PATCHED_BASE encoding
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583"));
-    System.setOut(origOut);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestReaderImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestReaderImpl.java
deleted file mode 100644
index e0199d6..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestReaderImpl.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright 2016 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.ByteArrayInputStream;
-import java.io.EOFException;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PositionedReadable;
-import org.apache.hadoop.fs.Seekable;
-import org.apache.hadoop.hive.ql.io.FileFormatException;
-import org.apache.hadoop.io.Text;
-import org.junit.Test;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.rules.ExpectedException;
-
-public class TestReaderImpl {
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  private final Path path = new Path("test-file.orc");
-  private FSDataInputStream in;
-  private int psLen;
-  private ByteBuffer buffer;
-
-  @Before
-  public void setup() {
-    in = null;
-  }
-
-  @Test
-  public void testEnsureOrcFooterSmallTextFile() throws IOException {
-    prepareTestCase("1".getBytes());
-    thrown.expect(FileFormatException.class);
-    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
-  }
-
-  @Test
-  public void testEnsureOrcFooterLargeTextFile() throws IOException {
-    prepareTestCase("This is Some Text File".getBytes());
-    thrown.expect(FileFormatException.class);
-    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
-  }
-
-  @Test
-  public void testEnsureOrcFooter011ORCFile() throws IOException {
-    prepareTestCase(composeContent(OrcFile.MAGIC, "FOOTER"));
-    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
-  }
-
-  @Test
-  public void testEnsureOrcFooterCorrectORCFooter() throws IOException {
-    prepareTestCase(composeContent("",OrcFile.MAGIC));
-    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
-  }
-
-  private void prepareTestCase(byte[] bytes) {
-    buffer = ByteBuffer.wrap(bytes);
-    psLen = buffer.get(bytes.length - 1) & 0xff;
-    in = new FSDataInputStream(new SeekableByteArrayInputStream(bytes));
-  }
-
-  private byte[] composeContent(String headerStr, String footerStr) throws CharacterCodingException {
-    ByteBuffer header = Text.encode(headerStr);
-    ByteBuffer footer = Text.encode(footerStr);
-    int headerLen = header.remaining();
-    int footerLen = footer.remaining() + 1;
-
-    ByteBuffer buf = ByteBuffer.allocate(headerLen + footerLen);
-
-    buf.put(header);
-    buf.put(footer);
-    buf.put((byte) footerLen);
-    return buf.array();
-  }
-
-  private static final class SeekableByteArrayInputStream extends ByteArrayInputStream
-          implements Seekable, PositionedReadable {
-
-    public SeekableByteArrayInputStream(byte[] buf) {
-      super(buf);
-    }
-
-    @Override
-    public void seek(long pos) throws IOException {
-      this.reset();
-      this.skip(pos);
-    }
-
-    @Override
-    public long getPos() throws IOException {
-      return pos;
-    }
-
-    @Override
-    public boolean seekToNewSource(long targetPos) throws IOException {
-      return false;
-    }
-
-    @Override
-    public int read(long position, byte[] buffer, int offset, int length)
-            throws IOException {
-      long oldPos = getPos();
-      int nread = -1;
-      try {
-        seek(position);
-        nread = read(buffer, offset, length);
-      } finally {
-        seek(oldPos);
-      }
-      return nread;
-    }
-
-    @Override
-    public void readFully(long position, byte[] buffer, int offset, int length)
-            throws IOException {
-      int nread = 0;
-      while (nread < length) {
-        int nbytes = read(position + nread, buffer, offset + nread, length - nread);
-        if (nbytes < 0) {
-          throw new EOFException("End of file reached before reading fully.");
-        }
-        nread += nbytes;
-      }
-    }
-
-    @Override
-    public void readFully(long position, byte[] buffer)
-            throws IOException {
-      readFully(position, buffer, 0, buffer.length);
-    }
-  }
-}