Posted to commits@hive.apache.org by om...@apache.org on 2017/07/19 16:58:32 UTC

[09/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestBitFieldReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestBitFieldReader.java b/orc/src/test/org/apache/hive/orc/impl/TestBitFieldReader.java
new file mode 100644
index 0000000..dbdded5
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestBitFieldReader.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestBitFieldReader {
+
+  public void runSeekTest(CompressionCodec codec) throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    final int COUNT = 16384;
+    BitFieldWriter out = new BitFieldWriter(
+        new OutStream("test", 500, codec, collect), 1);
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[COUNT];
+    for(int i=0; i < COUNT; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      // test runs, non-runs
+      if (i < COUNT / 2) {
+        out.write(i & 1);
+      } else {
+        out.write((i/3) & 1);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    BitFieldReader in = new BitFieldReader(InStream.create("test",
+        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+        codec, 500), 1);
+    for(int i=0; i < COUNT; ++i) {
+      int x = in.next();
+      if (i < COUNT / 2) {
+        assertEquals(i & 1, x);
+      } else {
+        assertEquals((i/3) & 1, x);
+      }
+    }
+    for(int i=COUNT-1; i >= 0; --i) {
+      in.seek(positions[i]);
+      int x = in.next();
+      if (i < COUNT / 2) {
+        assertEquals(i & 1, x);
+      } else {
+        assertEquals((i/3) & 1, x);
+      }
+    }
+  }
+
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    runSeekTest(null);
+  }
+
+  @Test
+  public void testCompressedSeek() throws Exception {
+    runSeekTest(new ZlibCodec());
+  }
+
+  @Test
+  public void testBiggerItems() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    final int COUNT = 16384;
+    BitFieldWriter out = new BitFieldWriter(
+        new OutStream("test", 500, null, collect), 3);
+    for(int i=0; i < COUNT; ++i) {
+      // test runs, non-runs
+      if (i < COUNT / 2) {
+        out.write(i & 7);
+      } else {
+        out.write((i/3) & 7);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    BitFieldReader in = new BitFieldReader(InStream.create("test",
+        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+        null, 500), 3);
+    for(int i=0; i < COUNT; ++i) {
+      int x = in.next();
+      if (i < COUNT / 2) {
+        assertEquals(i & 7, x);
+      } else {
+        assertEquals((i/3) & 7, x);
+      }
+    }
+  }
+
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    BitFieldWriter out = new BitFieldWriter(
+        new OutStream("test", 100, null, collect), 1);
+    final int COUNT = 16384;
+    for(int i=0; i < COUNT; ++i) {
+      if (i < COUNT/2) {
+        out.write(i & 1);
+      } else {
+        out.write((i/3) & 1);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    BitFieldReader in = new BitFieldReader(InStream.create("test", new ByteBuffer[]{inBuf},
+        new long[]{0}, inBuf.remaining(), null, 100), 1);
+    for(int i=0; i < COUNT; i += 5) {
+      int x = (int) in.next();
+      if (i < COUNT/2) {
+        assertEquals(i & 1, x);
+      } else {
+        assertEquals((i/3) & 1, x);
+      }
+      if (i < COUNT - 5) {
+        in.skip(4);
+      }
+      in.skip(0);
+    }
+  }
+}
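
A note on what this test pins down: BitFieldWriter packs fixed-width values (1 bit here, 3 bits in testBiggerItems) into a byte stream, and each recorded position lets BitFieldReader seek back to an exact bit. A minimal standalone sketch of the packing idea, assuming MSB-first bit order within each byte (an illustration, not the ORC implementation):

  import java.io.ByteArrayOutputStream;

  public class BitPackSketch {
    public static void main(String[] args) {
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      int current = 0, bitsInCurrent = 0;
      for (int i = 0; i < 16; ++i) {
        current = (current << 1) | (i & 1);   // same alternating pattern as the first half above
        if (++bitsInCurrent == 8) {           // flush each completed byte
          out.write(current);
          current = 0;
          bitsInCurrent = 0;
        }
      }
      byte[] packed = out.toByteArray();
      System.out.printf("16 bits -> %d bytes: 0x%02x 0x%02x%n",
          packed.length, packed[0], packed[1]);  // 0x55 0x55
    }
  }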

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestBitPack.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestBitPack.java b/orc/src/test/org/apache/hive/orc/impl/TestBitPack.java
new file mode 100644
index 0000000..1790c35
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestBitPack.java
@@ -0,0 +1,279 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import com.google.common.primitives.Longs;
+
+public class TestBitPack {
+
+  private static final int SIZE = 100;
+  private static Random rand = new Random(100);
+  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
+      + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  // Note: despite its name, this helper zigzag-encodes each value; no
+  // differencing is applied before SerializationUtils.zigzagEncode.
+  private long[] deltaEncode(long[] inp) {
+    long[] output = new long[inp.length];
+    SerializationUtils utils = new SerializationUtils();
+    for (int i = 0; i < inp.length; i++) {
+      output[i] = utils.zigzagEncode(inp[i]);
+    }
+    return output;
+  }
+
+  // Draws a uniformly distributed value in [0, n), using rejection sampling
+  // to avoid the modulo bias of rng.nextLong() % n.
+  private long nextLong(Random rng, long n) {
+    long bits, val;
+    do {
+      bits = (rng.nextLong() << 1) >>> 1;
+      val = bits % n;
+    } while (bits - val + (n - 1) < 0L);
+    return val;
+  }
+
+  private void runTest(int numBits) throws IOException {
+    long[] inp = new long[SIZE];
+    for (int i = 0; i < SIZE; i++) {
+      long val = 0;
+      if (numBits <= 32) {
+        if (numBits == 1) {
+          val = -1 * rand.nextInt(2);
+        } else {
+          val = rand.nextInt((int) Math.pow(2, numBits - 1));
+        }
+      } else {
+        val = nextLong(rand, (long) Math.pow(2, numBits - 2));
+      }
+      if (val % 2 == 0) {
+        val = -val;
+      }
+      inp[i] = val;
+    }
+    long[] deltaEncoded = deltaEncode(inp);
+    long minInput = Collections.min(Longs.asList(deltaEncoded));
+    long maxInput = Collections.max(Longs.asList(deltaEncoded));
+    long rangeInput = maxInput - minInput;
+    SerializationUtils utils = new SerializationUtils();
+    int fixedWidth = utils.findClosestNumBits(rangeInput);
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    OutStream output = new OutStream("test", SIZE, null, collect);
+    utils.writeInts(deltaEncoded, 0, deltaEncoded.length, fixedWidth, output);
+    output.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    long[] buff = new long[SIZE];
+    utils.readInts(buff, 0, SIZE, fixedWidth, InStream.create("test", new ByteBuffer[] { inBuf },
+        new long[] { 0 }, inBuf.remaining(), null, SIZE));
+    for (int i = 0; i < SIZE; i++) {
+      buff[i] = utils.zigzagDecode(buff[i]);
+    }
+    assertEquals(numBits, fixedWidth);
+    assertArrayEquals(inp, buff);
+  }
+
+  @Test
+  public void test01BitPacking1Bit() throws IOException {
+    runTest(1);
+  }
+
+  @Test
+  public void test02BitPacking2Bit() throws IOException {
+    runTest(2);
+  }
+
+  @Test
+  public void test03BitPacking3Bit() throws IOException {
+    runTest(3);
+  }
+
+  @Test
+  public void test04BitPacking4Bit() throws IOException {
+    runTest(4);
+  }
+
+  @Test
+  public void test05BitPacking5Bit() throws IOException {
+    runTest(5);
+  }
+
+  @Test
+  public void test06BitPacking6Bit() throws IOException {
+    runTest(6);
+  }
+
+  @Test
+  public void test07BitPacking7Bit() throws IOException {
+    runTest(7);
+  }
+
+  @Test
+  public void test08BitPacking8Bit() throws IOException {
+    runTest(8);
+  }
+
+  @Test
+  public void test09BitPacking9Bit() throws IOException {
+    runTest(9);
+  }
+
+  @Test
+  public void test10BitPacking10Bit() throws IOException {
+    runTest(10);
+  }
+
+  @Test
+  public void test11BitPacking11Bit() throws IOException {
+    runTest(11);
+  }
+
+  @Test
+  public void test12BitPacking12Bit() throws IOException {
+    runTest(12);
+  }
+
+  @Test
+  public void test13BitPacking13Bit() throws IOException {
+    runTest(13);
+  }
+
+  @Test
+  public void test14BitPacking14Bit() throws IOException {
+    runTest(14);
+  }
+
+  @Test
+  public void test15BitPacking15Bit() throws IOException {
+    runTest(15);
+  }
+
+  @Test
+  public void test16BitPacking16Bit() throws IOException {
+    runTest(16);
+  }
+
+  @Test
+  public void test17BitPacking17Bit() throws IOException {
+    runTest(17);
+  }
+
+  @Test
+  public void test18BitPacking18Bit() throws IOException {
+    runTest(18);
+  }
+
+  @Test
+  public void test19BitPacking19Bit() throws IOException {
+    runTest(19);
+  }
+
+  @Test
+  public void test20BitPacking20Bit() throws IOException {
+    runTest(20);
+  }
+
+  @Test
+  public void test21BitPacking21Bit() throws IOException {
+    runTest(21);
+  }
+
+  @Test
+  public void test22BitPacking22Bit() throws IOException {
+    runTest(22);
+  }
+
+  @Test
+  public void test23BitPacking23Bit() throws IOException {
+    runTest(23);
+  }
+
+  @Test
+  public void test24BitPacking24Bit() throws IOException {
+    runTest(24);
+  }
+
+  @Test
+  public void test26BitPacking26Bit() throws IOException {
+    runTest(26);
+  }
+
+  @Test
+  public void test28BitPacking28Bit() throws IOException {
+    runTest(28);
+  }
+
+  @Test
+  public void test30BitPacking30Bit() throws IOException {
+    runTest(30);
+  }
+
+  @Test
+  public void test32BitPacking32Bit() throws IOException {
+    runTest(32);
+  }
+
+  @Test
+  public void test40BitPacking40Bit() throws IOException {
+    runTest(40);
+  }
+
+  @Test
+  public void test48BitPacking48Bit() throws IOException {
+    runTest(48);
+  }
+
+  @Test
+  public void test56BitPacking56Bit() throws IOException {
+    runTest(56);
+  }
+
+  @Test
+  public void test64BitPacking64Bit() throws IOException {
+    runTest(64);
+  }
+}
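
The deltaEncode helper above is really a zigzag pass: zigzag encoding interleaves negative and non-negative values so small magnitudes of either sign map to small unsigned numbers, which is what lets findClosestNumBits pick a narrow width. A self-contained sketch of the standard mapping (the same form protobuf uses; SerializationUtils.zigzagEncode is assumed to match):

  public class ZigzagSketch {
    static long zigzagEncode(long n) { return (n << 1) ^ (n >> 63); }
    static long zigzagDecode(long n) { return (n >>> 1) ^ -(n & 1); }

    public static void main(String[] args) {
      for (long v : new long[]{0, -1, 1, -2, 123}) {
        long enc = zigzagEncode(v);
        System.out.println(v + " -> " + enc + " -> " + zigzagDecode(enc));
      }
      // 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, 123 -> 246: small magnitudes of either
      // sign stay small, so a narrow fixed bit width suffices for writeInts().
    }
  }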

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestColumnStatisticsImpl.java b/orc/src/test/org/apache/hive/orc/impl/TestColumnStatisticsImpl.java
new file mode 100644
index 0000000..4708043
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestColumnStatisticsImpl.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.TypeDescription;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class TestColumnStatisticsImpl {
+
+  @Test
+  public void testUpdateDate() throws Exception {
+    ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(TypeDescription.createDate());
+    DateWritable date = new DateWritable(16400);
+    stat.increment();
+    stat.updateDate(date);
+    assertDateStatistics(stat, 1, 16400, 16400);
+
+    date.set(16410);
+    stat.increment();
+    stat.updateDate(date);
+    assertDateStatistics(stat, 2, 16400, 16410);
+
+    date.set(16420);
+    stat.increment();
+    stat.updateDate(date);
+    assertDateStatistics(stat, 3, 16400, 16420);
+  }
+
+  private void assertDateStatistics(ColumnStatisticsImpl stat, int count, int minimum, int maximum) {
+    OrcProto.ColumnStatistics.Builder builder = stat.serialize();
+
+    assertEquals(count, builder.getNumberOfValues());
+    assertTrue(builder.hasDateStatistics());
+    assertFalse(builder.hasStringStatistics());
+
+    OrcProto.DateStatistics protoStat = builder.getDateStatistics();
+    assertTrue(protoStat.hasMinimum());
+    assertEquals(minimum, protoStat.getMinimum());
+    assertTrue(protoStat.hasMaximum());
+    assertEquals(maximum, protoStat.getMaximum());
+  }
+}
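
The magic numbers here are days since the Unix epoch, which is how DateWritable and ORC date statistics represent dates. A quick check of what they correspond to:

  import java.time.LocalDate;

  public class EpochDaySketch {
    public static void main(String[] args) {
      System.out.println(LocalDate.ofEpochDay(16400));  // 2014-11-25
      System.out.println(LocalDate.ofEpochDay(16410));  // 2014-12-05
      System.out.println(LocalDate.ofEpochDay(16420));  // 2014-12-15
    }
  }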

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestDataReaderProperties.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestDataReaderProperties.java b/orc/src/test/org/apache/hive/orc/impl/TestDataReaderProperties.java
new file mode 100644
index 0000000..1e53f55
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestDataReaderProperties.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hive.orc.CompressionKind;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.mockito.Mockito.mock;
+
+public class TestDataReaderProperties {
+
+  private FileSystem mockedFileSystem = mock(FileSystem.class);
+  private Path mockedPath = mock(Path.class);
+  private boolean mockedZeroCopy = false;
+
+  @Test
+  public void testCompleteBuild() {
+    DataReaderProperties properties = DataReaderProperties.builder()
+      .withFileSystem(mockedFileSystem)
+      .withPath(mockedPath)
+      .withCompression(CompressionKind.ZLIB)
+      .withZeroCopy(mockedZeroCopy)
+      .build();
+    assertEquals(mockedFileSystem, properties.getFileSystem());
+    assertEquals(mockedPath, properties.getPath());
+    assertEquals(CompressionKind.ZLIB, properties.getCompression());
+    assertEquals(mockedZeroCopy, properties.getZeroCopy());
+  }
+
+  @Test
+  public void testMissingNonRequiredArgs() {
+    DataReaderProperties properties = DataReaderProperties.builder()
+      .withFileSystem(mockedFileSystem)
+      .withPath(mockedPath)
+      .build();
+    assertEquals(mockedFileSystem, properties.getFileSystem());
+    assertEquals(mockedPath, properties.getPath());
+    assertNull(properties.getCompression());
+    assertFalse(properties.getZeroCopy());
+  }
+
+  @Test(expected = java.lang.NullPointerException.class)
+  public void testEmptyBuild() {
+    DataReaderProperties.builder().build();
+  }
+
+  @Test(expected = java.lang.NullPointerException.class)
+  public void testMissingPath() {
+    DataReaderProperties.builder()
+      .withFileSystem(mockedFileSystem)
+      .withCompression(CompressionKind.NONE)
+      .withZeroCopy(mockedZeroCopy)
+      .build();
+  }
+
+  @Test(expected = java.lang.NullPointerException.class)
+  public void testMissingFileSystem() {
+    DataReaderProperties.builder()
+      .withPath(mockedPath)
+      .withCompression(CompressionKind.NONE)
+      .withZeroCopy(mockedZeroCopy)
+      .build();
+  }
+
+}
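
These tests pin down a conventional required/optional builder contract: fileSystem and path must be supplied before build(), while compression and zeroCopy fall back to null and false. A minimal sketch of that contract with illustrative names (not the actual DataReaderProperties source):

  final class PropsSketch {
    private final Object fileSystem;
    private final Object path;

    private PropsSketch(Builder b) {
      if (b.fileSystem == null || b.path == null) {
        throw new NullPointerException("fileSystem and path are required");
      }
      this.fileSystem = b.fileSystem;
      this.path = b.path;
    }

    static final class Builder {
      private Object fileSystem;
      private Object path;
      Builder withFileSystem(Object fs) { this.fileSystem = fs; return this; }
      Builder withPath(Object p) { this.path = p; return this; }
      PropsSketch build() { return new PropsSketch(this); }
    }
  }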

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestDynamicArray.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestDynamicArray.java b/orc/src/test/org/apache/hive/orc/impl/TestDynamicArray.java
new file mode 100644
index 0000000..408be12
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestDynamicArray.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.util.Random;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestDynamicArray {
+
+  @Test
+  public void testByteArray() throws Exception {
+    DynamicByteArray dba = new DynamicByteArray(3, 10);
+    dba.add((byte) 0);
+    dba.add((byte) 1);
+    dba.set(3, (byte) 3);
+    dba.set(2, (byte) 2);
+    dba.add((byte) 4);
+    assertEquals("{0,1,2,3,4}", dba.toString());
+    assertEquals(5, dba.size());
+    byte[] val;
+    val = new byte[0];
+    assertEquals(0, dba.compare(val, 0, 0, 2, 0));
+    assertEquals(-1, dba.compare(val, 0, 0, 2, 1));
+    val = new byte[]{3,42};
+    assertEquals(1, dba.compare(val, 0, 1, 2, 0));
+    assertEquals(1, dba.compare(val, 0, 1, 2, 1));
+    assertEquals(0, dba.compare(val, 0, 1, 3, 1));
+    assertEquals(-1, dba.compare(val, 0, 1, 3, 2));
+    assertEquals(1, dba.compare(val, 0, 2, 3, 1));
+    val = new byte[256];
+    for(int b=-128; b < 128; ++b) {
+      dba.add((byte) b);
+      val[b+128] = (byte) b;
+    }
+    assertEquals(0, dba.compare(val, 0, 256, 5, 256));
+    assertEquals(1, dba.compare(val, 0, 1, 0, 1));
+    assertEquals(1, dba.compare(val, 254, 1, 0, 1));
+    assertEquals(1, dba.compare(val, 120, 1, 64, 1));
+    val = new byte[1024];
+    Random rand = new Random(1701);
+    for(int i = 0; i < val.length; ++i) {
+      rand.nextBytes(val);
+    }
+    dba.add(val, 0, 1024);
+    assertEquals(1285, dba.size());
+    assertEquals(0, dba.compare(val, 0, 1024, 261, 1024));
+  }
+
+  @Test
+  public void testIntArray() throws Exception {
+    DynamicIntArray dia = new DynamicIntArray(10);
+    for(int i=0; i < 10000; ++i) {
+      dia.add(2*i);
+    }
+    assertEquals(10000, dia.size());
+    for(int i=0; i < 10000; ++i) {
+      assertEquals(2*i, dia.get(i));
+    }
+    dia.clear();
+    assertEquals(0, dia.size());
+    dia.add(3);
+    dia.add(12);
+    dia.add(65);
+    assertEquals("{3,12,65}", dia.toString());
+    for(int i=0; i < 5; ++i) {
+      dia.increment(i, 3);
+    }
+    assertEquals("{6,15,68,3,3}", dia.toString());
+  }
+}
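
DynamicByteArray and DynamicIntArray are append-mostly buffers that grow on demand; the constructor arguments (chunk size and initial chunk count) only tune that growth. A sketch of the underlying idea, using doubling for brevity where the real classes allocate fixed-size chunks:

  import java.util.Arrays;

  class DynIntSketch {
    private int[] data = new int[10];
    private int size = 0;

    void add(int v) {
      if (size == data.length) {
        data = Arrays.copyOf(data, data.length * 2);  // amortized O(1) append
      }
      data[size++] = v;
    }
    int get(int i) { return data[i]; }
    int size() { return size; }
  }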

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestInStream.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestInStream.java b/orc/src/test/org/apache/hive/orc/impl/TestInStream.java
new file mode 100644
index 0000000..ffaef54
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestInStream.java
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.fail;
+
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestInStream {
+
+  static class OutputCollector implements OutStream.OutputReceiver {
+    DynamicByteArray buffer = new DynamicByteArray();
+
+    @Override
+    public void output(ByteBuffer buffer) throws IOException {
+      this.buffer.add(buffer.array(), buffer.arrayOffset() + buffer.position(),
+          buffer.remaining());
+    }
+  }
+
+  static class PositionCollector
+      implements PositionProvider, PositionRecorder {
+    private List<Long> positions = new ArrayList<Long>();
+    private int index = 0;
+
+    @Override
+    public long getNext() {
+      return positions.get(index++);
+    }
+
+    @Override
+    public void addPosition(long offset) {
+      positions.add(offset);
+    }
+
+    public void reset() {
+      index = 0;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder builder = new StringBuilder("position: ");
+      for(int i=0; i < positions.size(); ++i) {
+        if (i != 0) {
+          builder.append(", ");
+        }
+        builder.append(positions.get(i));
+      }
+      return builder.toString();
+    }
+  }
+
+  @Test
+  public void testUncompressed() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    OutStream out = new OutStream("test", 100, null, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      out.write(i);
+    }
+    out.flush();
+    assertEquals(1024, collect.buffer.size());
+    for(int i=0; i < 1024; ++i) {
+      assertEquals((byte) i, collect.buffer.get(i));
+    }
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
+        new long[]{0}, inBuf.remaining(), null, 100);
+    assertEquals("uncompressed stream test position: 0 length: 1024" +
+                 " range: 0 offset: 0 limit: 0",
+                 in.toString());
+    for(int i=0; i < 1024; ++i) {
+      int x = in.read();
+      assertEquals(i & 0xff, x);
+    }
+    for(int i=1023; i >= 0; --i) {
+      in.seek(positions[i]);
+      assertEquals(i & 0xff, in.read());
+    }
+  }
+
+  @Test
+  public void testCompressed() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    CompressionCodec codec = new ZlibCodec();
+    OutStream out = new OutStream("test", 300, codec, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      out.write(i);
+    }
+    out.flush();
+    assertEquals("test", out.toString());
+    assertEquals(961, collect.buffer.size());
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
+        new long[]{0}, inBuf.remaining(), codec, 300);
+    assertEquals("compressed stream test position: 0 length: 961 range: 0" +
+                 " offset: 0 limit: 0 range 0 = 0 to 961",
+                 in.toString());
+    for(int i=0; i < 1024; ++i) {
+      int x = in.read();
+      assertEquals(i & 0xff, x);
+    }
+    assertEquals(0, in.available());
+    for(int i=1023; i >= 0; --i) {
+      in.seek(positions[i]);
+      assertEquals(i & 0xff, in.read());
+    }
+  }
+
+  @Test
+  public void testCorruptStream() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    CompressionCodec codec = new ZlibCodec();
+    OutStream out = new OutStream("test", 500, codec, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      out.write(i);
+    }
+    out.flush();
+
+    // now try to read the stream with a buffer that is too small
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
+        new long[]{0}, inBuf.remaining(), codec, 100);
+    byte[] contents = new byte[1024];
+    try {
+      in.read(contents);
+      fail();
+    } catch(IllegalArgumentException iae) {
+      // EXPECTED
+    }
+
+    // make a corrupted header
+    inBuf.clear();
+    inBuf.put((byte) 32);
+    inBuf.put((byte) 0);
+    inBuf.flip();
+    in = InStream.create("test2", new ByteBuffer[]{inBuf}, new long[]{0},
+        inBuf.remaining(), codec, 300);
+    try {
+      in.read();
+      fail();
+    } catch (IllegalStateException ise) {
+      // EXPECTED
+    }
+  }
+
+  @Test
+  public void testDisjointBuffers() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    CompressionCodec codec = new ZlibCodec();
+    OutStream out = new OutStream("test", 400, codec, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    DataOutput stream = new DataOutputStream(out);
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      stream.writeInt(i);
+    }
+    out.flush();
+    assertEquals("test", out.toString());
+    assertEquals(1674, collect.buffer.size());
+    ByteBuffer[] inBuf = new ByteBuffer[3];
+    inBuf[0] = ByteBuffer.allocate(500);
+    inBuf[1] = ByteBuffer.allocate(1200);
+    inBuf[2] = ByteBuffer.allocate(500);
+    collect.buffer.setByteBuffer(inBuf[0], 0, 483);
+    collect.buffer.setByteBuffer(inBuf[1], 483, 1625 - 483);
+    collect.buffer.setByteBuffer(inBuf[2], 1625, 1674 - 1625);
+
+    for(int i=0; i < inBuf.length; ++i) {
+      inBuf[i].flip();
+    }
+    InStream in = InStream.create("test", inBuf,
+        new long[]{0,483, 1625}, 1674, codec, 400);
+    assertEquals("compressed stream test position: 0 length: 1674 range: 0" +
+                 " offset: 0 limit: 0 range 0 = 0 to 483;" +
+                 "  range 1 = 483 to 1142;  range 2 = 1625 to 49",
+                 in.toString());
+    DataInputStream inStream = new DataInputStream(in);
+    for(int i=0; i < 1024; ++i) {
+      int x = inStream.readInt();
+      assertEquals(i, x);
+    }
+    assertEquals(0, in.available());
+    for(int i=1023; i >= 0; --i) {
+      in.seek(positions[i]);
+      assertEquals(i, inStream.readInt());
+    }
+
+    in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]},
+        new long[]{483, 1625}, 1674, codec, 400);
+    inStream = new DataInputStream(in);
+    positions[303].reset();
+    in.seek(positions[303]);
+    for(int i=303; i < 1024; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+
+    in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]},
+        new long[]{0, 1625}, 1674, codec, 400);
+    inStream = new DataInputStream(in);
+    positions[1001].reset();
+    for(int i=0; i < 300; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+    in.seek(positions[1001]);
+    for(int i=1001; i < 1024; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+  }
+
+  @Test
+  public void testUncompressedDisjointBuffers() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    OutStream out = new OutStream("test", 400, null, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    DataOutput stream = new DataOutputStream(out);
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      stream.writeInt(i);
+    }
+    out.flush();
+    assertEquals("test", out.toString());
+    assertEquals(4096, collect.buffer.size());
+    ByteBuffer[] inBuf = new ByteBuffer[3];
+    inBuf[0] = ByteBuffer.allocate(1100);
+    inBuf[1] = ByteBuffer.allocate(2200);
+    inBuf[2] = ByteBuffer.allocate(1100);
+    collect.buffer.setByteBuffer(inBuf[0], 0, 1024);
+    collect.buffer.setByteBuffer(inBuf[1], 1024, 2048);
+    collect.buffer.setByteBuffer(inBuf[2], 3072, 1024);
+
+    for(int i=0; i < inBuf.length; ++i) {
+      inBuf[i].flip();
+    }
+    InStream in = InStream.create("test", inBuf,
+        new long[]{0, 1024, 3072}, 4096, null, 400);
+    assertEquals("uncompressed stream test position: 0 length: 4096" +
+                 " range: 0 offset: 0 limit: 0",
+                 in.toString());
+    DataInputStream inStream = new DataInputStream(in);
+    for(int i=0; i < 1024; ++i) {
+      int x = inStream.readInt();
+      assertEquals(i, x);
+    }
+    assertEquals(0, in.available());
+    for(int i=1023; i >= 0; --i) {
+      in.seek(positions[i]);
+      assertEquals(i, inStream.readInt());
+    }
+
+    in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]},
+        new long[]{1024, 3072}, 4096, null, 400);
+    inStream = new DataInputStream(in);
+    positions[256].reset();
+    in.seek(positions[256]);
+    for(int i=256; i < 1024; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+
+    in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]},
+        new long[]{0, 3072}, 4096, null, 400);
+    inStream = new DataInputStream(in);
+    positions[768].reset();
+    for(int i=0; i < 256; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+    in.seek(positions[768]);
+    for(int i=768; i < 1024; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+  }
+}
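
The corrupt-header case above leans on ORC's compression chunk framing: every chunk starts with a 3-byte little-endian header whose low bit marks an uncompressed ("original") chunk and whose remaining 23 bits hold the chunk length, so the two-byte buffer the test builds cannot even hold a complete header. A quick encoding check:

  public class HeaderSketch {
    // 3-byte little-endian chunk header: low bit = "original" flag,
    // remaining 23 bits = chunk length.
    static byte[] header(int chunkLength, boolean original) {
      int v = (chunkLength << 1) | (original ? 1 : 0);
      return new byte[]{(byte) v, (byte) (v >> 8), (byte) (v >> 16)};
    }
    public static void main(String[] args) {
      byte[] h = header(100, false);
      System.out.printf("0x%02x 0x%02x 0x%02x%n", h[0], h[1], h[2]);  // 0xc8 0x00 0x00
    }
  }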

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestIntegerCompressionReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestIntegerCompressionReader.java b/orc/src/test/org/apache/hive/orc/impl/TestIntegerCompressionReader.java
new file mode 100644
index 0000000..55d6893
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestIntegerCompressionReader.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestIntegerCompressionReader {
+
+  public void runSeekTest(CompressionCodec codec) throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2(
+        new OutStream("test", 1000, codec, collect), true);
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[4096];
+    Random random = new Random(99);
+    int[] junk = new int[2048];
+    for(int i=0; i < junk.length; ++i) {
+      junk[i] = random.nextInt();
+    }
+    for(int i=0; i < 4096; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      // test runs, incrementing runs, non-runs
+      if (i < 1024) {
+        out.write(i/4);
+      } else if (i < 2048) {
+        out.write(2*i);
+      } else {
+        out.write(junk[i-2048]);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReaderV2 in =
+      new RunLengthIntegerReaderV2(InStream.create
+                                   ("test", new ByteBuffer[]{inBuf},
+                                    new long[]{0}, inBuf.remaining(),
+                                    codec, 1000), true, false);
+    for(int i=0; i < 2048; ++i) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i/4, x);
+      } else if (i < 2048) {
+        assertEquals(2*i, x);
+      } else {
+        assertEquals(junk[i-2048], x);
+      }
+    }
+    for(int i=2047; i >= 0; --i) {
+      in.seek(positions[i]);
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i/4, x);
+      } else if (i < 2048) {
+        assertEquals(2*i, x);
+      } else {
+        assertEquals(junk[i-2048], x);
+      }
+    }
+  }
+
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    runSeekTest(null);
+  }
+
+  @Test
+  public void testCompressedSeek() throws Exception {
+    runSeekTest(new ZlibCodec());
+  }
+
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2(
+        new OutStream("test", 100, null, collect), true);
+    for(int i=0; i < 2048; ++i) {
+      if (i < 1024) {
+        out.write(i);
+      } else {
+        out.write(256 * i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReaderV2 in =
+      new RunLengthIntegerReaderV2(InStream.create("test",
+                                                   new ByteBuffer[]{inBuf},
+                                                   new long[]{0},
+                                                   inBuf.remaining(),
+                                                   null, 100), true, false);
+    for(int i=0; i < 2048; i += 10) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i, x);
+      } else {
+        assertEquals(256 * i, x);
+      }
+      if (i < 2038) {
+        in.skip(9);
+      }
+      in.skip(0);
+    }
+  }
+}
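
The three value regimes written by runSeekTest (i/4, 2*i, then random junk) are chosen to steer RunLengthIntegerWriterV2 toward different RLEv2 encodings. A simplified classification sketch of why each regime encodes differently (the real writer also considers PATCHED_BASE and run-length limits):

  public class RleClassifySketch {
    static String classify(long[] run) {
      boolean constant = true, fixedDelta = run.length > 1;
      for (int i = 1; i < run.length; ++i) {
        if (run[i] != run[0]) constant = false;
        if (run[i] - run[i - 1] != run[1] - run[0]) fixedDelta = false;
      }
      if (constant) return "SHORT_REPEAT";
      if (fixedDelta) return "DELTA";
      return "DIRECT";
    }
    public static void main(String[] args) {
      System.out.println(classify(new long[]{5, 5, 5, 5}));   // SHORT_REPEAT, like i/4
      System.out.println(classify(new long[]{2, 4, 6, 8}));   // DELTA, like 2*i
      System.out.println(classify(new long[]{7, 1, 9, 4}));   // DIRECT, like the junk values
    }
  }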

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestMemoryManager.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestMemoryManager.java b/orc/src/test/org/apache/hive/orc/impl/TestMemoryManager.java
new file mode 100644
index 0000000..290208c
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestMemoryManager.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.hamcrest.BaseMatcher;
+import org.hamcrest.Description;
+import org.junit.Test;
+import org.mockito.Matchers;
+import org.mockito.Mockito;
+
+import java.lang.management.ManagementFactory;
+
+import static junit.framework.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+/**
+ * Test the ORC memory manager.
+ */
+public class TestMemoryManager {
+  private static final double ERROR = 0.000001;
+
+  private static class NullCallback implements MemoryManager.Callback {
+    public boolean checkMemory(double newScale) {
+      return false;
+    }
+  }
+
+  @Test
+  public void testBasics() throws Exception {
+    Configuration conf = new Configuration();
+    MemoryManager mgr = new MemoryManager(conf);
+    NullCallback callback = new NullCallback();
+    long poolSize = mgr.getTotalMemoryPool();
+    assertEquals(Math.round(ManagementFactory.getMemoryMXBean().
+        getHeapMemoryUsage().getMax() * 0.5d), poolSize);
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p1"), 1000, callback);
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p1"), poolSize / 2, callback);
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p2"), poolSize / 2, callback);
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p3"), poolSize / 2, callback);
+    assertEquals(0.6666667, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p4"), poolSize / 2, callback);
+    assertEquals(0.5, mgr.getAllocationScale(), 0.000001);
+    mgr.addWriter(new Path("p4"), 3 * poolSize / 2, callback);
+    assertEquals(0.3333333, mgr.getAllocationScale(), 0.000001);
+    mgr.removeWriter(new Path("p1"));
+    mgr.removeWriter(new Path("p2"));
+    assertEquals(0.5, mgr.getAllocationScale(), 0.00001);
+    mgr.removeWriter(new Path("p4"));
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+  }
+
+  @Test
+  public void testConfig() throws Exception {
+    Configuration conf = new Configuration();
+    conf.set("hive.exec.orc.memory.pool", "0.9");
+    MemoryManager mgr = new MemoryManager(conf);
+    long mem =
+        ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
+    System.err.print("Memory = " + mem);
+    long pool = mgr.getTotalMemoryPool();
+    assertTrue("Pool too small: " + pool, mem * 0.899 < pool);
+    assertTrue("Pool too big: " + pool, pool < mem * 0.901);
+  }
+
+  private static class DoubleMatcher extends BaseMatcher<Double> {
+    final double expected;
+    final double error;
+    DoubleMatcher(double expected, double error) {
+      this.expected = expected;
+      this.error = error;
+    }
+
+    @Override
+    public boolean matches(Object val) {
+      double dbl = (Double) val;
+      return Math.abs(dbl - expected) <= error;
+    }
+
+    @Override
+    public void describeTo(Description description) {
+      description.appendText("not sufficiently close to ");
+      description.appendText(Double.toString(expected));
+    }
+  }
+
+  private static DoubleMatcher closeTo(double value, double error) {
+    return new DoubleMatcher(value, error);
+  }
+
+  @Test
+  public void testCallback() throws Exception {
+    Configuration conf = new Configuration();
+    MemoryManager mgr = new MemoryManager(conf);
+    long pool = mgr.getTotalMemoryPool();
+    MemoryManager.Callback[] calls = new MemoryManager.Callback[20];
+    for(int i=0; i < calls.length; ++i) {
+      calls[i] = Mockito.mock(MemoryManager.Callback.class);
+      mgr.addWriter(new Path(Integer.toString(i)), pool/4, calls[i]);
+    }
+    // add enough rows to get the memory manager to check the limits
+    for(int i=0; i < 10000; ++i) {
+      mgr.addedRow(1);
+    }
+    for(int call=0; call < calls.length; ++call) {
+      Mockito.verify(calls[call], Mockito.times(2))
+          .checkMemory(Matchers.doubleThat(closeTo(0.2, ERROR)));
+    }
+  }
+}
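
The scale assertions follow one rule: every writer registers its requested allocation (re-adding the same path replaces the old request), and the manager grants each writer the fraction min(1.0, totalPool / totalRequested) of its request. With three writers asking for half the pool each, that is pool / (1.5 * pool) = 2/3, matching the 0.6666667 assertion. A worked check:

  public class ScaleSketch {
    public static void main(String[] args) {
      double pool = 512d * 1024 * 1024;          // any pool size works
      double requested = 3 * (pool / 2);
      System.out.println(Math.min(1.0, pool / requested));  // 0.6666666666666666
    }
  }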

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestOrcWideTable.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestOrcWideTable.java b/orc/src/test/org/apache/hive/orc/impl/TestOrcWideTable.java
new file mode 100644
index 0000000..e377a24
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestOrcWideTable.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class TestOrcWideTable {
+
+  @Test
+  public void testBufferSizeFor1Col() throws IOException {
+    assertEquals(128 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        1, 128*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor50Col() throws IOException {
+    assertEquals(256 * 1024, PhysicalFsWriter.getEstimatedBufferSize(256 * 1024 * 1024,
+        50, 256*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor1000Col() throws IOException {
+    assertEquals(32 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        1000, 128*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor2000Col() throws IOException {
+    assertEquals(16 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        2000, 256*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor4000Col() throws IOException {
+    assertEquals(8 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        4000, 256*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor25000Col() throws IOException {
+    assertEquals(4 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        25000, 256*1024));
+  }
+}
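
The expected sizes fit a simple heuristic, inferred here from the assertions rather than read out of PhysicalFsWriter: take stripeSize / (20 * columns), snap to the nearest power of two, then clamp between 4 KiB and the requested maximum. A sketch that reproduces the assertions above:

  public class BufferSizeSketch {
    static int estimate(long stripeSize, int columns, int maxBuffer) {
      int raw = (int) (stripeSize / (20L * columns));
      int pow2 = Integer.highestOneBit(raw);
      if (raw - pow2 > (pow2 << 1) - raw) {
        pow2 <<= 1;                              // snap up when closer to the next power of two
      }
      return Math.min(maxBuffer, Math.max(4 * 1024, pow2));
    }
    public static void main(String[] args) {
      System.out.println(estimate(512L * 1024 * 1024, 1, 128 * 1024));      // 131072
      System.out.println(estimate(512L * 1024 * 1024, 1000, 128 * 1024));   // 32768
      System.out.println(estimate(512L * 1024 * 1024, 25000, 256 * 1024));  // 4096
    }
  }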

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
new file mode 100644
index 0000000..23c13f4
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.nio.ByteBuffer;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestOutStream {
+
+  @Test
+  public void testFlush() throws Exception {
+    OutStream.OutputReceiver receiver =
+        Mockito.mock(OutStream.OutputReceiver.class);
+    CompressionCodec codec = new ZlibCodec();
+    OutStream stream = new OutStream("test", 128*1024, codec, receiver);
+    assertEquals(0L, stream.getBufferSize());
+    stream.write(new byte[]{0, 1, 2});
+    stream.flush();
+    Mockito.verify(receiver).output(Mockito.any(ByteBuffer.class));
+    assertEquals(0L, stream.getBufferSize());
+  }
+}
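
getBufferSize() returning 0 after flush() is the point of this test: once flushed, the stream hands its bytes to the receiver and holds no buffer memory. For readers who prefer a concrete sink over the Mockito mock, a tiny collector in the style of OutStream.OutputReceiver (signature assumed from how these tests use it):

  import java.io.ByteArrayOutputStream;
  import java.io.IOException;
  import java.nio.ByteBuffer;

  class ByteSink {
    private final ByteArrayOutputStream bytes = new ByteArrayOutputStream();

    public void output(ByteBuffer buffer) throws IOException {
      bytes.write(buffer.array(), buffer.arrayOffset() + buffer.position(),
          buffer.remaining());
    }
    public int size() { return bytes.size(); }
  }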

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestRLEv2.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestRLEv2.java b/orc/src/test/org/apache/hive/orc/impl/TestRLEv2.java
new file mode 100644
index 0000000..441a3a2
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestRLEv2.java
@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.apache.hive.orc.tools.FileDump;
+import org.apache.hive.orc.CompressionKind;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestRLEv2 {
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+  Path testFilePath;
+  Configuration conf;
+  FileSystem fs;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem () throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestRLEv2." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  void appendInt(VectorizedRowBatch batch, int i) {
+    ((LongColumnVector) batch.cols[0]).vector[batch.size++] = i;
+  }
+
+  @Test
+  public void testFixedDeltaZero() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, 123);
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123,
+    // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testFixedDeltaOne() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, i % 512);
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
+    // and 1 byte delta (delta = 1). In total, 4 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testFixedDeltaOneDescending() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, 512 - (i % 512));
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
+    // and 1 byte delta (delta = 1). In total, 5 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testFixedDeltaLarge() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, i % 512 + ((i % 512) * 100));
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
+    // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 5 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testFixedDeltaLargeDescending() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, (512 - i % 512) + ((i % 512) * 100));
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements (despite the test name, each run ascends by 99 per step).
+    // Each run has 2 bytes header, 2 bytes base (base = 512, zigzag + varint)
+    // and 2 bytes delta (delta = 99, zigzag encoded varint). In total, 6 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testShortRepeat() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5; ++i) {
+      appendInt(batch, 10);
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 1 byte header + 1 byte value
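+    // A hedged sketch of the two bytes, assuming the RLEv2 SHORT_REPEAT layout (2-bit tag,
+    // 3-bit width minus one, 3-bit repeat count minus three, then the value bytes):
+    //   header 0x02 (tag=00/SHORT_REPEAT, width=1 byte, count=5), value 0x14 (zigzag(10) = 20).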
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testDeltaUnknownSign() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    appendInt(batch, 0);
+    for (int i = 0; i < 511; ++i) {
+      appendInt(batch, i);
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // Monotonicity is undetermined for the sequence 0,0,1,2,3,...,510, so DIRECT encoding
+    // is used: 2 bytes of header plus 640 bytes of data (512 values at a fixed width of
+    // 10 bits each, 5120 bits / 8 = 640 bytes). Total: 642 bytes.
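+    // Hedged note: the maximum value 510 fits in 9 bits, but DIRECT zigzag-encodes signed
+    // values, and zigzag(510) = 1020 requires 10 bits, matching the width above.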
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testPatchedBase() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+
+    Random rand = new Random(123);
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    appendInt(batch, 10000000);
+    for (int i = 0; i < 511; ++i) {
+      appendInt(batch, rand.nextInt(i+1));
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // The single large outlier (10000000) among small random values forces PATCHED_BASE encoding.
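+    // (Hedged sketch of why the stream stays small: PATCHED_BASE picks a reduced bit width
+    // that covers most of the values and emits the few that do not fit, like the outlier
+    // here, as patches, rather than widening all 512 values to the outlier's full width.)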
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583"));
+    System.setOut(origOut);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestReaderImpl.java b/orc/src/test/org/apache/hive/orc/impl/TestReaderImpl.java
new file mode 100644
index 0000000..c414e17
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestReaderImpl.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2016 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hive.orc.FileFormatException;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.rules.ExpectedException;
+
+public class TestReaderImpl {
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  private final Path path = new Path("test-file.orc");
+  private FSDataInputStream in;
+  private int psLen;
+  private ByteBuffer buffer;
+
+  @Before
+  public void setup() {
+    in = null;
+  }
+
+  @Test
+  public void testEnsureOrcFooterSmallTextFile() throws IOException {
+    prepareTestCase("1".getBytes());
+    thrown.expect(FileFormatException.class);
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  @Test
+  public void testEnsureOrcFooterLargeTextFile() throws IOException {
+    prepareTestCase("This is Some Text File".getBytes());
+    thrown.expect(FileFormatException.class);
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  @Test
+  public void testEnsureOrcFooter011ORCFile() throws IOException {
+    prepareTestCase(composeContent(OrcFile.MAGIC, "FOOTER"));
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  @Test
+  public void testEnsureOrcFooterCorrectORCFooter() throws IOException {
+    prepareTestCase(composeContent("", OrcFile.MAGIC));
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  private void prepareTestCase(byte[] bytes) {
+    buffer = ByteBuffer.wrap(bytes);
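+    // An ORC file stores its postscript length in the final byte; read it unsigned.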
+    psLen = buffer.get(bytes.length - 1) & 0xff;
+    in = new FSDataInputStream(new SeekableByteArrayInputStream(bytes));
+  }
+
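+  // Builds header + footer + a trailing length byte, mirroring how an ORC file ends with
+  // its postscript followed by a one-byte postscript length.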
+  private byte[] composeContent(String headerStr, String footerStr) throws CharacterCodingException {
+    ByteBuffer header = Text.encode(headerStr);
+    ByteBuffer footer = Text.encode(footerStr);
+    int headerLen = header.remaining();
+    int footerLen = footer.remaining() + 1;
+
+    ByteBuffer buf = ByteBuffer.allocate(headerLen + footerLen);
+
+    buf.put(header);
+    buf.put(footer);
+    buf.put((byte) footerLen);
+    return buf.array();
+  }
+
+  private static final class SeekableByteArrayInputStream extends ByteArrayInputStream
+          implements Seekable, PositionedReadable {
+
+    public SeekableByteArrayInputStream(byte[] buf) {
+      super(buf);
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+      this.reset();
+      this.skip(pos);
+    }
+
+    @Override
+    public long getPos() throws IOException {
+      return pos;
+    }
+
+    @Override
+    public boolean seekToNewSource(long targetPos) throws IOException {
+      return false;
+    }
+
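+    // Positioned read (Hadoop pread contract): read at an absolute offset, then restore
+    // the previous stream position so sequential reads are unaffected.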
+    @Override
+    public int read(long position, byte[] buffer, int offset, int length)
+            throws IOException {
+      long oldPos = getPos();
+      int nread = -1;
+      try {
+        seek(position);
+        nread = read(buffer, offset, length);
+      } finally {
+        seek(oldPos);
+      }
+      return nread;
+    }
+
+    @Override
+    public void readFully(long position, byte[] buffer, int offset, int length)
+            throws IOException {
+      int nread = 0;
+      while (nread < length) {
+        int nbytes = read(position + nread, buffer, offset + nread, length - nread);
+        if (nbytes < 0) {
+          throw new EOFException("End of file reached before reading fully.");
+        }
+        nread += nbytes;
+      }
+    }
+
+    @Override
+    public void readFully(long position, byte[] buffer)
+            throws IOException {
+      readFully(position, buffer, 0, buffer.length);
+    }
+  }
+}