You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2016/05/20 21:22:43 UTC

[05/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
deleted file mode 100644
index 41a211b..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
+++ /dev/null
@@ -1,261 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.io.Text;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcProto;
-
-import org.apache.orc.StripeInformation;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-public class TestStringDictionary { // writes Text rows to ORC files and asserts the column encoding kind found in each stripe footer
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
-      + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc"); // one file per test method; the prefix is "TestOrcFile.", not this class's name
-    fs.delete(testFilePath, false); // remove any file already present at the per-test path
-  }
-
-  @Test
-  public void testTooManyDistinct() throws Exception {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) { // NOTE(review): presumably serializes inspector creation with other tests locking on TestOrcFile.class - confirm
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE)
-            .bufferSize(10000));
-    for (int i = 0; i < 20000; i++) { // 20000 rows, each a distinct decimal string
-      writer.addRow(new Text(String.valueOf(i)));
-    }
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows(); // NOTE(review): rows is never closed in this method
-    int idx = 0; // NOTE(review): idx is not checked after the loop, so the total row count is never asserted
-    while (rows.hasNext()) {
-      Object row = rows.next(null);
-      assertEquals(new Text(String.valueOf(idx++)), row);
-    }
-
-    // make sure every column encoding listed in each stripe footer is DIRECT_V2
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky but does the job, this casting will work as long as this test resides
-      // within the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, encoding.getKind());
-      }
-    }
-  }
-
-  @Test
-  public void testHalfDistinct() throws Exception {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE)
-            .bufferSize(10000));
-    Random rand = new Random(123); // fixed seed, so the generated input is the same on every run
-    int[] input = new int[20000];
-    for (int i = 0; i < 20000; i++) {
-      input[i] = rand.nextInt(10000); // values in [0, 10000): at most 10000 distinct strings across 20000 rows
-    }
-
-    for (int i = 0; i < 20000; i++) {
-      writer.addRow(new Text(String.valueOf(input[i])));
-    }
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows(); // NOTE(review): rows is never closed in this method
-    int idx = 0; // NOTE(review): idx is not checked after the loop, so the total row count is never asserted
-    while (rows.hasNext()) {
-      Object row = rows.next(null);
-      assertEquals(new Text(String.valueOf(input[idx++])), row);
-    }
-
-    // make sure every column encoding listed in each stripe footer is DICTIONARY_V2
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky but does the job, this casting will work as long as this test resides
-      // within the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY_V2, encoding.getKind());
-      }
-    }
-  }
-
-  @Test
-  public void testTooManyDistinctCheckDisabled() throws Exception {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-
-    conf.setBoolean(ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.varname, false); // same data as testTooManyDistinct, but with this setting forced to false
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE)
-            .bufferSize(10000));
-    for (int i = 0; i < 20000; i++) { // 20000 rows, each a distinct decimal string
-      writer.addRow(new Text(String.valueOf(i)));
-    }
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows(); // NOTE(review): rows is never closed in this method
-    int idx = 0; // NOTE(review): idx is not checked after the loop, so the total row count is never asserted
-    while (rows.hasNext()) {
-      Object row = rows.next(null);
-      assertEquals(new Text(String.valueOf(idx++)), row);
-    }
-
-    // make sure every column encoding listed in each stripe footer is DIRECT_V2
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky but does the job, this casting will work as long as this test resides
-      // within the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, encoding.getKind());
-      }
-    }
-  }
-
-  @Test
-  public void testHalfDistinctCheckDisabled() throws Exception {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-
-    conf.setBoolean(ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.varname, false); // same data as testHalfDistinct, but with this setting forced to false
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE)
-            .bufferSize(10000));
-    Random rand = new Random(123); // fixed seed, so the generated input is the same on every run
-    int[] input = new int[20000];
-    for (int i = 0; i < 20000; i++) {
-      input[i] = rand.nextInt(10000); // values in [0, 10000): at most 10000 distinct strings across 20000 rows
-    }
-
-    for (int i = 0; i < 20000; i++) {
-      writer.addRow(new Text(String.valueOf(input[i])));
-    }
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows(); // NOTE(review): rows is never closed in this method
-    int idx = 0; // NOTE(review): idx is not checked after the loop, so the total row count is never asserted
-    while (rows.hasNext()) {
-      Object row = rows.next(null);
-      assertEquals(new Text(String.valueOf(input[idx++])), row);
-    }
-
-    // make sure every column encoding listed in each stripe footer is DICTIONARY_V2
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky but does the job, this casting will work as long as this test resides
-      // within the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY_V2, encoding.getKind());
-      }
-    }
-  }
-
-  @Test
-  public void testTooManyDistinctV11AlwaysDictionary() throws Exception {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Text.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE)
-            .version(OrcFile.Version.V_0_11).bufferSize(10000)); // only difference from testTooManyDistinct: writer version V_0_11
-    for (int i = 0; i < 20000; i++) { // 20000 rows, each a distinct decimal string
-      writer.addRow(new Text(String.valueOf(i)));
-    }
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows(); // NOTE(review): rows is never closed in this method
-    int idx = 0; // NOTE(review): idx is not checked after the loop, so the total row count is never asserted
-    while (rows.hasNext()) {
-      Object row = rows.next(null);
-      assertEquals(new Text(String.valueOf(idx++)), row);
-    }
-
-    // make sure every column encoding listed in each stripe footer is DICTIONARY
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky but does the job, this casting will work as long as this test resides
-      // within the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY, encoding.getKind());
-      }
-    }
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
deleted file mode 100644
index 96af65a..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.orc.TypeDescription;
-import org.junit.Test;
-
-public class TestTypeDescription { // checks TypeDescription.toString() and toJson() output against literal expected strings
-
-  @Test
-  public void testJson() {
-    TypeDescription bin = TypeDescription.createBinary(); // simplest case: a single primitive type with no children
-    assertEquals("{\"category\": \"binary\", \"id\": 0, \"max\": 0}",
-        bin.toJson());
-    assertEquals("binary", bin.toString());
-    TypeDescription struct = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal()); // precision/scale not set here; the expected strings below show decimal(38,10)
-    assertEquals("struct<f1:int,f2:string,f3:decimal(38,10)>",
-        struct.toString());
-    assertEquals("{\"category\": \"struct\", \"id\": 0, \"max\": 3, \"fields\": [\n"
-            + "  \"f1\": {\"category\": \"int\", \"id\": 1, \"max\": 1},\n"
-            + "  \"f2\": {\"category\": \"string\", \"id\": 2, \"max\": 2},\n"
-            + "  \"f3\": {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 38, \"scale\": 10}]}",
-        struct.toJson());
-    struct = TypeDescription.createStruct() // second case: a union and a nested struct as children, plus a char with max length 100
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createBoolean()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(100));
-    assertEquals("struct<f1:uniontype<tinyint,decimal(20,10)>,f2:struct<f3:date,f4:double,f5:boolean>,f6:char(100)>",
-        struct.toString());
-    assertEquals( // in the expected JSON, ids run 0..8 in declaration order and each max equals the highest id within that node
-        "{\"category\": \"struct\", \"id\": 0, \"max\": 8, \"fields\": [\n" +
-            "  \"f1\": {\"category\": \"uniontype\", \"id\": 1, \"max\": 3, \"children\": [\n" +
-            "    {\"category\": \"tinyint\", \"id\": 2, \"max\": 2},\n" +
-            "    {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 20, \"scale\": 10}]},\n" +
-            "  \"f2\": {\"category\": \"struct\", \"id\": 4, \"max\": 7, \"fields\": [\n" +
-            "    \"f3\": {\"category\": \"date\", \"id\": 5, \"max\": 5},\n" +
-            "    \"f4\": {\"category\": \"double\", \"id\": 6, \"max\": 6},\n" +
-            "    \"f5\": {\"category\": \"boolean\", \"id\": 7, \"max\": 7}]},\n" +
-            "  \"f6\": {\"category\": \"char\", \"id\": 8, \"max\": 8, \"length\": 100}]}",
-        struct.toJson());
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestUnrolledBitPack.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestUnrolledBitPack.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestUnrolledBitPack.java
deleted file mode 100644
index 3251731..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestUnrolledBitPack.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.orc.CompressionKind;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Longs;
-
-@RunWith(value = Parameterized.class)
-public class TestUnrolledBitPack { // round-trips a long column that mixes one parameter value with zeros through an ORC file
-
-  private long val; // the non-zero value used for this parameterized run
-
-  public TestUnrolledBitPack(long val) {
-    this.val = val;
-  }
-
-  @Parameters
-  public static Collection<Object[]> data() { // one value per inner array, from negative and small values up to Long.MAX_VALUE
-    Object[][] data = new Object[][] { { -1 }, { 1 }, { 7 }, { -128 }, { 32000 }, { 8300000 },
-        { Integer.MAX_VALUE }, { 540000000000L }, { 140000000000000L }, { 36000000000000000L },
-        { Long.MAX_VALUE } };
-    return Arrays.asList(data);
-  }
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
-      + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc"); // the prefix is "TestOrcFile.", not this class's name
-    fs.delete(testFilePath, false); // remove any file already present at the per-test path
-  }
-
-  @Test
-  public void testBitPacking() throws Exception {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) { // NOTE(review): presumably serializes inspector creation with other tests locking on TestOrcFile.class - confirm
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-
-    long[] inp = new long[] { val, 0, val, val, 0, val, 0, val, val, 0, val, 0, val, val, 0, 0, // val interleaved with zeros in an irregular pattern
-        val, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val,
-        0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0,
-        0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0,
-        val, 0, val, 0, 0, val, 0, val, 0, 0, val, val };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp)); // boxed list copy of inp, used for both writing and verification
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(10000));
-    for (Long l : input) {
-      writer.addRow(l);
-    }
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows(); // NOTE(review): rows is never closed in this method
-    int idx = 0; // NOTE(review): idx is not compared with input.size() after the loop, so the row count is never asserted
-    while (rows.hasNext()) {
-      Object row = rows.next(null);
-      assertEquals(input.get(idx++).longValue(), ((LongWritable) row).get()); // rows come back as LongWritable and are compared in write order
-    }
-  }
-
-}