You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2017/07/19 16:58:29 UTC
[06/37] hive git commit: HIVE-17118. Move the hive-orc source files
to make the package names unique.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestColumnStatistics.java b/orc/src/test/org/apache/orc/TestColumnStatistics.java
deleted file mode 100644
index 93d4bdb..0000000
--- a/orc/src/test/org/apache/orc/TestColumnStatistics.java
+++ /dev/null
@@ -1,365 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-import static org.junit.Assume.assumeTrue;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.PrintStream;
-import java.sql.Timestamp;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.orc.impl.ColumnStatisticsImpl;
-import org.apache.orc.tools.FileDump;
-import org.apache.orc.tools.TestFileDump;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-/**
- * Test ColumnStatisticsImpl for ORC.
- */
-public class TestColumnStatistics {
-
- @Test
- public void testLongMerge() throws Exception {
- TypeDescription schema = TypeDescription.createInt();
-
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
- stats1.updateInteger(10, 2);
- stats2.updateInteger(1, 1);
- stats2.updateInteger(1000, 1);
- stats1.merge(stats2);
- IntegerColumnStatistics typed = (IntegerColumnStatistics) stats1;
- assertEquals(1, typed.getMinimum());
- assertEquals(1000, typed.getMaximum());
- stats1.reset();
- stats1.updateInteger(-10, 1);
- stats1.updateInteger(10000, 1);
- stats1.merge(stats2);
- assertEquals(-10, typed.getMinimum());
- assertEquals(10000, typed.getMaximum());
- }
-
- @Test
- public void testDoubleMerge() throws Exception {
- TypeDescription schema = TypeDescription.createDouble();
-
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
- stats1.updateDouble(10.0);
- stats1.updateDouble(100.0);
- stats2.updateDouble(1.0);
- stats2.updateDouble(1000.0);
- stats1.merge(stats2);
- DoubleColumnStatistics typed = (DoubleColumnStatistics) stats1;
- assertEquals(1.0, typed.getMinimum(), 0.001);
- assertEquals(1000.0, typed.getMaximum(), 0.001);
- stats1.reset();
- stats1.updateDouble(-10);
- stats1.updateDouble(10000);
- stats1.merge(stats2);
- assertEquals(-10, typed.getMinimum(), 0.001);
- assertEquals(10000, typed.getMaximum(), 0.001);
- }
-
-
- @Test
- public void testStringMerge() throws Exception {
- TypeDescription schema = TypeDescription.createString();
-
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
- stats1.updateString(new Text("bob"));
- stats1.updateString(new Text("david"));
- stats1.updateString(new Text("charles"));
- stats2.updateString(new Text("anne"));
- byte[] erin = new byte[]{0, 1, 2, 3, 4, 5, 101, 114, 105, 110};
- stats2.updateString(erin, 6, 4, 5);
- assertEquals(24, ((StringColumnStatistics)stats2).getSum());
- stats1.merge(stats2);
- StringColumnStatistics typed = (StringColumnStatistics) stats1;
- assertEquals("anne", typed.getMinimum());
- assertEquals("erin", typed.getMaximum());
- assertEquals(39, typed.getSum());
- stats1.reset();
- stats1.updateString(new Text("aaa"));
- stats1.updateString(new Text("zzz"));
- stats1.merge(stats2);
- assertEquals("aaa", typed.getMinimum());
- assertEquals("zzz", typed.getMaximum());
- }
-
- @Test
- public void testDateMerge() throws Exception {
- TypeDescription schema = TypeDescription.createDate();
-
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
- stats1.updateDate(new DateWritable(1000));
- stats1.updateDate(new DateWritable(100));
- stats2.updateDate(new DateWritable(10));
- stats2.updateDate(new DateWritable(2000));
- stats1.merge(stats2);
- DateColumnStatistics typed = (DateColumnStatistics) stats1;
- assertEquals(new DateWritable(10).get(), typed.getMinimum());
- assertEquals(new DateWritable(2000).get(), typed.getMaximum());
- stats1.reset();
- stats1.updateDate(new DateWritable(-10));
- stats1.updateDate(new DateWritable(10000));
- stats1.merge(stats2);
- assertEquals(new DateWritable(-10).get(), typed.getMinimum());
- assertEquals(new DateWritable(10000).get(), typed.getMaximum());
- }
-
- @Test
- public void testTimestampMerge() throws Exception {
- TypeDescription schema = TypeDescription.createTimestamp();
-
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
- stats1.updateTimestamp(new Timestamp(10));
- stats1.updateTimestamp(new Timestamp(100));
- stats2.updateTimestamp(new Timestamp(1));
- stats2.updateTimestamp(new Timestamp(1000));
- stats1.merge(stats2);
- TimestampColumnStatistics typed = (TimestampColumnStatistics) stats1;
- assertEquals(1, typed.getMinimum().getTime());
- assertEquals(1000, typed.getMaximum().getTime());
- stats1.reset();
- stats1.updateTimestamp(new Timestamp(-10));
- stats1.updateTimestamp(new Timestamp(10000));
- stats1.merge(stats2);
- assertEquals(-10, typed.getMinimum().getTime());
- assertEquals(10000, typed.getMaximum().getTime());
- }
-
- @Test
- public void testDecimalMerge() throws Exception {
- TypeDescription schema = TypeDescription.createDecimal()
- .withPrecision(38).withScale(16);
-
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
- stats1.updateDecimal(new HiveDecimalWritable(10));
- stats1.updateDecimal(new HiveDecimalWritable(100));
- stats2.updateDecimal(new HiveDecimalWritable(1));
- stats2.updateDecimal(new HiveDecimalWritable(1000));
- stats1.merge(stats2);
- DecimalColumnStatistics typed = (DecimalColumnStatistics) stats1;
- assertEquals(1, typed.getMinimum().longValue());
- assertEquals(1000, typed.getMaximum().longValue());
- stats1.reset();
- stats1.updateDecimal(new HiveDecimalWritable(-10));
- stats1.updateDecimal(new HiveDecimalWritable(10000));
- stats1.merge(stats2);
- assertEquals(-10, typed.getMinimum().longValue());
- assertEquals(10000, typed.getMaximum().longValue());
- }
-
-
- Path workDir = new Path(System.getProperty("test.tmp.dir",
- "target" + File.separator + "test" + File.separator + "tmp"));
-
- Configuration conf;
- FileSystem fs;
- Path testFilePath;
-
- @Rule
- public TestName testCaseName = new TestName();
-
- @Before
- public void openFileSystem() throws Exception {
- conf = new Configuration();
- fs = FileSystem.getLocal(conf);
- fs.setWorkingDirectory(workDir);
- testFilePath = new Path("TestOrcFile." + testCaseName.getMethodName() + ".orc");
- fs.delete(testFilePath, false);
- }
-
- private static BytesWritable bytes(int... items) {
- BytesWritable result = new BytesWritable();
- result.setSize(items.length);
- for (int i = 0; i < items.length; ++i) {
- result.getBytes()[i] = (byte) items[i];
- }
- return result;
- }
-
- void appendRow(VectorizedRowBatch batch, BytesWritable bytes,
- String str) {
- int row = batch.size++;
- if (bytes == null) {
- batch.cols[0].noNulls = false;
- batch.cols[0].isNull[row] = true;
- } else {
- ((BytesColumnVector) batch.cols[0]).setVal(row, bytes.getBytes(),
- 0, bytes.getLength());
- }
- if (str == null) {
- batch.cols[1].noNulls = false;
- batch.cols[1].isNull[row] = true;
- } else {
- ((BytesColumnVector) batch.cols[1]).setVal(row, str.getBytes());
- }
- }
-
- @Test
- public void testHasNull() throws Exception {
- TypeDescription schema =
- TypeDescription.createStruct()
- .addField("bytes1", TypeDescription.createBinary())
- .addField("string1", TypeDescription.createString());
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .rowIndexStride(1000)
- .stripeSize(10000)
- .bufferSize(10000));
- VectorizedRowBatch batch = schema.createRowBatch(5000);
- // STRIPE 1
- // RG1
- for(int i=0; i<1000; i++) {
- appendRow(batch, bytes(1, 2, 3), "RG1");
- }
- writer.addRowBatch(batch);
- batch.reset();
- // RG2
- for(int i=0; i<1000; i++) {
- appendRow(batch, bytes(1, 2, 3), null);
- }
- writer.addRowBatch(batch);
- batch.reset();
- // RG3
- for(int i=0; i<1000; i++) {
- appendRow(batch, bytes(1, 2, 3), "RG3");
- }
- writer.addRowBatch(batch);
- batch.reset();
- // RG4
- for (int i = 0; i < 1000; i++) {
- appendRow(batch, bytes(1,2,3), null);
- }
- writer.addRowBatch(batch);
- batch.reset();
- // RG5
- for(int i=0; i<1000; i++) {
- appendRow(batch, bytes(1, 2, 3), null);
- }
- writer.addRowBatch(batch);
- batch.reset();
- // STRIPE 2
- for (int i = 0; i < 5000; i++) {
- appendRow(batch, bytes(1,2,3), null);
- }
- writer.addRowBatch(batch);
- batch.reset();
- // STRIPE 3
- for (int i = 0; i < 5000; i++) {
- appendRow(batch, bytes(1,2,3), "STRIPE-3");
- }
- writer.addRowBatch(batch);
- batch.reset();
- // STRIPE 4
- for (int i = 0; i < 5000; i++) {
- appendRow(batch, bytes(1,2,3), null);
- }
- writer.addRowBatch(batch);
- batch.reset();
- writer.close();
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
-
- // check the file level stats
- ColumnStatistics[] stats = reader.getStatistics();
- assertEquals(20000, stats[0].getNumberOfValues());
- assertEquals(20000, stats[1].getNumberOfValues());
- assertEquals(7000, stats[2].getNumberOfValues());
- assertEquals(false, stats[0].hasNull());
- assertEquals(false, stats[1].hasNull());
- assertEquals(true, stats[2].hasNull());
-
- // check the stripe level stats
- List<StripeStatistics> stripeStats = reader.getStripeStatistics();
- // stripe 1 stats
- StripeStatistics ss1 = stripeStats.get(0);
- ColumnStatistics ss1_cs1 = ss1.getColumnStatistics()[0];
- ColumnStatistics ss1_cs2 = ss1.getColumnStatistics()[1];
- ColumnStatistics ss1_cs3 = ss1.getColumnStatistics()[2];
- assertEquals(false, ss1_cs1.hasNull());
- assertEquals(false, ss1_cs2.hasNull());
- assertEquals(true, ss1_cs3.hasNull());
-
- // stripe 2 stats
- StripeStatistics ss2 = stripeStats.get(1);
- ColumnStatistics ss2_cs1 = ss2.getColumnStatistics()[0];
- ColumnStatistics ss2_cs2 = ss2.getColumnStatistics()[1];
- ColumnStatistics ss2_cs3 = ss2.getColumnStatistics()[2];
- assertEquals(false, ss2_cs1.hasNull());
- assertEquals(false, ss2_cs2.hasNull());
- assertEquals(true, ss2_cs3.hasNull());
-
- // stripe 3 stats
- StripeStatistics ss3 = stripeStats.get(2);
- ColumnStatistics ss3_cs1 = ss3.getColumnStatistics()[0];
- ColumnStatistics ss3_cs2 = ss3.getColumnStatistics()[1];
- ColumnStatistics ss3_cs3 = ss3.getColumnStatistics()[2];
- assertEquals(false, ss3_cs1.hasNull());
- assertEquals(false, ss3_cs2.hasNull());
- assertEquals(false, ss3_cs3.hasNull());
-
- // stripe 4 stats
- StripeStatistics ss4 = stripeStats.get(3);
- ColumnStatistics ss4_cs1 = ss4.getColumnStatistics()[0];
- ColumnStatistics ss4_cs2 = ss4.getColumnStatistics()[1];
- ColumnStatistics ss4_cs3 = ss4.getColumnStatistics()[2];
- assertEquals(false, ss4_cs1.hasNull());
- assertEquals(false, ss4_cs2.hasNull());
- assertEquals(true, ss4_cs3.hasNull());
-
- // Test file dump
- PrintStream origOut = System.out;
- String outputFilename = "orc-file-has-null.out";
- FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
-
- // replace stdout and run command
- System.setOut(new PrintStream(myOut));
- FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
- System.out.flush();
- System.setOut(origOut);
- // If called with an expression evaluating to false, the test will halt
- // and be ignored.
- assumeTrue(!System.getProperty("os.name").startsWith("Windows"));
- TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java b/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java
deleted file mode 100644
index 526dd81..0000000
--- a/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java
+++ /dev/null
@@ -1,1373 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.File;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Longs;
-
-@RunWith(value = Parameterized.class)
-public class TestNewIntegerEncoding {
-
- private OrcFile.EncodingStrategy encodingStrategy;
-
- public TestNewIntegerEncoding( OrcFile.EncodingStrategy es) {
- this.encodingStrategy = es;
- }
-
- @Parameters
- public static Collection<Object[]> data() {
- Object[][] data = new Object[][] { { OrcFile.EncodingStrategy.COMPRESSION },
- { OrcFile.EncodingStrategy.SPEED } };
- return Arrays.asList(data);
- }
-
- public static class TSRow {
- Timestamp ts;
-
- public TSRow(Timestamp ts) {
- this.ts = ts;
- }
- }
-
- public static TypeDescription getRowSchema() {
- return TypeDescription.createStruct()
- .addField("int1", TypeDescription.createInt())
- .addField("long1", TypeDescription.createLong());
- }
-
- public static void appendRow(VectorizedRowBatch batch,
- int int1, long long1) {
- int row = batch.size++;
- ((LongColumnVector) batch.cols[0]).vector[row] = int1;
- ((LongColumnVector) batch.cols[1]).vector[row] = long1;
- }
-
- public static void appendLong(VectorizedRowBatch batch,
- long long1) {
- int row = batch.size++;
- ((LongColumnVector) batch.cols[0]).vector[row] = long1;
- }
-
- Path workDir = new Path(System.getProperty("test.tmp.dir", "target"
- + File.separator + "test" + File.separator + "tmp"));
-
- Configuration conf;
- FileSystem fs;
- Path testFilePath;
-
- @Rule
- public TestName testCaseName = new TestName();
-
- @Before
- public void openFileSystem() throws Exception {
- conf = new Configuration();
- fs = FileSystem.getLocal(conf);
- testFilePath = new Path(workDir, "TestOrcFile."
- + testCaseName.getMethodName() + ".orc");
- fs.delete(testFilePath, false);
- }
-
- @Test
- public void testBasicRow() throws Exception {
- TypeDescription schema= getRowSchema();
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- appendRow(batch, 111, 1111L);
- appendRow(batch, 111, 1111L);
- appendRow(batch, 111, 1111L);
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(111, ((LongColumnVector) batch.cols[0]).vector[r]);
- assertEquals(1111, ((LongColumnVector) batch.cols[1]).vector[r]);
- }
- }
- }
-
- @Test
- public void testBasicOld() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
- long[] inp = new long[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6,
- 7, 8, 9, 10, 1, 1, 1, 1, 1, 1, 10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1,
- 2, 5, 1, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1,
- 9, 2, 6, 3, 7, 1, 9, 2, 6, 2000, 2, 1, 1, 1, 1, 1, 3, 7, 1, 9, 2, 6, 1,
- 1, 1, 1, 1 };
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .compress(CompressionKind.NONE)
- .version(OrcFile.Version.V_0_11)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- int idx = 0;
- batch = reader.getSchema().createRowBatch();
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testBasicNew() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- long[] inp = new long[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6,
- 7, 8, 9, 10, 1, 1, 1, 1, 1, 1, 10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1,
- 2, 5, 1, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1,
- 9, 2, 6, 3, 7, 1, 9, 2, 6, 2000, 2, 1, 1, 1, 1, 1, 3, 7, 1, 9, 2, 6, 1,
- 1, 1, 1, 1 };
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- int idx = 0;
- batch = reader.getSchema().createRowBatch();
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testBasicDelta1() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- long[] inp = new long[] { -500, -400, -350, -325, -310 };
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testBasicDelta2() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- long[] inp = new long[] { -500, -600, -650, -675, -710 };
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testBasicDelta3() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- long[] inp = new long[] { 500, 400, 350, 325, 310 };
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testBasicDelta4() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- long[] inp = new long[] { 500, 600, 650, 675, 710 };
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testDeltaOverflow() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- long[] inp = new long[]{4513343538618202719l, 4513343538618202711l,
- 2911390882471569739l,
- -9181829309989854913l};
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(10000));
- VectorizedRowBatch batch = schema.createRowBatch();
- for (Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile
- .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testDeltaOverflow2() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- long[] inp = new long[]{Long.MAX_VALUE, 4513343538618202711l,
- 2911390882471569739l,
- Long.MIN_VALUE};
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(10000));
- VectorizedRowBatch batch = schema.createRowBatch();
- for (Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile
- .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testDeltaOverflow3() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- long[] inp = new long[]{-4513343538618202711l, -2911390882471569739l, -2,
- Long.MAX_VALUE};
- List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(10000));
- VectorizedRowBatch batch = schema.createRowBatch();
- for (Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile
- .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testIntegerMin() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- List<Long> input = Lists.newArrayList();
- input.add((long) Integer.MIN_VALUE);
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testIntegerMax() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- List<Long> input = Lists.newArrayList();
- input.add((long) Integer.MAX_VALUE);
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testLongMin() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- List<Long> input = Lists.newArrayList();
- input.add(Long.MIN_VALUE);
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testLongMax() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- List<Long> input = Lists.newArrayList();
- input.add(Long.MAX_VALUE);
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
- for(Long l : input) {
- appendLong(batch, l);
- }
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(input.get(idx++).longValue(),
- ((LongColumnVector) batch.cols[0]).vector[r]);
- }
- }
- }
-
- @Test
- public void testRandomInt() throws Exception {
-   // Writes 100000 random int-ranged values as longs in one batch and
-   // verifies every value read back matches, in order.
-   final int count = 100000;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add((long) random.nextInt());
-   }
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .compress(CompressionKind.NONE)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testRandomLong() throws Exception {
-   // Writes 100000 random full-range longs in one batch and verifies every
-   // value read back matches, in order.
-   final int count = 100000;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add(random.nextLong());
-   }
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .compress(CompressionKind.NONE)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseNegativeMin() throws Exception {
-   // Round-trips a fixed sequence of mostly small positive values that
-   // contains a few large outliers and a single negative value (-13).
-   TypeDescription schema = TypeDescription.createLong();
-
-   long[] fixture = new long[] { 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2,
-       3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, 1, 2, 3, 1, 8, 30, 1, 3, 414, 1,
-       1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, 1, 2, 2, 1, 1,
-       52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6,
-       2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, -13, 1, 2, 3,
-       13, 1, 92, 3, 13, 5, 14, 9, 141, 12, 6, 15, 25, 1, 1, 1, 46, 2, 1, 1,
-       141, 3, 1, 1, 1, 1, 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4,
-       13, 1, 5, 4, 4, 19, 5, 1, 1, 1, 68, 33, 399, 1, 1885, 25, 5, 2, 4, 1,
-       1, 2, 16, 1, 2966, 3, 1, 1, 25501, 1, 1, 1, 66, 1, 3, 8, 131, 14, 5, 1,
-       2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, 2, 1, 5, 10, 3, 1,
-       1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, 2,
-       2, 16 };
-   List<Long> expected = Lists.newArrayList(Longs.asList(fixture));
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .compress(CompressionKind.NONE)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch();
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseNegativeMin2() throws Exception {
-   // Same fixture shape as testPatchedBaseNegativeMin, but the single
-   // negative value is -1.
-   TypeDescription schema = TypeDescription.createLong();
-
-   long[] fixture = new long[] { 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2,
-       3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, 1, 2, 3, 1, 8, 30, 1, 3, 414, 1,
-       1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, 1, 2, 2, 1, 1,
-       52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6,
-       2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, -1, 1, 2, 3,
-       13, 1, 92, 3, 13, 5, 14, 9, 141, 12, 6, 15, 25, 1, 1, 1, 46, 2, 1, 1,
-       141, 3, 1, 1, 1, 1, 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4,
-       13, 1, 5, 4, 4, 19, 5, 1, 1, 1, 68, 33, 399, 1, 1885, 25, 5, 2, 4, 1,
-       1, 2, 16, 1, 2966, 3, 1, 1, 25501, 1, 1, 1, 66, 1, 3, 8, 131, 14, 5, 1,
-       2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, 2, 1, 5, 10, 3, 1,
-       1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, 2,
-       2, 16 };
-   List<Long> expected = Lists.newArrayList(Longs.asList(fixture));
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .compress(CompressionKind.NONE)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch();
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseNegativeMin3() throws Exception {
-   // Same fixture shape as testPatchedBaseNegativeMin, but the value that
-   // was negative there is 0 here, so the minimum of the sequence is zero.
-   TypeDescription schema = TypeDescription.createLong();
-
-   long[] fixture = new long[] { 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2,
-       3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, 1, 2, 3, 1, 8, 30, 1, 3, 414, 1,
-       1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, 1, 2, 2, 1, 1,
-       52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6,
-       2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, 0, 1, 2, 3,
-       13, 1, 92, 3, 13, 5, 14, 9, 141, 12, 6, 15, 25, 1, 1, 1, 46, 2, 1, 1,
-       141, 3, 1, 1, 1, 1, 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4,
-       13, 1, 5, 4, 4, 19, 5, 1, 1, 1, 68, 33, 399, 1, 1885, 25, 5, 2, 4, 1,
-       1, 2, 16, 1, 2966, 3, 1, 1, 25501, 1, 1, 1, 66, 1, 3, 8, 131, 14, 5, 1,
-       2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, 2, 1, 5, 10, 3, 1,
-       1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, 2,
-       2, 16 };
-   List<Long> expected = Lists.newArrayList(Longs.asList(fixture));
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .compress(CompressionKind.NONE)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch();
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseNegativeMin4() throws Exception {
-   // Round-trips a short fixed sequence of small positive values with a
-   // single larger value (33) in the middle.
-   TypeDescription schema = TypeDescription.createLong();
-
-   long[] fixture = new long[] { 13, 13, 11, 8, 13, 10, 10, 11, 11, 14, 11, 7, 13,
-       12, 12, 11, 15, 12, 12, 9, 8, 10, 13, 11, 8, 6, 5, 6, 11, 7, 15, 10, 7,
-       6, 8, 7, 9, 9, 11, 33, 11, 3, 7, 4, 6, 10, 14, 12, 5, 14, 7, 6 };
-   List<Long> expected = Lists.newArrayList(Longs.asList(fixture));
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .compress(CompressionKind.NONE)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch();
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseAt0() throws Exception {
-   // 5120 random values in [0, 100) with a single 20000 outlier placed at
-   // index 0; every value must survive the write/read round trip.
-   final int count = 5120;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add((long) random.nextInt(100));
-   }
-   expected.set(0, 20000L);
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .compress(CompressionKind.NONE)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseAt1() throws Exception {
-   // 5120 random values in [0, 100) with a single 20000 outlier placed at
-   // index 1; every value must survive the write/read round trip.
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> input = Lists.newArrayList();
-   Random rand = new Random();
-   for(int i = 0; i < 5120; i++) {
-     input.add((long) rand.nextInt(100));
-   }
-   input.set(1, 20000L);
-
-   Writer writer = OrcFile.createWriter(testFilePath,
-       OrcFile.writerOptions(conf)
-           .setSchema(schema)
-           .stripeSize(100000)
-           .compress(CompressionKind.NONE)
-           .bufferSize(10000)
-           .encodingStrategy(encodingStrategy));
-   VectorizedRowBatch batch = schema.createRowBatch(5120);
-   for(Long l : input) {
-     appendLong(batch, l);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   Reader reader = OrcFile.createReader(testFilePath,
-       OrcFile.readerOptions(conf).filesystem(fs));
-   RecordReader rows = reader.rows();
-   // Read into a fresh batch, like the sibling tests do. Reusing the batch
-   // that was just written would leave the expected values sitting in the
-   // vector, so a reader that failed to fill it could still pass.
-   batch = reader.getSchema().createRowBatch();
-   int idx = 0;
-   while (rows.nextBatch(batch)) {
-     for(int r=0; r < batch.size; ++r) {
-       assertEquals(input.get(idx++).longValue(),
-           ((LongColumnVector) batch.cols[0]).vector[r]);
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseAt255() throws Exception {
-   // 5120 random values in [0, 100) with a single 20000 outlier placed at
-   // index 255; every value must survive the write/read round trip.
-   final int count = 5120;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add((long) random.nextInt(100));
-   }
-   expected.set(255, 20000L);
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseAt256() throws Exception {
-   // 5120 random values in [0, 100) with a single 20000 outlier placed at
-   // index 256; every value must survive the write/read round trip.
-   final int count = 5120;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add((long) random.nextInt(100));
-   }
-   expected.set(256, 20000L);
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBase510() throws Exception {
-   // 5120 random values in [0, 100) with a single 20000 outlier placed at
-   // index 510; every value must survive the write/read round trip.
-   final int count = 5120;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add((long) random.nextInt(100));
-   }
-   expected.set(510, 20000L);
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBase511() throws Exception {
-   // 5120 random values in [0, 100) with a single 20000 outlier placed at
-   // index 511; every value must survive the write/read round trip.
-   final int count = 5120;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add((long) random.nextInt(100));
-   }
-   expected.set(511, 20000L);
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseMax1() throws Exception {
-   // 5120 random values in [0, 60) with Long.MAX_VALUE placed at index 511;
-   // every value must survive the write/read round trip.
-   final int count = 5120;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add((long) random.nextInt(60));
-   }
-   expected.set(511, Long.MAX_VALUE);
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseMax2() throws Exception {
-   // 5120 random values in [0, 60) with Long.MAX_VALUE placed at indexes
-   // 128, 256 and 511; every value must survive the write/read round trip.
-   final int count = 5120;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add((long) random.nextInt(60));
-   }
-   expected.set(128, Long.MAX_VALUE);
-   expected.set(256, Long.MAX_VALUE);
-   expected.set(511, Long.MAX_VALUE);
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseMax3() throws Exception {
-   // Round-trips 20 fixed values of widely varying magnitude, one of which
-   // is Long.MAX_VALUE.
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> input = Lists.newArrayList();
-   input.add(371946367L);
-   input.add(11963367L);
-   input.add(68639400007L);
-   input.add(100233367L);
-   input.add(6367L);
-   input.add(10026367L);
-   input.add(3670000L);
-   input.add(3602367L);
-   input.add(4719226367L);
-   input.add(7196367L);
-   input.add(444442L);
-   input.add(210267L);
-   input.add(21033L);
-   input.add(160267L);
-   input.add(400267L);
-   input.add(23634347L);
-   input.add(16027L);
-   input.add(46026367L);
-   input.add(Long.MAX_VALUE);
-   input.add(33333L);
-
-   Writer writer = OrcFile.createWriter(testFilePath,
-       OrcFile.writerOptions(conf)
-           .setSchema(schema)
-           .stripeSize(100000)
-           .bufferSize(10000)
-           .encodingStrategy(encodingStrategy));
-   VectorizedRowBatch batch = schema.createRowBatch();
-   for (Long l : input) {
-     appendLong(batch, l);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   Reader reader = OrcFile.createReader(testFilePath,
-       OrcFile.readerOptions(conf).filesystem(fs));
-   RecordReader rows = reader.rows();
-   // Read into a fresh batch, like the sibling tests do. Reusing the batch
-   // that was just written would leave the expected values sitting in the
-   // vector, so a reader that failed to fill it could still pass.
-   batch = reader.getSchema().createRowBatch();
-   int idx = 0;
-   while (rows.nextBatch(batch)) {
-     for(int r=0; r < batch.size; ++r) {
-       assertEquals(input.get(idx++).longValue(),
-           ((LongColumnVector) batch.cols[0]).vector[r]);
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseMax4() throws Exception {
-   // Repeats a fixed 20-value pattern of large longs 25 times, appends
-   // Long.MAX_VALUE, and checks the whole sequence round-trips.
-   TypeDescription schema = TypeDescription.createLong();
-
-   long[] pattern = {
-       371292224226367L,
-       119622332222267L,
-       686329400222007L,
-       100233333222367L,
-       636272333322222L,
-       10202633223267L,
-       36700222022230L,
-       36023226224227L,
-       47192226364427L,
-       71963622222447L,
-       22244444222222L,
-       21220263327442L,
-       21032233332232L,
-       16026322232227L,
-       40022262272212L,
-       23634342227222L,
-       16022222222227L,
-       46026362222227L,
-       46026362222227L,
-       33322222222323L
-   };
-   List<Long> expected = Lists.newArrayList();
-   for (int repeat = 0; repeat < 25; repeat++) {
-     for (long value : pattern) {
-       expected.add(value);
-     }
-   }
-   expected.add(Long.MAX_VALUE);
-
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .stripeSize(100000)
-       .bufferSize(10000)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch();
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = 0;
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-
- @Test
- public void testPatchedBaseTimestamp() throws Exception {
-   // Round-trips a fixed list of timestamps through a struct<ts:timestamp>
-   // file and checks that each one is read back unchanged.
-   TypeDescription schema = TypeDescription.createStruct()
-       .addField("ts", TypeDescription.createTimestamp());
-
-   Writer writer = OrcFile.createWriter(testFilePath,
-       OrcFile.writerOptions(conf)
-           .setSchema(schema)
-           .stripeSize(100000)
-           .bufferSize(10000)
-           .encodingStrategy(encodingStrategy));
-   VectorizedRowBatch batch = schema.createRowBatch();
-
-   List<Timestamp> tslist = Lists.newArrayList();
-   tslist.add(Timestamp.valueOf("2099-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("1999-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("1995-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2002-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2010-03-02 00:00:00"));
-   tslist.add(Timestamp.valueOf("2005-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2006-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("1996-08-02 00:00:00"));
-   tslist.add(Timestamp.valueOf("1998-11-02 00:00:00"));
-   tslist.add(Timestamp.valueOf("2008-10-02 00:00:00"));
-   tslist.add(Timestamp.valueOf("1993-08-02 00:00:00"));
-   tslist.add(Timestamp.valueOf("2008-01-02 00:00:00"));
-   tslist.add(Timestamp.valueOf("2007-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2004-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2008-10-02 00:00:00"));
-   tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2004-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2008-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2005-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("1994-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2006-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2004-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2001-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2000-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2000-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2002-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2006-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2011-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2002-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("2005-01-01 00:00:00"));
-   tslist.add(Timestamp.valueOf("1974-01-01 00:00:00"));
-
-   // Store each timestamp in its own row and publish the row count.
-   // Previously the index never advanced and batch.size stayed at 0, so
-   // nothing was written and the read loop below never executed.
-   TimestampColumnVector tsColumn = (TimestampColumnVector) batch.cols[0];
-   int idx = 0;
-   for (Timestamp ts : tslist) {
-     tsColumn.set(idx++, ts);
-   }
-   batch.size = tslist.size();
-   writer.addRowBatch(batch);
-   writer.close();
-
-   Reader reader = OrcFile.createReader(testFilePath,
-       OrcFile.readerOptions(conf).filesystem(fs));
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   idx = 0;
-   while (rows.nextBatch(batch)) {
-     for(int r=0; r < batch.size; ++r) {
-       assertEquals(tslist.get(idx++),
-           ((TimestampColumnVector) batch.cols[0]).asScratchTimestamp(r));
-     }
-   }
-   // Guard against the comparison loop being skipped entirely.
-   assertEquals(tslist.size(), idx);
- }
-
- @Test
- public void testDirectLargeNegatives() throws Exception {
- TypeDescription schema = TypeDescription.createLong();
-
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .bufferSize(10000)
- .encodingStrategy(encodingStrategy));
- VectorizedRowBatch batch = schema.createRowBatch();
-
- appendLong(batch, -7486502418706614742L);
- appendLong(batch, 0L);
- appendLong(batch, 1L);
- appendLong(batch, 1L);
- appendLong(batch, -5535739865598783616L);
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(5, batch.size);
- assertEquals(-7486502418706614742L,
- ((LongColumnVector) batch.cols[0]).vector[0]);
- assertEquals(0L,
- ((LongColumnVector) batch.cols[0]).vector[1]);
- assertEquals(1L,
- ((LongColumnVector) batch.cols[0]).vector[2]);
- assertEquals(1L,
- ((LongColumnVector) batch.cols[0]).vector[3]);
- assertEquals(-5535739865598783616L,
- ((LongColumnVector) batch.cols[0]).vector[4]);
- assertEquals(false, rows.nextBatch(batch));
- }
-
- @Test
- public void testSeek() throws Exception {
-   // Writes 100000 random int-ranged values with file version V_0_11, seeks
-   // to row 55555 and checks every value from there to the end of the file.
-   final int count = 100000;
-   final int seekRow = 55555;
-   TypeDescription schema = TypeDescription.createLong();
-
-   List<Long> expected = Lists.newArrayList();
-   Random random = new Random();
-   for (int n = 0; n < count; n++) {
-     expected.add((long) random.nextInt());
-   }
-   OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
-       .setSchema(schema)
-       .compress(CompressionKind.NONE)
-       .stripeSize(100000)
-       .bufferSize(10000)
-       .version(OrcFile.Version.V_0_11)
-       .encodingStrategy(encodingStrategy);
-   Writer writer = OrcFile.createWriter(testFilePath, writerOptions);
-   VectorizedRowBatch batch = schema.createRowBatch(count);
-   for (Long value : expected) {
-     appendLong(batch, value);
-   }
-   writer.addRowBatch(batch);
-   writer.close();
-
-   OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf).filesystem(fs);
-   Reader reader = OrcFile.createReader(testFilePath, readerOptions);
-   RecordReader rows = reader.rows();
-   batch = reader.getSchema().createRowBatch();
-   int next = seekRow;
-   rows.seekToRow(next);
-   while (rows.nextBatch(batch)) {
-     long[] actual = ((LongColumnVector) batch.cols[0]).vector;
-     for (int row = 0; row < batch.size; ++row) {
-       assertEquals(expected.get(next).longValue(), actual[row]);
-       next++;
-     }
-   }
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestOrcNullOptimization.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestOrcNullOptimization.java b/orc/src/test/org/apache/orc/TestOrcNullOptimization.java
deleted file mode 100644
index 0b605c9..0000000
--- a/orc/src/test/org/apache/orc/TestOrcNullOptimization.java
+++ /dev/null
@@ -1,415 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-import java.util.Random;
-
-import junit.framework.Assert;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import org.apache.orc.impl.RecordReaderImpl;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-import com.google.common.collect.Lists;
-
-public class TestOrcNullOptimization {
-
- TypeDescription createMyStruct() {
-   // Builds struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>.
-   TypeDescription listElement = TypeDescription.createStruct()
-       .addField("z", TypeDescription.createInt());
-   TypeDescription root = TypeDescription.createStruct();
-   root.addField("a", TypeDescription.createInt());
-   root.addField("b", TypeDescription.createString());
-   root.addField("c", TypeDescription.createBoolean());
-   return root.addField("d", TypeDescription.createList(listElement));
- }
-
- /**
-  * Appends one row to {@code batch}. If the batch is already full it is
-  * first handed to {@code writer} and reset.
-  *
-  * @param writer destination for full batches
-  * @param batch  batch whose columns are (long, bytes, long, list of struct of long)
-  * @param a      value for column 0, or null to mark the cell null
-  * @param b      value for column 1 (stored as UTF-8 bytes), or null
-  * @param c      value for column 2 (stored as 1 or 0), or null
-  * @param d      list elements for column 3, or null to mark the cell null
-  * @throws IOException if flushing a full batch to the writer fails
-  */
- void addRow(Writer writer, VectorizedRowBatch batch,
-     Integer a, String b, Boolean c,
-     Integer... d) throws IOException {
-   if (batch.size == batch.getMaxSize()) {
-     writer.addRowBatch(batch);
-     batch.reset();
-   }
-   int row = batch.size++;
-   LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
-   BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
-   LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
-   ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
-   StructColumnVector dStruct = (StructColumnVector) dColumn.child;
-   LongColumnVector dInt = (LongColumnVector) dStruct.fields[0];
-   if (a == null) {
-     aColumn.noNulls = false;
-     aColumn.isNull[row] = true;
-   } else {
-     aColumn.vector[row] = a;
-   }
-   if (b == null) {
-     bColumn.noNulls = false;
-     bColumn.isNull[row] = true;
-   } else {
-     // Pin the encoding so the stored bytes do not depend on the platform
-     // default charset of the machine running the test.
-     bColumn.setVal(row, b.getBytes(java.nio.charset.StandardCharsets.UTF_8));
-   }
-   if (c == null) {
-     cColumn.noNulls = false;
-     cColumn.isNull[row] = true;
-   } else {
-     cColumn.vector[row] = c ? 1 : 0;
-   }
-   if (d == null) {
-     dColumn.noNulls = false;
-     dColumn.isNull[row] = true;
-   } else {
-     // The row's elements are appended at the current end of the child vector.
-     dColumn.offsets[row] = dColumn.childCount;
-     dColumn.lengths[row] = d.length;
-     dColumn.childCount += d.length;
-     for(int e=0; e < d.length; ++e) {
-       dInt.vector[(int) dColumn.offsets[row] + e] = d[e];
-     }
-   }
- }
-
- // Directory for test output: the "test.tmp.dir" system property if set,
- // otherwise target/test/tmp relative to the working directory.
- Path workDir = new Path(System.getProperty("test.tmp.dir",
- "target" + File.separator + "test" + File.separator + "tmp"));
-
- // Per-test fixtures; all three are assigned in openFileSystem().
- Configuration conf;
- FileSystem fs;
- Path testFilePath;
-
- // Supplies the running test's method name, which openFileSystem() uses to
- // build a distinct output file name for each test.
- @Rule
- public TestName testCaseName = new TestName();
-
- @Before
- public void openFileSystem() throws Exception {
- conf = new Configuration();
- fs = FileSystem.getLocal(conf);
- testFilePath = new Path(workDir, "TestOrcNullOptimization." +
- testCaseName.getMethodName() + ".orc");
- fs.delete(testFilePath, false);
- }
-
- @Test
- public void testMultiStripeWithNull() throws Exception {
-   // Writes 20000 rows in which only the first and last rows have null
-   // a/b cells, then checks file statistics, which stripes carry a PRESENT
-   // stream, and the contents of the first and last rows.
-   TypeDescription schema = createMyStruct();
-   Writer writer = OrcFile.createWriter(testFilePath,
-       OrcFile.writerOptions(conf)
-           .setSchema(schema)
-           .stripeSize(100000)
-           .compress(CompressionKind.NONE)
-           .bufferSize(10000));
-   Random rand = new Random(100);
-   VectorizedRowBatch batch = schema.createRowBatch();
-   addRow(writer, batch, null, null, true, 100);
-   for (int i = 2; i < 20000; i++) {
-     addRow(writer, batch, rand.nextInt(1), "a", true, 100);
-   }
-   addRow(writer, batch, null, null, true, 100);
-   writer.addRowBatch(batch);
-   writer.close();
-
-   Reader reader = OrcFile.createReader(testFilePath,
-       OrcFile.readerOptions(conf).filesystem(fs));
-   // check the stats
-   ColumnStatistics[] stats = reader.getStatistics();
-   assertEquals(20000, reader.getNumberOfRows());
-   assertEquals(20000, stats[0].getNumberOfValues());
-
-   assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMaximum());
-   assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
-   assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
-   assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
-   assertEquals("count: 19998 hasNull: true min: 0 max: 0 sum: 0",
-       stats[1].toString());
-
-   assertEquals("a", ((StringColumnStatistics) stats[2]).getMaximum());
-   assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
-   assertEquals(19998, stats[2].getNumberOfValues());
-   assertEquals("count: 19998 hasNull: true min: a max: a sum: 19998",
-       stats[2].toString());
-
-   // check the inspectors
-   assertEquals("struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>",
-       reader.getSchema().toString());
-
-   RecordReader rows = reader.rows();
-
-   List<Boolean> expected = Lists.newArrayList();
-   for (StripeInformation sinfo : reader.getStripes()) {
-     expected.add(false);
-   }
-   // only the first and last stripe will have PRESENT stream
-   expected.set(0, true);
-   expected.set(expected.size() - 1, true);
-
-   List<Boolean> got = Lists.newArrayList();
-   // check if the stripe footer contains PRESENT stream
-   for (StripeInformation sinfo : reader.getStripes()) {
-     OrcProto.StripeFooter sf =
-         ((RecordReaderImpl) rows).readStripeFooter(sinfo);
-     got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
-         != -1);
-   }
-   assertEquals(expected, got);
-
-   batch = reader.getSchema().createRowBatch();
-   LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
-   BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
-   LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
-   ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
-   LongColumnVector dElements =
-       (LongColumnVector)(((StructColumnVector) dColumn.child).fields[0]);
-   assertEquals(true , rows.nextBatch(batch));
-   assertEquals(1024, batch.size);
-
-   // row 1
-   assertEquals(true, aColumn.isNull[0]);
-   assertEquals(true, bColumn.isNull[0]);
-   assertEquals(1, cColumn.vector[0]);
-   assertEquals(0, dColumn.offsets[0]);
-   // Check the first row's own list length (index 0, not the next row's).
-   assertEquals(1, dColumn.lengths[0]);
-   assertEquals(100, dElements.vector[0]);
-
-   rows.seekToRow(19998);
-   // The seek target is inside the file, so a batch must be returned.
-   assertEquals(true, rows.nextBatch(batch));
-   assertEquals(2, batch.size);
-
-   // last-1 row
-   assertEquals(0, aColumn.vector[0]);
-   assertEquals("a", bColumn.toString(0));
-   assertEquals(1, cColumn.vector[0]);
-   assertEquals(0, dColumn.offsets[0]);
-   assertEquals(1, dColumn.lengths[0]);
-   assertEquals(100, dElements.vector[0]);
-
-   // last row
-   assertEquals(true, aColumn.isNull[1]);
-   assertEquals(true, bColumn.isNull[1]);
-   assertEquals(1, cColumn.vector[1]);
-   assertEquals(1, dColumn.offsets[1]);
-   assertEquals(1, dColumn.lengths[1]);
-   assertEquals(100, dElements.vector[1]);
-
-   assertEquals(false, rows.nextBatch(batch));
-   rows.close();
- }
-
- @Test
- public void testMultiStripeWithoutNull() throws Exception { // 20000 rows with no nulls: verify file stats, absence of PRESENT streams, and the last two rows
- TypeDescription schema = createMyStruct();
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .compress(CompressionKind.NONE)
- .bufferSize(10000));
- Random rand = new Random(100); // fixed seed, so the run is repeatable
- VectorizedRowBatch batch = schema.createRowBatch();
- for (int i = 1; i < 20000; i++) { // 19999 "a" rows; the single "b" row below brings the total to 20000
- addRow(writer, batch, rand.nextInt(1), "a", true, 100); // nextInt(1) can only return 0, matching the min/max/sum of 0 asserted below
- }
- addRow(writer, batch, 0, "b", true, 100);
- writer.addRowBatch(batch); // presumably addRow flushes full batches itself and this writes the remainder - confirm against addRow
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- // check the stats
- ColumnStatistics[] stats = reader.getStatistics();
- assertEquals(20000, reader.getNumberOfRows());
- assertEquals(20000, stats[0].getNumberOfValues());
-
- assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMaximum());
- assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
- assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
- assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
- assertEquals("count: 20000 hasNull: false min: 0 max: 0 sum: 0",
- stats[1].toString());
-
- assertEquals("b", ((StringColumnStatistics) stats[2]).getMaximum());
- assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
- assertEquals(20000, stats[2].getNumberOfValues());
- assertEquals("count: 20000 hasNull: false min: a max: b sum: 20000",
- stats[2].toString());
-
- // check the string form of the reader's schema
- Assert.assertEquals("struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>",
- reader.getSchema().toString());
-
- RecordReader rows = reader.rows();
-
- // none of the stripes will have PRESENT stream
- List<Boolean> expected = Lists.newArrayList();
- for (StripeInformation sinfo : reader.getStripes()) { // one "false" per stripe; sinfo itself is not read
- expected.add(false);
- }
-
- List<Boolean> got = Lists.newArrayList();
- // check if the stripe footer contains PRESENT stream
- for (StripeInformation sinfo : reader.getStripes()) {
- OrcProto.StripeFooter sf =
- ((RecordReaderImpl) rows).readStripeFooter(sinfo);
- got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
- != -1); // true when the footer's text dump mentions PRESENT anywhere
- }
- assertEquals(expected, got);
-
- rows.seekToRow(19998); // leaves exactly two rows to read, as asserted via batch.size below
-
- batch = reader.getSchema().createRowBatch();
- LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
- BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
- LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
- ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
- LongColumnVector dElements =
- (LongColumnVector)(((StructColumnVector) dColumn.child).fields[0]); // the z field of the structs inside list column d
-
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(2, batch.size);
-
- // second-to-last row
- assertEquals(0, aColumn.vector[0]);
- assertEquals("a", bColumn.toString(0));
- assertEquals(1, cColumn.vector[0]);
- assertEquals(0, dColumn.offsets[0]);
- assertEquals(1, dColumn.lengths[0]);
- assertEquals(100, dElements.vector[0]);
-
- // last row
- assertEquals(0, aColumn.vector[1]);
- assertEquals("b", bColumn.toString(1));
- assertEquals(1, cColumn.vector[1]);
- assertEquals(1, dColumn.offsets[1]); // the previous row's list has length 1, so this one starts at child offset 1
- assertEquals(1, dColumn.lengths[1]);
- assertEquals(100, dElements.vector[1]);
- rows.close();
- }
-
- @Test
- public void testColumnsWithNullAndCompression() throws Exception { // 8 rows, one null in a and one in b: verify stats, PRESENT stream placement, and the first three rows
- TypeDescription schema = createMyStruct();
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .setSchema(schema)
- .stripeSize(100000)
- .bufferSize(10000)); // NOTE(review): no .compress(...) call, unlike testMultiStripeWithoutNull - presumably the writer default supplies the compression this test is named for; confirm
- VectorizedRowBatch batch = schema.createRowBatch();
- addRow(writer, batch, 3, "a", true, 100);
- addRow(writer, batch, null, "b", true, 100); // null in column a (asserted as aColumn.isNull[1] below)
- addRow(writer, batch, 3, null, false, 100); // null in column b (asserted as bColumn.isNull[2] below)
- addRow(writer, batch, 3, "d", true, 100);
- addRow(writer, batch, 2, "e", true, 100);
- addRow(writer, batch, 2, "f", true, 100);
- addRow(writer, batch, 2, "g", true, 100);
- addRow(writer, batch, 2, "h", true, 100);
- writer.addRowBatch(batch);
- writer.close();
-
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- // check the stats
- ColumnStatistics[] stats = reader.getStatistics();
- assertEquals(8, reader.getNumberOfRows());
- assertEquals(8, stats[0].getNumberOfValues());
-
- assertEquals(3, ((IntegerColumnStatistics) stats[1]).getMaximum());
- assertEquals(2, ((IntegerColumnStatistics) stats[1]).getMinimum());
- assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
- assertEquals(17, ((IntegerColumnStatistics) stats[1]).getSum()); // 3 + 3 + 3 + 2 + 2 + 2 + 2; the null row adds nothing
- assertEquals("count: 7 hasNull: true min: 2 max: 3 sum: 17",
- stats[1].toString()); // count is 7 because one of the 8 rows is null in this column
-
- assertEquals("h", ((StringColumnStatistics) stats[2]).getMaximum());
- assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
- assertEquals(7, stats[2].getNumberOfValues());
- assertEquals("count: 7 hasNull: true min: a max: h sum: 7",
- stats[2].toString());
-
- // set up the read batch and check the string form of the reader's schema
- batch = reader.getSchema().createRowBatch();
- LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
- BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
- LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
- ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
- LongColumnVector dElements =
- (LongColumnVector)(((StructColumnVector) dColumn.child).fields[0]); // the z field of the structs inside list column d
- Assert.assertEquals("struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>",
- reader.getSchema().toString());
-
- RecordReader rows = reader.rows();
- // only the last stripe will have PRESENT stream
- List<Boolean> expected = Lists.newArrayList();
- for (StripeInformation sinfo : reader.getStripes()) { // one "false" per stripe; sinfo itself is not read
- expected.add(false);
- }
- expected.set(expected.size() - 1, true);
-
- List<Boolean> got = Lists.newArrayList();
- // check if the stripe footer contains PRESENT stream
- for (StripeInformation sinfo : reader.getStripes()) {
- OrcProto.StripeFooter sf =
- ((RecordReaderImpl) rows).readStripeFooter(sinfo);
- got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
- != -1); // true when the footer's text dump mentions PRESENT anywhere
- }
- assertEquals(expected, got);
-
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(8, batch.size);
-
- // row 1
- assertEquals(3, aColumn.vector[0]);
- assertEquals("a", bColumn.toString(0));
- assertEquals(1, cColumn.vector[0]);
- assertEquals(0, dColumn.offsets[0]);
- assertEquals(1, dColumn.lengths[0]);
- assertEquals(100, dElements.vector[0]);
-
- // row 2
- assertEquals(true, aColumn.isNull[1]);
- assertEquals("b", bColumn.toString(1));
- assertEquals(1, cColumn.vector[1]);
- assertEquals(1, dColumn.offsets[1]);
- assertEquals(1, dColumn.lengths[1]);
- assertEquals(100, dElements.vector[1]);
-
- // row 3
- assertEquals(3, aColumn.vector[2]);
- assertEquals(true, bColumn.isNull[2]);
- assertEquals(0, cColumn.vector[2]); // this row was written with false, which reads back as 0
- assertEquals(2, dColumn.offsets[2]);
- assertEquals(1, dColumn.lengths[2]);
- assertEquals(100, dElements.vector[2]);
-
- rows.close(); // rows 4-8 are not inspected individually
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestOrcTimezone1.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestOrcTimezone1.java b/orc/src/test/org/apache/orc/TestOrcTimezone1.java
deleted file mode 100644
index 72dc455..0000000
--- a/orc/src/test/org/apache/orc/TestOrcTimezone1.java
+++ /dev/null
@@ -1,189 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertNotNull;
-
-import java.io.File;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.TimeZone;
-
-import junit.framework.Assert;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-
-import com.google.common.collect.Lists;
-
-/**
- * Runs once per (writer, reader) time-zone pair from data(); each test switches the JVM default time zone and checks that ORC timestamps read back with the expected wall-clock value.
- */
-@RunWith(Parameterized.class)
-public class TestOrcTimezone1 {
- Path workDir = new Path(System.getProperty("test.tmp.dir",
- "target" + File.separator + "test" + File.separator + "tmp")); // falls back to target/test/tmp when test.tmp.dir is unset
- Configuration conf;
- FileSystem fs;
- Path testFilePath;
- String writerTimeZone;
- String readerTimeZone;
- static TimeZone defaultTimeZone = TimeZone.getDefault(); // captured at class load so restoreTimeZone() can undo the tests' setDefault calls
-
- public TestOrcTimezone1(String writerTZ, String readerTZ) {
- this.writerTimeZone = writerTZ;
- this.readerTimeZone = readerTZ;
- }
-
- @Parameterized.Parameters // each row is {writerTimeZone, readerTimeZone}, matching the constructor's argument order
- public static Collection<Object[]> data() {
- List<Object[]> result = Arrays.asList(new Object[][]{
- /* Extreme timezones */
- {"GMT-12:00", "GMT+14:00"},
- /* No difference in DST */
- {"America/Los_Angeles", "America/Los_Angeles"}, /* same timezone both with DST */
- {"Europe/Berlin", "Europe/Berlin"}, /* same as above but europe */
- {"America/Phoenix", "Asia/Kolkata"} /* Writer no DST, Reader no DST */,
- {"Europe/Berlin", "America/Los_Angeles"} /* Writer DST, Reader DST */,
- {"Europe/Berlin", "America/Chicago"} /* Writer DST, Reader DST */,
- /* With DST difference */
- {"Europe/Berlin", "UTC"},
- {"UTC", "Europe/Berlin"} /* Writer no DST, Reader DST */,
- {"America/Los_Angeles", "Asia/Kolkata"} /* Writer DST, Reader no DST */,
- {"Europe/Berlin", "Asia/Kolkata"} /* Writer DST, Reader no DST */,
- /* Timezone offsets for the reader has changed historically */
- {"Asia/Saigon", "Pacific/Enderbury"},
- {"UTC", "Asia/Jerusalem"},
-
- // NOTE:
- // "1995-01-01 03:00:00.688888888" this is not a valid time in Pacific/Enderbury timezone.
- // On 1995-01-01 00:00:00 GMT offset moved from -11:00 hr to +13:00 which makes all values
- // on 1995-01-01 invalid. Try this with joda time
- // new MutableDateTime("1995-01-01", DateTimeZone.forTimeZone(readerTimeZone));
- });
- return result;
- }
-
- @Rule
- public TestName testCaseName = new TestName(); // supplies the method name used to build the per-test file path in openFileSystem()
-
- @Before
- public void openFileSystem() throws Exception { // fresh Configuration, local FileSystem and an empty target path before every test
- conf = new Configuration();
- fs = FileSystem.getLocal(conf);
- testFilePath = new Path(workDir, "TestOrcFile." +
- testCaseName.getMethodName() + ".orc");
- fs.delete(testFilePath, false); // drop any file left over at this path before the test writes it
- }
-
- @After
- public void restoreTimeZone() { // every test changes the JVM default zone; put the captured one back
- TimeZone.setDefault(defaultTimeZone);
- }
-
- @Test
- public void testTimestampWriter() throws Exception { // write timestamps under the writer zone, read them under the reader zone, expect identical text
- TypeDescription schema = TypeDescription.createTimestamp();
-
- TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone)); // the write phase runs under the writer's zone
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
- .bufferSize(10000));
- assertEquals(writerTimeZone, TimeZone.getDefault().getID()); // sanity check: getTimeZone falls back to GMT for an ID it does not recognise
- List<String> ts = Lists.newArrayList();
- ts.add("2003-01-01 01:00:00.000000222");
- ts.add("1996-08-02 09:00:00.723100809");
- ts.add("1999-01-01 02:00:00.999999999");
- ts.add("1995-01-02 03:00:00.688888888");
- ts.add("2002-01-01 04:00:00.1");
- ts.add("2010-03-02 05:00:00.000009001");
- ts.add("2005-01-01 06:00:00.000002229");
- ts.add("2006-01-01 07:00:00.900203003");
- ts.add("2003-01-01 08:00:00.800000007");
- ts.add("1998-11-02 10:00:00.857340643");
- ts.add("2008-10-02 11:00:00.0");
- ts.add("2037-01-01 00:00:00.000999");
- ts.add("2014-03-28 00:00:00.0");
- VectorizedRowBatch batch = schema.createRowBatch();
- TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
- for (String t : ts) {
- times.set(batch.size++, Timestamp.valueOf(t)); // text is interpreted in the JVM default zone, i.e. the writer's zone at this point
- }
- writer.addRowBatch(batch);
- writer.close();
-
- TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone)); // the read phase runs under the reader's zone
- Reader reader = OrcFile.createReader(testFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(readerTimeZone, TimeZone.getDefault().getID());
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- times = (TimestampColumnVector) batch.cols[0];
- int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(ts.get(idx++), times.asScratchTimestamp(r).toString()); // the same wall-clock text is expected despite the zone switch
- }
- }
- rows.close(); // NOTE(review): nothing asserts idx == ts.size(), so a short read would still pass - confirm whether that is intended
- }
-
- @Test
- public void testReadTimestampFormat_0_11() throws Exception { // read the "ts" column of the bundled orc-file-11-format.orc under the reader zone
- TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
- Path oldFilePath = new Path(getClass().getClassLoader().
- getSystemResource("orc-file-11-format.orc").getPath()); // getSystemResource is static on ClassLoader, so the receiver expression does not affect the lookup
- Reader reader = OrcFile.createReader(oldFilePath,
- OrcFile.readerOptions(conf).filesystem(fs));
- TypeDescription schema = reader.getSchema();
- int col = schema.getFieldNames().indexOf("ts");
- VectorizedRowBatch batch = schema.createRowBatch(10);
- TimestampColumnVector ts = (TimestampColumnVector) batch.cols[col];
-
- boolean[] include = new boolean[schema.getMaximumId() + 1];
- include[schema.getChildren().get(col).getId()] = true; // only the "ts" column's id is flagged for reading
- RecordReader rows = reader.rows
- (new Reader.Options().include(include));
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
- ts.asScratchTimestamp(0));
-
- // check the contents of row 7499, the last one (a single row comes back, then no more)
- rows.seekToRow(7499);
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1, batch.size);
- assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"),
- ts.asScratchTimestamp(0));
-
- // nothing is left to read; close the record reader
- Assert.assertEquals(false, rows.nextBatch(batch));
- rows.close();
- }
-}