Posted to commits@hive.apache.org by om...@apache.org on 2017/07/19 16:58:30 UTC
[07/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java b/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java
new file mode 100644
index 0000000..8a6337d
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestRunLengthIntegerReader {
+
+ public void runSeekTest(CompressionCodec codec) throws Exception {
+ TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+ RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+ new OutStream("test", 1000, codec, collect), true);
+ TestInStream.PositionCollector[] positions =
+ new TestInStream.PositionCollector[4096];
+ Random random = new Random(99);
+ int[] junk = new int[2048];
+ for(int i=0; i < junk.length; ++i) {
+ junk[i] = random.nextInt();
+ }
+ for(int i=0; i < 4096; ++i) {
+ positions[i] = new TestInStream.PositionCollector();
+ out.getPosition(positions[i]);
+ // test runs, incrementing runs, non-runs
+ if (i < 1024) {
+ out.write(i/4);
+ } else if (i < 2048) {
+ out.write(2*i);
+ } else {
+ out.write(junk[i-2048]);
+ }
+ }
+ out.flush();
+ ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+ collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+ inBuf.flip();
+ RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+ ("test", new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+ codec, 1000), true);
+ for(int i=0; i < 2048; ++i) {
+ int x = (int) in.next();
+ if (i < 1024) {
+ assertEquals(i/4, x);
+ } else if (i < 2048) {
+ assertEquals(2*i, x);
+ } else {
+ assertEquals(junk[i-2048], x);
+ }
+ }
+ for(int i=2047; i >= 0; --i) {
+ in.seek(positions[i]);
+ int x = (int) in.next();
+ if (i < 1024) {
+ assertEquals(i/4, x);
+ } else if (i < 2048) {
+ assertEquals(2*i, x);
+ } else {
+ assertEquals(junk[i-2048], x);
+ }
+ }
+ }
+
+ @Test
+ public void testUncompressedSeek() throws Exception {
+ runSeekTest(null);
+ }
+
+ @Test
+ public void testCompressedSeek() throws Exception {
+ runSeekTest(new ZlibCodec());
+ }
+
+ @Test
+ public void testSkips() throws Exception {
+ TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+ RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+ new OutStream("test", 100, null, collect), true);
+ for(int i=0; i < 2048; ++i) {
+ if (i < 1024) {
+ out.write(i);
+ } else {
+ out.write(256 * i);
+ }
+ }
+ out.flush();
+ ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+ collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+ inBuf.flip();
+ RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+ ("test", new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+ null, 100), true);
+ for(int i=0; i < 2048; i += 10) {
+ int x = (int) in.next();
+ if (i < 1024) {
+ assertEquals(i, x);
+ } else {
+ assertEquals(256 * i, x);
+ }
+ if (i < 2038) {
+ in.skip(9);
+ }
+ in.skip(0);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java b/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java
new file mode 100644
index 0000000..cc963c8
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java
@@ -0,0 +1,480 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.RecordReader;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.apache.hive.orc.Reader;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestSchemaEvolution {
+
+ @Rule
+ public TestName testCaseName = new TestName();
+
+ Configuration conf;
+ Path testFilePath;
+ FileSystem fs;
+ Path workDir = new Path(System.getProperty("test.tmp.dir",
+ "target" + File.separator + "test" + File.separator + "tmp"));
+
+ @Before
+ public void setup() throws Exception {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ testFilePath = new Path(workDir, "TestOrcFile." +
+ testCaseName.getMethodName() + ".orc");
+ fs.delete(testFilePath, false);
+ }
+
+ @Test
+ public void testDataTypeConversion1() throws IOException {
+ TypeDescription fileStruct1 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+ SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
+ assertFalse(same1.hasConversion());
+ TypeDescription readerStruct1 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+ SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+ assertFalse(both1.hasConversion());
+ TypeDescription readerStruct1diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+ SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
+ assertTrue(both1diff.hasConversion());
+ TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
+ SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision, null);
+ assertTrue(both1diffPrecision.hasConversion());
+ }
+
+ @Test
+ public void testDataTypeConversion2() throws IOException {
+ TypeDescription fileStruct2 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createUnion()
+ .addUnionChild(TypeDescription.createByte())
+ .addUnionChild(TypeDescription.createDecimal()
+ .withPrecision(20).withScale(10)))
+ .addField("f2", TypeDescription.createStruct()
+ .addField("f3", TypeDescription.createDate())
+ .addField("f4", TypeDescription.createDouble())
+ .addField("f5", TypeDescription.createBoolean()))
+ .addField("f6", TypeDescription.createChar().withMaxLength(100));
+ SchemaEvolution same2 = new SchemaEvolution(fileStruct2, null);
+ assertFalse(same2.hasConversion());
+ TypeDescription readerStruct2 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createUnion()
+ .addUnionChild(TypeDescription.createByte())
+ .addUnionChild(TypeDescription.createDecimal()
+ .withPrecision(20).withScale(10)))
+ .addField("f2", TypeDescription.createStruct()
+ .addField("f3", TypeDescription.createDate())
+ .addField("f4", TypeDescription.createDouble())
+ .addField("f5", TypeDescription.createBoolean()))
+ .addField("f6", TypeDescription.createChar().withMaxLength(100));
+ SchemaEvolution both2 = new SchemaEvolution(fileStruct2, readerStruct2, null);
+ assertFalse(both2.hasConversion());
+ TypeDescription readerStruct2diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createUnion()
+ .addUnionChild(TypeDescription.createByte())
+ .addUnionChild(TypeDescription.createDecimal()
+ .withPrecision(20).withScale(10)))
+ .addField("f2", TypeDescription.createStruct()
+ .addField("f3", TypeDescription.createDate())
+ .addField("f4", TypeDescription.createDouble())
+ .addField("f5", TypeDescription.createByte()))
+ .addField("f6", TypeDescription.createChar().withMaxLength(100));
+ SchemaEvolution both2diff = new SchemaEvolution(fileStruct2, readerStruct2diff, null);
+ assertTrue(both2diff.hasConversion());
+ TypeDescription readerStruct2diffChar = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createUnion()
+ .addUnionChild(TypeDescription.createByte())
+ .addUnionChild(TypeDescription.createDecimal()
+ .withPrecision(20).withScale(10)))
+ .addField("f2", TypeDescription.createStruct()
+ .addField("f3", TypeDescription.createDate())
+ .addField("f4", TypeDescription.createDouble())
+ .addField("f5", TypeDescription.createBoolean()))
+ .addField("f6", TypeDescription.createChar().withMaxLength(80));
+ SchemaEvolution both2diffChar = new SchemaEvolution(fileStruct2, readerStruct2diffChar, null);
+ assertTrue(both2diffChar.hasConversion());
+ }
+
+ @Test
+ public void testFloatToDoubleEvolution() throws Exception {
+ testFilePath = new Path(workDir, "TestOrcFile." +
+ testCaseName.getMethodName() + ".orc");
+ TypeDescription schema = TypeDescription.createFloat();
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+ .bufferSize(10000));
+ VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
+ DoubleColumnVector dcv = new DoubleColumnVector(1024);
+ batch.cols[0] = dcv;
+ batch.reset();
+ batch.size = 1;
+ dcv.vector[0] = 74.72f;
+ writer.addRowBatch(batch);
+ writer.close();
+
+ Reader reader = OrcFile.createReader(testFilePath,
+ OrcFile.readerOptions(conf).filesystem(fs));
+ TypeDescription schemaOnRead = TypeDescription.createDouble();
+ RecordReader rows = reader.rows(new Reader.Options().schema(schemaOnRead));
+ batch = schemaOnRead.createRowBatch();
+ rows.nextBatch(batch);
+ assertEquals(74.72, ((DoubleColumnVector) batch.cols[0]).vector[0], 0.00000000001);
+ rows.close();
+ }
+
+ @Test
+ public void testSafePpdEvaluation() throws IOException {
+ TypeDescription fileStruct1 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+ SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
+ assertTrue(same1.isPPDSafeConversion(0));
+ assertFalse(same1.hasConversion());
+ TypeDescription readerStruct1 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+ SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+ assertFalse(both1.hasConversion());
+ assertTrue(both1.isPPDSafeConversion(0));
+ assertTrue(both1.isPPDSafeConversion(1));
+ assertTrue(both1.isPPDSafeConversion(2));
+ assertTrue(both1.isPPDSafeConversion(3));
+
+ // int -> long
+ TypeDescription readerStruct1diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+ SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
+ assertTrue(both1diff.hasConversion());
+ assertFalse(both1diff.isPPDSafeConversion(0));
+ assertTrue(both1diff.isPPDSafeConversion(1));
+ assertTrue(both1diff.isPPDSafeConversion(2));
+ assertTrue(both1diff.isPPDSafeConversion(3));
+
+ // decimal(38,10) -> decimal(12, 10)
+ TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
+ SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision,
+ new boolean[] {true, false, false, true});
+ assertTrue(both1diffPrecision.hasConversion());
+ assertFalse(both1diffPrecision.isPPDSafeConversion(0));
+ assertFalse(both1diffPrecision.isPPDSafeConversion(1)); // column not included
+ assertFalse(both1diffPrecision.isPPDSafeConversion(2)); // column not included
+ assertFalse(both1diffPrecision.isPPDSafeConversion(3));
+
+ // add columns
+ readerStruct1 = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10))
+ .addField("f4", TypeDescription.createBoolean());
+ both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+ assertTrue(both1.hasConversion());
+ assertFalse(both1.isPPDSafeConversion(0));
+ assertTrue(both1.isPPDSafeConversion(1));
+ assertTrue(both1.isPPDSafeConversion(2));
+ assertTrue(both1.isPPDSafeConversion(3));
+ assertFalse(both1.isPPDSafeConversion(4));
+ }
+
+ @Test
+ public void testSafePpdEvaluationForInts() throws IOException {
+ // byte -> short -> int -> long
+ TypeDescription fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte());
+ SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertFalse(schemaEvolution.hasConversion());
+
+ // byte -> short
+ TypeDescription readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // byte -> int
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // byte -> long
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // short -> int -> long
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertFalse(schemaEvolution.hasConversion());
+
+ // unsafe conversion short -> byte
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // short -> int
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // short -> long
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // int -> long
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertFalse(schemaEvolution.hasConversion());
+
+ // unsafe conversion int -> byte
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // unsafe conversion int -> short
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // int -> long
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // long
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertTrue(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.hasConversion());
+
+ // unsafe conversion long -> byte
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // unsafe conversion long -> short
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // unsafe conversion long -> int
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createFloat());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createTimestamp());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+ }
+
+ @Test
+ public void testSafePpdEvaluationForStrings() throws IOException {
+ TypeDescription fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString());
+ SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertTrue(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.hasConversion());
+
+ // string -> char
+ TypeDescription readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // string -> varchar
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertTrue(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.hasConversion());
+
+ // char -> string
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // char -> varchar
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ fileSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar());
+ schemaEvolution = new SchemaEvolution(fileSchema, null);
+ assertTrue(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.hasConversion());
+
+ // varchar -> string
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+ // varchar -> char
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createDecimal());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createDate());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+ // invalid
+ readerSchema = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt());
+ schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+ assertTrue(schemaEvolution.hasConversion());
+ assertFalse(schemaEvolution.isPPDSafeConversion(0));
+ assertFalse(schemaEvolution.isPPDSafeConversion(1));
+ }
+
+ @Test
+ public void ensureFileIncluded() throws IOException {
+ TypeDescription file = TypeDescription.fromString("struct<x:int,y:int>");
+ SchemaEvolution evolution = new SchemaEvolution(file, null);
+ boolean[] include = evolution.getFileIncluded();
+ assertEquals(3, include.length);
+ for(int i=0; i < include.length; ++i) {
+ assertTrue("element " + i, include[i]);
+ }
+ }
+}
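
Note on the column indices passed to isPPDSafeConversion above: ORC numbers columns in a pre-order walk of the schema, so the root struct is column 0 and f1, f2, f3 are columns 1, 2, 3 (a newly added f4 becomes column 4). Below is a minimal sketch that prints this numbering; it assumes TypeDescription exposes getId(), getCategory(), getChildren() and getFieldNames(), as the org.apache.orc version of the class does.

import org.apache.hive.orc.TypeDescription;

public class PrintColumnIds {
  // Pre-order walk: the root struct is column 0, its first field is 1, and so on.
  static void print(String name, TypeDescription type) {
    System.out.println(type.getId() + " : " + name + " (" + type.getCategory() + ")");
    if (type.getChildren() != null) {
      for (int i = 0; i < type.getChildren().size(); ++i) {
        String child = type.getFieldNames() != null
            ? type.getFieldNames().get(i) : "<child>";
        print(child, type.getChildren().get(i));
      }
    }
  }

  public static void main(String[] args) {
    TypeDescription schema = TypeDescription.fromString(
        "struct<f1:int,f2:string,f3:decimal(38,10)>");
    print("<root>", schema);   // expected: 0 <root>, 1 f1, 2 f2, 3 f3
  }
}
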
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java b/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java
new file mode 100644
index 0000000..5bcee60
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.math.BigInteger;
+import java.util.Random;
+
+import org.junit.Test;
+
+import com.google.common.math.LongMath;
+
+public class TestSerializationUtils {
+
+ private InputStream fromBuffer(ByteArrayOutputStream buffer) {
+ return new ByteArrayInputStream(buffer.toByteArray());
+ }
+
+ @Test
+ public void testDoubles() throws Exception {
+ double tolerance = 0.0000000000000001;
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ SerializationUtils utils = new SerializationUtils();
+ utils.writeDouble(buffer, 1343822337.759);
+ assertEquals(1343822337.759, utils.readDouble(fromBuffer(buffer)), tolerance);
+ buffer = new ByteArrayOutputStream();
+ utils.writeDouble(buffer, 0.8);
+ double got = utils.readDouble(fromBuffer(buffer));
+ assertEquals(0.8, got, tolerance);
+ }
+
+ @Test
+ public void testBigIntegers() throws Exception {
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(0));
+ assertArrayEquals(new byte[]{0}, buffer.toByteArray());
+ assertEquals(0L,
+ SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+ buffer.reset();
+ SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(1));
+ assertArrayEquals(new byte[]{2}, buffer.toByteArray());
+ assertEquals(1L,
+ SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+ buffer.reset();
+ SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-1));
+ assertArrayEquals(new byte[]{1}, buffer.toByteArray());
+ assertEquals(-1L,
+ SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+ buffer.reset();
+ SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(50));
+ assertArrayEquals(new byte[]{100}, buffer.toByteArray());
+ assertEquals(50L,
+ SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+ buffer.reset();
+ SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-50));
+ assertArrayEquals(new byte[]{99}, buffer.toByteArray());
+ assertEquals(-50L,
+ SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+ for(int i=-8192; i < 8192; ++i) {
+ buffer.reset();
+ SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(i));
+ assertEquals("compare length for " + i,
+ i >= -64 && i < 64 ? 1 : 2, buffer.size());
+ assertEquals("compare result for " + i,
+ i, SerializationUtils.readBigInteger(fromBuffer(buffer)).intValue());
+ }
+ buffer.reset();
+ SerializationUtils.writeBigInteger(buffer,
+ new BigInteger("123456789abcdef0",16));
+ assertEquals(new BigInteger("123456789abcdef0",16),
+ SerializationUtils.readBigInteger(fromBuffer(buffer)));
+ buffer.reset();
+ SerializationUtils.writeBigInteger(buffer,
+ new BigInteger("-123456789abcdef0",16));
+ assertEquals(new BigInteger("-123456789abcdef0",16),
+ SerializationUtils.readBigInteger(fromBuffer(buffer)));
+ StringBuilder buf = new StringBuilder();
+ for(int i=0; i < 256; ++i) {
+ String num = Integer.toHexString(i);
+ if (num.length() == 1) {
+ buf.append('0');
+ }
+ buf.append(num);
+ }
+ buffer.reset();
+ SerializationUtils.writeBigInteger(buffer,
+ new BigInteger(buf.toString(),16));
+ assertEquals(new BigInteger(buf.toString(),16),
+ SerializationUtils.readBigInteger(fromBuffer(buffer)));
+ buffer.reset();
+ SerializationUtils.writeBigInteger(buffer,
+ new BigInteger("ff000000000000000000000000000000000000000000ff",16));
+ assertEquals(
+ new BigInteger("ff000000000000000000000000000000000000000000ff",16),
+ SerializationUtils.readBigInteger(fromBuffer(buffer)));
+ }
+
+ @Test
+ public void testSubtractionOverflow() {
+ // cross check results with Guava results below
+ SerializationUtils utils = new SerializationUtils();
+ assertEquals(false, utils.isSafeSubtract(22222222222L, Long.MIN_VALUE));
+ assertEquals(false, utils.isSafeSubtract(-22222222222L, Long.MAX_VALUE));
+ assertEquals(false, utils.isSafeSubtract(Long.MIN_VALUE, Long.MAX_VALUE));
+ assertEquals(true, utils.isSafeSubtract(-1553103058346370095L, 6553103058346370095L));
+ assertEquals(true, utils.isSafeSubtract(0, Long.MAX_VALUE));
+ assertEquals(true, utils.isSafeSubtract(Long.MIN_VALUE, 0));
+ }
+
+ @Test
+ public void testSubtractionOverflowGuava() {
+ try {
+ LongMath.checkedSubtract(22222222222L, Long.MIN_VALUE);
+ fail("expected ArithmeticException for overflow");
+ } catch (ArithmeticException ex) {
+ assertEquals(ex.getMessage(), "overflow");
+ }
+
+ try {
+ LongMath.checkedSubtract(-22222222222L, Long.MAX_VALUE);
+ fail("expected ArithmeticException for overflow");
+ } catch (ArithmeticException ex) {
+ assertEquals(ex.getMessage(), "overflow");
+ }
+
+ try {
+ LongMath.checkedSubtract(Long.MIN_VALUE, Long.MAX_VALUE);
+ fail("expected ArithmeticException for overflow");
+ } catch (ArithmeticException ex) {
+ assertEquals(ex.getMessage(), "overflow");
+ }
+
+ assertEquals(-8106206116692740190L,
+ LongMath.checkedSubtract(-1553103058346370095L, 6553103058346370095L));
+ assertEquals(-Long.MAX_VALUE, LongMath.checkedSubtract(0, Long.MAX_VALUE));
+ assertEquals(Long.MIN_VALUE, LongMath.checkedSubtract(Long.MIN_VALUE, 0));
+ }
+
+ @Test
+ public void testRandomFloats() throws Exception {
+ float tolerance = 0.0000000000000001f;
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ SerializationUtils utils = new SerializationUtils();
+ Random rand = new Random();
+ int n = 100_000;
+ float[] expected = new float[n];
+ for (int i = 0; i < n; i++) {
+ float f = rand.nextFloat();
+ expected[i] = f;
+ utils.writeFloat(buffer, f);
+ }
+ InputStream newBuffer = fromBuffer(buffer);
+ for (int i = 0; i < n; i++) {
+ float got = utils.readFloat(newBuffer);
+ assertEquals(expected[i], got, tolerance);
+ }
+ }
+
+ @Test
+ public void testRandomDoubles() throws Exception {
+ double tolerance = 0.0000000000000001;
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ SerializationUtils utils = new SerializationUtils();
+ Random rand = new Random();
+ int n = 100_000;
+ double[] expected = new double[n];
+ for (int i = 0; i < n; i++) {
+ double d = rand.nextDouble();
+ expected[i] = d;
+ utils.writeDouble(buffer, d);
+ }
+ InputStream newBuffer = fromBuffer(buffer);
+ for (int i = 0; i < n; i++) {
+ double got = utils.readDouble(newBuffer);
+ assertEquals(expected[i], got, tolerance);
+ }
+ }
+}
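
Note on the single-byte expectations in testBigIntegers (1 -> {2}, -1 -> {1}, 50 -> {100}, -50 -> {99}): they are consistent with a zigzag mapping of signed values onto small unsigned ones before base-128 varint encoding. A minimal sketch of just that mapping follows, independent of SerializationUtils (whose internals may differ in detail).

public final class ZigZag {
  // Zigzag mapping: non-negative n -> 2n, negative n -> -2n - 1,
  // so small magnitudes of either sign stay small before varint encoding.
  public static long encode(long n) {
    return (n << 1) ^ (n >> 63);   // arithmetic shift propagates the sign bit
  }

  public static long decode(long z) {
    return (z >>> 1) ^ -(z & 1);   // undo the interleaving
  }

  public static void main(String[] args) {
    // 0 -> 0, 1 -> 2, -1 -> 1, 50 -> 100, -50 -> 99 (matches the asserts above)
    for (long n : new long[]{0, 1, -1, 50, -50}) {
      System.out.println(n + " -> " + encode(n) + " -> " + decode(encode(n)));
    }
  }
}

With this mapping, values in [-64, 63] land in [0, 127] and fit in a single varint byte, which matches the one-versus-two-byte boundary asserted in the -8192..8191 loop above.
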
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java b/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java
new file mode 100644
index 0000000..4aed06c
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.OrcProto;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestStreamName {
+
+ @Test
+ public void test1() throws Exception {
+ StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA);
+ StreamName s2 = new StreamName(3,
+ OrcProto.Stream.Kind.DICTIONARY_DATA);
+ StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA);
+ StreamName s4 = new StreamName(5,
+ OrcProto.Stream.Kind.DICTIONARY_DATA);
+ StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA);
+ assertEquals(true, s1.equals(s1));
+ assertEquals(false, s1.equals(s2));
+ assertEquals(false, s1.equals(s3));
+ assertEquals(true, s1.equals(s1p));
+ assertEquals(true, s1.compareTo(null) < 0);
+ assertEquals(false, s1.equals(null));
+ assertEquals(true, s1.compareTo(s2) < 0);
+ assertEquals(true, s2.compareTo(s3) < 0);
+ assertEquals(true, s3.compareTo(s4) < 0);
+ assertEquals(true, s4.compareTo(s1p) > 0);
+ assertEquals(0, s1p.compareTo(s1));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java b/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java
new file mode 100644
index 0000000..c3e51b8
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.IntWritable;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static junit.framework.Assert.assertEquals;
+
+/**
+ * Test the red-black tree with string keys.
+ */
+public class TestStringRedBlackTree {
+
+ /**
+ * Checks the red-black tree rules to make sure that we have correctly built
+ * a valid tree.
+ *
+ * Properties:
+ * 1. Red nodes must have black children
+ * 2. Each node must have the same black height on both sides.
+ *
+ * @param node The id of the root of the subtree to check for the red-black
+ * tree properties.
+ * @return The black-height of the subtree.
+ */
+ private int checkSubtree(RedBlackTree tree, int node, IntWritable count
+ ) throws IOException {
+ if (node == RedBlackTree.NULL) {
+ return 1;
+ }
+ count.set(count.get() + 1);
+ boolean is_red = tree.isRed(node);
+ int left = tree.getLeft(node);
+ int right = tree.getRight(node);
+ if (is_red) {
+ if (tree.isRed(left)) {
+ printTree(tree, "", tree.root);
+ throw new IllegalStateException("Left node of " + node + " is " + left +
+ " and both are red.");
+ }
+ if (tree.isRed(right)) {
+ printTree(tree, "", tree.root);
+ throw new IllegalStateException("Right node of " + node + " is " +
+ right + " and both are red.");
+ }
+ }
+ int left_depth = checkSubtree(tree, left, count);
+ int right_depth = checkSubtree(tree, right, count);
+ if (left_depth != right_depth) {
+ printTree(tree, "", tree.root);
+ throw new IllegalStateException("Lopsided tree at node " + node +
+ " with depths " + left_depth + " and " + right_depth);
+ }
+ if (is_red) {
+ return left_depth;
+ } else {
+ return left_depth + 1;
+ }
+ }
+
+ /**
+ * Checks the validity of the entire tree. Also ensures that the number of
+ * nodes visited is the same as the size of the set.
+ */
+ void checkTree(RedBlackTree tree) throws IOException {
+ IntWritable count = new IntWritable(0);
+ if (tree.isRed(tree.root)) {
+ printTree(tree, "", tree.root);
+ throw new IllegalStateException("root is red");
+ }
+ checkSubtree(tree, tree.root, count);
+ if (count.get() != tree.size) {
+ printTree(tree, "", tree.root);
+ throw new IllegalStateException("Broken tree! visited= " + count.get() +
+ " size=" + tree.size);
+ }
+ }
+
+ void printTree(RedBlackTree tree, String indent, int node
+ ) throws IOException {
+ if (node == RedBlackTree.NULL) {
+ System.err.println(indent + "NULL");
+ } else {
+ System.err.println(indent + "Node " + node + " color " +
+ (tree.isRed(node) ? "red" : "black"));
+ printTree(tree, indent + " ", tree.getLeft(node));
+ printTree(tree, indent + " ", tree.getRight(node));
+ }
+ }
+
+ private static class MyVisitor implements StringRedBlackTree.Visitor {
+ private final String[] words;
+ private final int[] order;
+ private final DataOutputBuffer buffer = new DataOutputBuffer();
+ int current = 0;
+
+ MyVisitor(String[] args, int[] order) {
+ words = args;
+ this.order = order;
+ }
+
+ @Override
+ public void visit(StringRedBlackTree.VisitorContext context
+ ) throws IOException {
+ String word = context.getText().toString();
+ assertEquals("in word " + current, words[current], word);
+ assertEquals("in word " + current, order[current],
+ context.getOriginalPosition());
+ buffer.reset();
+ context.writeBytes(buffer);
+ assertEquals(word, new String(buffer.getData(),0,buffer.getLength()));
+ current += 1;
+ }
+ }
+
+ void checkContents(StringRedBlackTree tree, int[] order,
+ String... params
+ ) throws IOException {
+ tree.visit(new MyVisitor(params, order));
+ }
+
+ StringRedBlackTree buildTree(String... params) throws IOException {
+ StringRedBlackTree result = new StringRedBlackTree(1000);
+ for(String word: params) {
+ result.add(word);
+ checkTree(result);
+ }
+ return result;
+ }
+
+ @Test
+ public void test1() throws Exception {
+ StringRedBlackTree tree = new StringRedBlackTree(5);
+ assertEquals(0, tree.getSizeInBytes());
+ checkTree(tree);
+ assertEquals(0, tree.add("owen"));
+ checkTree(tree);
+ assertEquals(1, tree.add("ashutosh"));
+ checkTree(tree);
+ assertEquals(0, tree.add("owen"));
+ checkTree(tree);
+ assertEquals(2, tree.add("alan"));
+ checkTree(tree);
+ assertEquals(2, tree.add("alan"));
+ checkTree(tree);
+ assertEquals(1, tree.add("ashutosh"));
+ checkTree(tree);
+ assertEquals(3, tree.add("greg"));
+ checkTree(tree);
+ assertEquals(4, tree.add("eric"));
+ checkTree(tree);
+ assertEquals(5, tree.add("arun"));
+ checkTree(tree);
+ assertEquals(6, tree.size());
+ checkTree(tree);
+ assertEquals(6, tree.add("eric14"));
+ checkTree(tree);
+ assertEquals(7, tree.add("o"));
+ checkTree(tree);
+ assertEquals(8, tree.add("ziggy"));
+ checkTree(tree);
+ assertEquals(9, tree.add("z"));
+ checkTree(tree);
+ checkContents(tree, new int[]{2,5,1,4,6,3,7,0,9,8},
+ "alan", "arun", "ashutosh", "eric", "eric14", "greg",
+ "o", "owen", "z", "ziggy");
+ assertEquals(32888, tree.getSizeInBytes());
+ // check that adding greg again bumps the count
+ assertEquals(3, tree.add("greg"));
+ assertEquals(41, tree.getCharacterSize());
+ // add some more strings to test the different branches of the
+ // rebalancing
+ assertEquals(10, tree.add("zak"));
+ checkTree(tree);
+ assertEquals(11, tree.add("eric1"));
+ checkTree(tree);
+ assertEquals(12, tree.add("ash"));
+ checkTree(tree);
+ assertEquals(13, tree.add("harry"));
+ checkTree(tree);
+ assertEquals(14, tree.add("john"));
+ checkTree(tree);
+ tree.clear();
+ checkTree(tree);
+ assertEquals(0, tree.getSizeInBytes());
+ assertEquals(0, tree.getCharacterSize());
+ }
+
+ @Test
+ public void test2() throws Exception {
+ StringRedBlackTree tree =
+ buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
+ "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+ assertEquals(26, tree.size());
+ checkContents(tree, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
+ 15,16,17, 18,19,20, 21,22,23, 24,25},
+ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
+ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+ }
+
+ @Test
+ public void test3() throws Exception {
+ StringRedBlackTree tree =
+ buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
+ "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
+ assertEquals(26, tree.size());
+ checkContents(tree, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
+ 13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
+ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
+ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestZlib.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestZlib.java b/orc/src/test/org/apache/hive/orc/impl/TestZlib.java
new file mode 100644
index 0000000..c87f4a8
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestZlib.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.fail;
+
+public class TestZlib {
+
+ @Test
+ public void testNoOverflow() throws Exception {
+ ByteBuffer in = ByteBuffer.allocate(10);
+ ByteBuffer out = ByteBuffer.allocate(10);
+ in.put(new byte[]{1,2,3,4,5,6,7,10});
+ in.flip();
+ CompressionCodec codec = new ZlibCodec();
+ assertEquals(false, codec.compress(in, out, null));
+ }
+
+ @Test
+ public void testCorrupt() throws Exception {
+ ByteBuffer buf = ByteBuffer.allocate(1000);
+ buf.put(new byte[]{127,-128,0,99,98,-1});
+ buf.flip();
+ CompressionCodec codec = new ZlibCodec();
+ ByteBuffer out = ByteBuffer.allocate(1000);
+ try {
+ codec.decompress(buf, out);
+ fail();
+ } catch (IOException ioe) {
+ // EXPECTED
+ }
+ }
+}
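
Note: ZlibCodec builds on the JDK's java.util.zip classes. A minimal stand-alone round trip with Deflater/Inflater (not the ORC codec API) illustrates the behaviors probed above: tiny or incompressible inputs may not shrink, and corrupt input makes decompression throw.

import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;

public class ZlibRoundTrip {
  public static void main(String[] args) throws DataFormatException {
    byte[] input = "hello hello hello hello".getBytes();

    // Compress with raw java.util.zip; ZlibCodec layers ORC's ByteBuffer API on top of this.
    Deflater deflater = new Deflater();
    deflater.setInput(input);
    deflater.finish();
    byte[] compressed = new byte[input.length + 64];
    int clen = deflater.deflate(compressed);
    deflater.end();

    // Decompress; feeding garbage here throws DataFormatException,
    // analogous to the IOException expected in testCorrupt above.
    Inflater inflater = new Inflater();
    inflater.setInput(compressed, 0, clen);
    byte[] restored = new byte[input.length];
    int rlen = inflater.inflate(restored);
    inflater.end();

    System.out.println(new String(restored, 0, rlen));
  }
}
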
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java b/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java
new file mode 100644
index 0000000..50e6208
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java
@@ -0,0 +1,485 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.tools;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.OrcConf;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFileDump {
+
+ Path workDir = new Path(System.getProperty("test.tmp.dir"));
+ Configuration conf;
+ FileSystem fs;
+ Path testFilePath;
+
+ @Before
+ public void openFileSystem () throws Exception {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ fs.setWorkingDirectory(workDir);
+ testFilePath = new Path("TestFileDump.testDump.orc");
+ fs.delete(testFilePath, false);
+ }
+
+ static TypeDescription getMyRecordType() {
+ return TypeDescription.createStruct()
+ .addField("i", TypeDescription.createInt())
+ .addField("l", TypeDescription.createLong())
+ .addField("s", TypeDescription.createString());
+ }
+
+ static void appendMyRecord(VectorizedRowBatch batch,
+ int i,
+ long l,
+ String str) {
+ ((LongColumnVector) batch.cols[0]).vector[batch.size] = i;
+ ((LongColumnVector) batch.cols[1]).vector[batch.size] = l;
+ if (str == null) {
+ batch.cols[2].noNulls = false;
+ batch.cols[2].isNull[batch.size] = true;
+ } else {
+ ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
+ str.getBytes());
+ }
+ batch.size += 1;
+ }
+
+ static TypeDescription getAllTypesType() {
+ return TypeDescription.createStruct()
+ .addField("b", TypeDescription.createBoolean())
+ .addField("bt", TypeDescription.createByte())
+ .addField("s", TypeDescription.createShort())
+ .addField("i", TypeDescription.createInt())
+ .addField("l", TypeDescription.createLong())
+ .addField("f", TypeDescription.createFloat())
+ .addField("d", TypeDescription.createDouble())
+ .addField("de", TypeDescription.createDecimal())
+ .addField("t", TypeDescription.createTimestamp())
+ .addField("dt", TypeDescription.createDate())
+ .addField("str", TypeDescription.createString())
+ .addField("c", TypeDescription.createChar().withMaxLength(5))
+ .addField("vc", TypeDescription.createVarchar().withMaxLength(10))
+ .addField("m", TypeDescription.createMap(
+ TypeDescription.createString(),
+ TypeDescription.createString()))
+ .addField("a", TypeDescription.createList(TypeDescription.createInt()))
+ .addField("st", TypeDescription.createStruct()
+ .addField("i", TypeDescription.createInt())
+ .addField("s", TypeDescription.createString()));
+ }
+
+ static void appendAllTypes(VectorizedRowBatch batch,
+ boolean b,
+ byte bt,
+ short s,
+ int i,
+ long l,
+ float f,
+ double d,
+ HiveDecimalWritable de,
+ Timestamp t,
+ DateWritable dt,
+ String str,
+ String c,
+ String vc,
+ Map<String, String> m,
+ List<Integer> a,
+ int sti,
+ String sts) {
+ int row = batch.size++;
+ ((LongColumnVector) batch.cols[0]).vector[row] = b ? 1 : 0;
+ ((LongColumnVector) batch.cols[1]).vector[row] = bt;
+ ((LongColumnVector) batch.cols[2]).vector[row] = s;
+ ((LongColumnVector) batch.cols[3]).vector[row] = i;
+ ((LongColumnVector) batch.cols[4]).vector[row] = l;
+ ((DoubleColumnVector) batch.cols[5]).vector[row] = f;
+ ((DoubleColumnVector) batch.cols[6]).vector[row] = d;
+ ((DecimalColumnVector) batch.cols[7]).vector[row].set(de);
+ ((TimestampColumnVector) batch.cols[8]).set(row, t);
+ ((LongColumnVector) batch.cols[9]).vector[row] = dt.getDays();
+ ((BytesColumnVector) batch.cols[10]).setVal(row, str.getBytes());
+ ((BytesColumnVector) batch.cols[11]).setVal(row, c.getBytes());
+ ((BytesColumnVector) batch.cols[12]).setVal(row, vc.getBytes());
+ MapColumnVector map = (MapColumnVector) batch.cols[13];
+ int offset = map.childCount;
+ map.offsets[row] = offset;
+ map.lengths[row] = m.size();
+ map.childCount += map.lengths[row];
+ for(Map.Entry<String, String> entry: m.entrySet()) {
+ ((BytesColumnVector) map.keys).setVal(offset, entry.getKey().getBytes());
+ ((BytesColumnVector) map.values).setVal(offset++,
+ entry.getValue().getBytes());
+ }
+ ListColumnVector list = (ListColumnVector) batch.cols[14];
+ offset = list.childCount;
+ list.offsets[row] = offset;
+ list.lengths[row] = a.size();
+ list.childCount += list.lengths[row];
+ for(int e=0; e < a.size(); ++e) {
+ ((LongColumnVector) list.child).vector[offset + e] = a.get(e);
+ }
+ StructColumnVector struct = (StructColumnVector) batch.cols[15];
+ ((LongColumnVector) struct.fields[0]).vector[row] = sti;
+ ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes());
+ }
+
+ public static void checkOutput(String expected,
+ String actual) throws Exception {
+ BufferedReader eStream =
+ new BufferedReader(new FileReader
+ (TestJsonFileDump.getFileFromClasspath(expected)));
+ BufferedReader aStream =
+ new BufferedReader(new FileReader(actual));
+ String expectedLine = eStream.readLine().trim();
+ while (expectedLine != null) {
+ String actualLine = aStream.readLine().trim();
+ System.out.println("actual: " + actualLine);
+ System.out.println("expected: " + expectedLine);
+ Assert.assertEquals(expectedLine, actualLine);
+ expectedLine = eStream.readLine();
+ expectedLine = expectedLine == null ? null : expectedLine.trim();
+ }
+ Assert.assertNull(eStream.readLine());
+ Assert.assertNull(aStream.readLine());
+ eStream.close();
+ aStream.close();
+ }
+
+ @Test
+ public void testDump() throws Exception {
+ TypeDescription schema = getMyRecordType();
+ conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .fileSystem(fs)
+ .setSchema(schema)
+ .compress(CompressionKind.ZLIB)
+ .stripeSize(100000)
+ .rowIndexStride(1000));
+ Random r1 = new Random(1);
+ String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+ "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+ "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+ "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+ "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+ "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+ "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+ "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+ "before", "us,", "we", "were", "all", "going", "direct", "to",
+ "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+ "way"};
+ VectorizedRowBatch batch = schema.createRowBatch(1000);
+ for(int i=0; i < 21000; ++i) {
+ appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
+ words[r1.nextInt(words.length)]);
+ if (batch.size == batch.getMaxSize()) {
+ writer.addRowBatch(batch);
+ batch.reset();
+ }
+ }
+ if (batch.size > 0) {
+ writer.addRowBatch(batch);
+ }
+ writer.close();
+ PrintStream origOut = System.out;
+ String outputFilename = "orc-file-dump.out";
+ FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"});
+ System.out.flush();
+ System.setOut(origOut);
+
+
+ checkOutput(outputFilename, workDir + File.separator + outputFilename);
+ }
+
+ @Test
+ public void testDataDump() throws Exception {
+ TypeDescription schema = getAllTypesType();
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .fileSystem(fs)
+ .setSchema(schema)
+ .stripeSize(100000)
+ .compress(CompressionKind.NONE)
+ .bufferSize(10000)
+ .rowIndexStride(1000));
+ VectorizedRowBatch batch = schema.createRowBatch(1000);
+ Map<String, String> m = new HashMap<String, String>(2);
+ m.put("k1", "v1");
+ appendAllTypes(batch,
+ true,
+ (byte) 10,
+ (short) 100,
+ 1000,
+ 10000L,
+ 4.0f,
+ 20.0,
+ new HiveDecimalWritable("4.2222"),
+ new Timestamp(1416967764000L),
+ new DateWritable(new Date(1416967764000L)),
+ "string",
+ "hello",
+ "hello",
+ m,
+ Arrays.asList(100, 200),
+ 10, "foo");
+ m.clear();
+ m.put("k3", "v3");
+ appendAllTypes(
+ batch,
+ false,
+ (byte)20,
+ (short)200,
+ 2000,
+ 20000L,
+ 8.0f,
+ 40.0,
+ new HiveDecimalWritable("2.2222"),
+ new Timestamp(1416967364000L),
+ new DateWritable(new Date(1411967764000L)),
+ "abcd",
+ "world",
+ "world",
+ m,
+ Arrays.asList(200, 300),
+ 20, "bar");
+ writer.addRowBatch(batch);
+
+ writer.close();
+ PrintStream origOut = System.out;
+ ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString(), "-d"});
+ System.out.flush();
+ System.setOut(origOut);
+ String[] lines = myOut.toString().split("\n");
+ Assert.assertEquals("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24.0\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello\",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]);
+ Assert.assertEquals("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44.0\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
+ }
+
+ // Test that if the fraction of rows that have distinct strings is greater than the configured
+ // threshold, dictionary encoding is turned off. If dictionary encoding is turned off, the length
+ // of the dictionary stream for the column will be 0 in the ORC file dump.
+ @Test
+ public void testDictionaryThreshold() throws Exception {
+ TypeDescription schema = getMyRecordType();
+ Configuration conf = new Configuration();
+ conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+ conf.setFloat(OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), 0.49f);
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .fileSystem(fs)
+ .setSchema(schema)
+ .stripeSize(100000)
+ .compress(CompressionKind.ZLIB)
+ .rowIndexStride(1000)
+ .bufferSize(10000));
+ VectorizedRowBatch batch = schema.createRowBatch(1000);
+ Random r1 = new Random(1);
+ String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+ "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+ "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+ "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+ "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+ "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+ "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+ "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+ "before", "us,", "we", "were", "all", "going", "direct", "to",
+ "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+ "way"};
+ int nextInt = 0;
+ for(int i=0; i < 21000; ++i) {
+ // Write out the same string for two consecutive rows; this guarantees the fraction of rows
+ // with distinct strings is 0.5.
+ if (i % 2 == 0) {
+ nextInt = r1.nextInt(words.length);
+ // Append the value of i to the word; this guarantees that when an index or word is repeated,
+ // the actual string is unique.
+ words[nextInt] += "-" + i;
+ }
+ appendMyRecord(batch, r1.nextInt(), r1.nextLong(), words[nextInt]);
+ if (batch.size == batch.getMaxSize()) {
+ writer.addRowBatch(batch);
+ batch.reset();
+ }
+ }
+ if (batch.size != 0) {
+ writer.addRowBatch(batch);
+ }
+ writer.close();
+ PrintStream origOut = System.out;
+ String outputFilename = "orc-file-dump-dictionary-threshold.out";
+ FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"});
+ System.out.flush();
+ System.setOut(origOut);
+
+ checkOutput(outputFilename, workDir + File.separator + outputFilename);
+ }
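+
+ // Note: 21,000 rows are written and each generated word is reused for exactly two consecutive
+ // rows, so roughly 10,500 of the values are distinct. That fraction (~0.5) exceeds the 0.49
+ // DICTIONARY_KEY_SIZE_THRESHOLD configured above, so the writer falls back to direct encoding
+ // for the string column, which is what the expected dump file reflects.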
+
+ @Test
+ public void testBloomFilter() throws Exception {
+ TypeDescription schema = getMyRecordType();
+ conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+ OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+ .fileSystem(fs)
+ .setSchema(schema)
+ .stripeSize(100000)
+ .compress(CompressionKind.ZLIB)
+ .bufferSize(10000)
+ .rowIndexStride(1000)
+ .bloomFilterColumns("S");
+ Writer writer = OrcFile.createWriter(testFilePath, options);
+ Random r1 = new Random(1);
+ String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+ "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+ "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+ "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+ "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+ "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+ "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+ "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+ "before", "us,", "we", "were", "all", "going", "direct", "to",
+ "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+ "way"};
+ VectorizedRowBatch batch = schema.createRowBatch(1000);
+ for(int i=0; i < 21000; ++i) {
+ appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
+ words[r1.nextInt(words.length)]);
+ if (batch.size == batch.getMaxSize()) {
+ writer.addRowBatch(batch);
+ batch.reset();
+ }
+ }
+ if (batch.size > 0) {
+ writer.addRowBatch(batch);
+ }
+ writer.close();
+ PrintStream origOut = System.out;
+ String outputFilename = "orc-file-dump-bloomfilter.out";
+ FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"});
+ System.out.flush();
+ System.setOut(origOut);
+
+
+ checkOutput(outputFilename, workDir + File.separator + outputFilename);
+ }
+
+ @Test
+ public void testBloomFilter2() throws Exception {
+ TypeDescription schema = getMyRecordType();
+ conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+ OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+ .fileSystem(fs)
+ .setSchema(schema)
+ .stripeSize(100000)
+ .compress(CompressionKind.ZLIB)
+ .bufferSize(10000)
+ .rowIndexStride(1000)
+ .bloomFilterColumns("l")
+ .bloomFilterFpp(0.01);
+ VectorizedRowBatch batch = schema.createRowBatch(1000);
+ Writer writer = OrcFile.createWriter(testFilePath, options);
+ Random r1 = new Random(1);
+ String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+ "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+ "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+ "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+ "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+ "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+ "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+ "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+ "before", "us,", "we", "were", "all", "going", "direct", "to",
+ "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+ "way"};
+ for(int i=0; i < 21000; ++i) {
+ appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
+ words[r1.nextInt(words.length)]);
+ if (batch.size == batch.getMaxSize()) {
+ writer.addRowBatch(batch);
+ batch.reset();
+ }
+ }
+ if (batch.size > 0) {
+ writer.addRowBatch(batch);
+ }
+ writer.close();
+ PrintStream origOut = System.out;
+ String outputFilename = "orc-file-dump-bloomfilter2.out";
+ FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
+ System.out.flush();
+ System.setOut(origOut);
+
+
+ checkOutput(outputFilename, workDir + File.separator + outputFilename);
+ }
+}
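All of the dump tests above share the same capture-and-compare harness: stdout is redirected
to a file, FileDump.main is invoked with the desired flags, the original stream is restored,
and the captured output is diffed line by line against a checked-in expected file. A minimal
sketch of that pattern, using a hypothetical dumpToFile helper (FileDump, checkOutput, workDir
and testFilePath are the names defined in the tests above):

    static void dumpToFile(String orcFile, String outFile, String... extraArgs) throws Exception {
      PrintStream origOut = System.out;
      try (PrintStream capture = new PrintStream(new FileOutputStream(outFile))) {
        System.setOut(capture);                       // redirect stdout into the capture file
        String[] args = new String[extraArgs.length + 1];
        args[0] = orcFile;                            // first argument is always the ORC file path
        System.arraycopy(extraArgs, 0, args, 1, extraArgs.length);
        FileDump.main(args);                          // run the dump command
        capture.flush();
      } finally {
        System.setOut(origOut);                       // always restore the original stream
      }
    }

    // usage, mirroring testDump:
    //   dumpToFile(testFilePath.toString(),
    //       workDir + File.separator + "orc-file-dump.out", "--rowindex=1,2,3");
    //   checkOutput("orc-file-dump.out", workDir + File.separator + "orc-file-dump.out");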
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java b/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java
new file mode 100644
index 0000000..efded7a
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.tools;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.net.URL;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.OrcConf;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestJsonFileDump {
+ public static String getFileFromClasspath(String name) {
+ URL url = ClassLoader.getSystemResource(name);
+ if (url == null) {
+ throw new IllegalArgumentException("Could not find " + name);
+ }
+ return url.getPath();
+ }
+
+ Path workDir = new Path(System.getProperty("test.tmp.dir"));
+ Configuration conf;
+ FileSystem fs;
+ Path testFilePath;
+
+ @Before
+ public void openFileSystem () throws Exception {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ fs.setWorkingDirectory(workDir);
+ testFilePath = new Path("TestFileDump.testDump.orc");
+ fs.delete(testFilePath, false);
+ }
+
+ static void checkOutput(String expected,
+ String actual) throws Exception {
+ BufferedReader eStream =
+ new BufferedReader(new FileReader(getFileFromClasspath(expected)));
+ BufferedReader aStream =
+ new BufferedReader(new FileReader(actual));
+ String expectedLine = eStream.readLine();
+ while (expectedLine != null) {
+ String actualLine = aStream.readLine();
+ System.out.println("actual: " + actualLine);
+ System.out.println("expected: " + expectedLine);
+ assertEquals(expectedLine, actualLine);
+ expectedLine = eStream.readLine();
+ }
+ assertNull(eStream.readLine());
+ assertNull(aStream.readLine());
+ eStream.close();
+ aStream.close();
+ }
+
+ @Test
+ public void testJsonDump() throws Exception {
+ TypeDescription schema = TypeDescription.createStruct()
+ .addField("i", TypeDescription.createInt())
+ .addField("l", TypeDescription.createLong())
+ .addField("s", TypeDescription.createString());
+ conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+ OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+ .fileSystem(fs)
+ .setSchema(schema)
+ .stripeSize(100000)
+ .compress(CompressionKind.ZLIB)
+ .bufferSize(10000)
+ .rowIndexStride(1000)
+ .bloomFilterColumns("s");
+ Writer writer = OrcFile.createWriter(testFilePath, options);
+ Random r1 = new Random(1);
+ String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+ "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+ "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+ "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+ "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+ "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+ "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+ "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+ "before", "us,", "we", "were", "all", "going", "direct", "to",
+ "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+ "way"};
+ VectorizedRowBatch batch = schema.createRowBatch(1000);
+ for(int i=0; i < 21000; ++i) {
+ ((LongColumnVector) batch.cols[0]).vector[batch.size] = r1.nextInt();
+ ((LongColumnVector) batch.cols[1]).vector[batch.size] = r1.nextLong();
+ if (i % 100 == 0) {
+ batch.cols[2].noNulls = false;
+ batch.cols[2].isNull[batch.size] = true;
+ } else {
+ ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
+ words[r1.nextInt(words.length)].getBytes());
+ }
+ batch.size += 1;
+ if (batch.size == batch.getMaxSize()) {
+ writer.addRowBatch(batch);
+ batch.reset();
+ }
+ }
+ if (batch.size > 0) {
+ writer.addRowBatch(batch);
+ }
+
+ writer.close();
+ PrintStream origOut = System.out;
+ String outputFilename = "orc-file-dump.json";
+ FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=3"});
+ System.out.flush();
+ System.setOut(origOut);
+
+
+ checkOutput(outputFilename, workDir + File.separator + outputFilename);
+ }
+}
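testJsonDump above also shows the null-marking convention for vectorized batches: a column that
contains any null must have noNulls set to false and the affected rows flagged in isNull, while
non-null rows are written with setVal. A minimal standalone sketch of that pattern, using only
the column-vector classes imported above (the field names and row count are arbitrary):

    TypeDescription schema = TypeDescription.createStruct()
        .addField("i", TypeDescription.createInt())
        .addField("s", TypeDescription.createString());
    VectorizedRowBatch batch = schema.createRowBatch(1024);
    LongColumnVector ints = (LongColumnVector) batch.cols[0];
    BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
    for (int i = 0; i < 10; ++i) {
      int row = batch.size++;
      ints.vector[row] = i;
      if (i % 3 == 0) {
        strs.noNulls = false;         // the column now contains at least one null
        strs.isNull[row] = true;      // this particular row is null
      } else {
        strs.setVal(row, ("row-" + i).getBytes());
      }
    }
    // writer.addRowBatch(batch) would then persist the batch, exactly as the tests above do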