You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ke...@apache.org on 2013/03/05 21:44:52 UTC
svn commit: r1452992 [7/8] - in /hive/trunk: ./ ivy/ ql/
ql/src/gen/protobuf/ ql/src/gen/protobuf/gen-java/
ql/src/gen/protobuf/gen-java/org/ ql/src/gen/protobuf/gen-java/org/apache/
ql/src/gen/protobuf/gen-java/org/apache/hadoop/ ql/src/gen/protobuf/g...
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,360 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestInputOutputFormat {
+
+ Path workDir = new Path(System.getProperty("test.tmp.dir","target/test/tmp")); // scratch directory; the test.tmp.dir system property overrides the default
+
+ public static class MyRow implements Writable {
+   int x;
+   int y;
+   MyRow(int first, int second) {
+     x = first;
+     y = second;
+   }
+   // Both Writable hooks below always throw; the tests hand rows to the serde instead.
+   @Override
+   public void write(DataOutput out) throws IOException {
+     throw new UnsupportedOperationException("no write");
+   }
+
+   @Override
+   public void readFields(DataInput in) throws IOException {
+     throw new UnsupportedOperationException("no read");
+   }
+ }
+
+ @Rule
+ public TestName testCaseName = new TestName(); // supplies the running test's method name for the output file name
+ JobConf conf; // recreated before every test by openFileSystem
+ FileSystem fs; // local file system obtained from conf
+ Path testFilePath; // per-test ORC file under workDir
+
+ @Before
+ public void openFileSystem () throws Exception {
+ conf = new JobConf();
+ fs = FileSystem.getLocal(conf);
+ testFilePath = new Path(workDir, "TestInputOutputFormat." +
+ testCaseName.getMethodName() + ".orc");
+ fs.delete(testFilePath, false);
+ }
+
+ @Test
+ public void testInOutFormat() throws Exception { // writes three MyRow rows, then reads them back twice via OrcInputFormat
+ Properties properties = new Properties();
+ StructObjectInspector inspector;
+ synchronized (TestOrcFile.class) { // same lock the other tests in this file take around inspector creation
+ inspector = (StructObjectInspector)
+ ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ SerDe serde = new OrcSerde();
+ HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
+ FileSinkOperator.RecordWriter writer =
+ outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
+ properties, Reporter.NULL);
+ writer.write(serde.serialize(new MyRow(1,2), inspector));
+ writer.write(serde.serialize(new MyRow(2,2), inspector));
+ writer.write(serde.serialize(new MyRow(3,2), inspector));
+ writer.close(true);
+ serde = new OrcSerde(); // fresh serde, initialized below from the column properties
+ properties.setProperty("columns", "x,y");
+ properties.setProperty("columns.types", "int:int");
+ serde.initialize(conf, properties);
+ assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
+ inspector = (StructObjectInspector) serde.getObjectInspector();
+ assertEquals("struct<x:int,y:int>", inspector.getTypeName());
+ InputFormat<?,?> in = new OrcInputFormat();
+ FileInputFormat.setInputPaths(conf, testFilePath.toString());
+ InputSplit[] splits = in.getSplits(conf, 1);
+ assertEquals(1, splits.length);
+
+ // test the validate input method
+ ArrayList<FileStatus> fileList = new ArrayList<FileStatus>();
+ assertEquals(false, // an empty file list is expected to be rejected
+ ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
+ fileList.add(fs.getFileStatus(testFilePath));
+ assertEquals(true, // list holds only the ORC file written above
+ ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
+ fileList.add(fs.getFileStatus(workDir));
+ assertEquals(false, // the added work directory entry is expected to fail validation
+ ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
+
+
+ // read the whole file
+ org.apache.hadoop.mapred.RecordReader reader =
+ in.getRecordReader(splits[0], conf, Reporter.NULL);
+ Object key = reader.createKey();
+ Writable value = (Writable) reader.createValue();
+ int rowNum = 0;
+ List<? extends StructField> fields =inspector.getAllStructFieldRefs();
+ IntObjectInspector intInspector =
+ (IntObjectInspector) fields.get(0).getFieldObjectInspector();
+ assertEquals(0.0, reader.getProgress(), 0.00001);
+ assertEquals(0, reader.getPos());
+ while (reader.next(key, value)) {
+ assertEquals(++rowNum, intInspector.get(inspector.
+ getStructFieldData(serde.deserialize(value), fields.get(0))));
+ assertEquals(2, intInspector.get(inspector.
+ getStructFieldData(serde.deserialize(value), fields.get(1))));
+ }
+ assertEquals(3, rowNum);
+ assertEquals(1.0, reader.getProgress(), 0.00001);
+ reader.close();
+
+ // read just the first column
+ conf.set("hive.io.file.readcolumn.ids", "0");
+ reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
+ key = reader.createKey();
+ value = (Writable) reader.createValue();
+ rowNum = 0;
+ fields = inspector.getAllStructFieldRefs();
+ while (reader.next(key, value)) {
+ assertEquals(++rowNum, intInspector.get(inspector.
+ getStructFieldData(value, fields.get(0))));
+ assertEquals(null, inspector.getStructFieldData(value, fields.get(1))); // column 1 was not selected above
+ }
+ assertEquals(3, rowNum);
+ reader.close();
+ }
+
+ static class NestedRow implements Writable {
+   int z;
+   MyRow r;
+   NestedRow(int x, int y, int z) {
+     r = new MyRow(x, y);
+     this.z = z;
+   }
+   // Both Writable hooks below always throw.
+   @Override
+   public void write(DataOutput out) throws IOException {
+     throw new UnsupportedOperationException("unsupported");
+   }
+
+   @Override
+   public void readFields(DataInput in) throws IOException {
+     throw new UnsupportedOperationException("unsupported");
+   }
+ }
+
+ /** Writes NestedRow rows via the mapred RecordWriter, then reads back only column 1 (r). */
+ @Test
+ public void testMROutput() throws Exception {
+   Properties properties = new Properties();
+   StructObjectInspector inspector;
+   synchronized (TestOrcFile.class) {
+     inspector = (StructObjectInspector)
+         ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
+             ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   }
+   SerDe serde = new OrcSerde();
+   OutputFormat<?, ?> outFormat = new OrcOutputFormat();
+   RecordWriter writer =
+       outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
+           Reporter.NULL);
+   writer.write(NullWritable.get(),
+       serde.serialize(new NestedRow(1,2,3), inspector));
+   writer.write(NullWritable.get(),
+       serde.serialize(new NestedRow(4,5,6), inspector));
+   writer.write(NullWritable.get(),
+       serde.serialize(new NestedRow(7,8,9), inspector));
+   writer.close(Reporter.NULL);
+   serde = new OrcSerde();
+   properties.setProperty("columns", "z,r");
+   properties.setProperty("columns.types", "int:struct<x:int,y:int>");
+   serde.initialize(conf, properties);
+   inspector = (StructObjectInspector) serde.getObjectInspector();
+   InputFormat<?,?> in = new OrcInputFormat();
+   FileInputFormat.setInputPaths(conf, testFilePath.toString());
+   InputSplit[] splits = in.getSplits(conf, 1);
+   assertEquals(1, splits.length);
+   conf.set("hive.io.file.readcolumn.ids", "1");
+   org.apache.hadoop.mapred.RecordReader reader =
+       in.getRecordReader(splits[0], conf, Reporter.NULL);
+   Object key = reader.createKey();
+   Object value = reader.createValue();
+   int rowNum = 0;
+   List<? extends StructField> fields = inspector.getAllStructFieldRefs();
+   StructObjectInspector inner = (StructObjectInspector)
+       fields.get(1).getFieldObjectInspector();
+   List<? extends StructField> inFields = inner.getAllStructFieldRefs();
+   // x and y live in the nested struct, so their inspector is taken from it
+   IntObjectInspector intInspector =
+       (IntObjectInspector) inFields.get(0).getFieldObjectInspector();
+   while (reader.next(key, value)) {
+     assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
+     Object sub = inspector.getStructFieldData(value, fields.get(1));
+     assertEquals(3*rowNum+1, intInspector.get(inner.getStructFieldData(sub,
+         inFields.get(0))));
+     assertEquals(3*rowNum+2, intInspector.get(inner.getStructFieldData(sub,
+         inFields.get(1))));
+     rowNum += 1;
+   }
+   assertEquals(3, rowNum);
+   reader.close();
+ }
+
+ /** Writes a file with no rows and checks that the reader reports end of input at once. */
+ @Test
+ public void testEmptyFile() throws Exception {
+   Properties properties = new Properties();
+   HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
+   FileSinkOperator.RecordWriter writer =
+       outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
+           properties, Reporter.NULL);
+   writer.close(true);
+   properties.setProperty("columns", "x,y");
+   properties.setProperty("columns.types", "int:int");
+   SerDe serde = new OrcSerde();
+   serde.initialize(conf, properties);
+   InputFormat<?,?> in = new OrcInputFormat();
+   FileInputFormat.setInputPaths(conf, testFilePath.toString());
+   InputSplit[] splits = in.getSplits(conf, 1);
+   assertEquals(1, splits.length);
+
+   // read the whole file
+   conf.set("hive.io.file.readcolumn.ids", "0,1");
+   org.apache.hadoop.mapred.RecordReader reader =
+       in.getRecordReader(splits[0], conf, Reporter.NULL);
+   Object key = reader.createKey();
+   Object value = reader.createValue();
+   assertEquals(0.0, reader.getProgress(), 0.00001);
+   assertEquals(0, reader.getPos());
+   assertEquals(false, reader.next(key, value));
+   reader.close();
+   assertEquals(null, serde.getSerDeStats());
+ }
+
+ static class StringRow implements Writable {
+   String str;
+   String str2;
+   StringRow(String value) {
+     this.str = value;
+     this.str2 = value;
+   }
+   @Override
+   public void write(DataOutput out) throws IOException {
+     throw new UnsupportedOperationException("no write");
+   }
+   // Like write, reading is never supported for this test row.
+   @Override
+   public void readFields(DataInput in) throws IOException {
+     throw new UnsupportedOperationException("no read");
+   }
+ }
+
+ /** Initializes the serde with column names only and expects both columns typed as string. */
+ @Test
+ public void testDefaultTypes() throws Exception {
+   Properties properties = new Properties();
+   StructObjectInspector inspector;
+   synchronized (TestOrcFile.class) {
+     inspector = (StructObjectInspector)
+         ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
+             ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   }
+   SerDe serde = new OrcSerde();
+   HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
+   FileSinkOperator.RecordWriter writer =
+       outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
+           true, properties, Reporter.NULL);
+   writer.write(serde.serialize(new StringRow("owen"), inspector));
+   writer.write(serde.serialize(new StringRow("beth"), inspector));
+   writer.write(serde.serialize(new StringRow("laurel"), inspector));
+   writer.write(serde.serialize(new StringRow("hazen"), inspector));
+   writer.write(serde.serialize(new StringRow("colin"), inspector));
+   writer.write(serde.serialize(new StringRow("miles"), inspector));
+   writer.close(true);
+   serde = new OrcSerde();
+   properties.setProperty("columns", "str,str2");
+   serde.initialize(conf, properties);
+   inspector = (StructObjectInspector) serde.getObjectInspector();
+   assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
+   InputFormat<?,?> in = new OrcInputFormat();
+   FileInputFormat.setInputPaths(conf, testFilePath.toString());
+   InputSplit[] splits = in.getSplits(conf, 1);
+   assertEquals(1, splits.length);
+
+   // read the whole file
+   org.apache.hadoop.mapred.RecordReader reader =
+       in.getRecordReader(splits[0], conf, Reporter.NULL);
+   Object key = reader.createKey();
+   Writable value = (Writable) reader.createValue();
+   List<? extends StructField> fields = inspector.getAllStructFieldRefs();
+   StringObjectInspector strInspector = (StringObjectInspector)
+       fields.get(0).getFieldObjectInspector();
+   assertEquals(true, reader.next(key, value));
+   assertEquals("owen", strInspector.getPrimitiveJavaObject(inspector.
+       getStructFieldData(value, fields.get(0))));
+   assertEquals(true, reader.next(key, value));
+   assertEquals("beth", strInspector.getPrimitiveJavaObject(inspector.
+       getStructFieldData(value, fields.get(0))));
+   assertEquals(true, reader.next(key, value));
+   assertEquals("laurel", strInspector.getPrimitiveJavaObject(inspector.
+       getStructFieldData(value, fields.get(0))));
+   assertEquals(true, reader.next(key, value));
+   assertEquals("hazen", strInspector.getPrimitiveJavaObject(inspector.
+       getStructFieldData(value, fields.get(0))));
+   assertEquals(true, reader.next(key, value));
+   assertEquals("colin", strInspector.getPrimitiveJavaObject(inspector.
+       getStructFieldData(value, fields.get(0))));
+   assertEquals(true, reader.next(key, value));
+   assertEquals("miles", strInspector.getPrimitiveJavaObject(inspector.
+       getStructFieldData(value, fields.get(0))));
+   assertEquals(false, reader.next(key, value));
+   reader.close();
+ }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,896 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import static junit.framework.Assert.*;
+import static junit.framework.Assert.assertEquals;
+
+/**
+ * Tests for the top-level reader and writer of ORC files.
+ */
+public class TestOrcFile {
+
+ public static class InnerStruct {
+   int int1;
+   Text string1 = new Text();
+   InnerStruct(int number, String label) {
+     int1 = number;
+     string1.set(label); // fills the Text created by the field initializer
+   }
+ }
+
+ public static class MiddleStruct {
+   List<InnerStruct> list = new ArrayList<InnerStruct>();
+
+   MiddleStruct(InnerStruct... items) {
+     list.clear(); // the list is already empty at this point
+     for (int idx = 0; idx < items.length; idx++) {
+       list.add(items[idx]);
+     }
+   }
+ }
+
+ public static class BigRow {
+   // test1 expects the reader's struct type name to list these fields in this order
+   Boolean boolean1;
+   Byte byte1;
+   Short short1;
+   Integer int1;
+   Long long1;
+   Float float1;
+   Double double1;
+   BytesWritable bytes1;
+   Text string1;
+   MiddleStruct middle;
+   List<InnerStruct> list = new ArrayList<InnerStruct>();
+   Map<Text, InnerStruct> map = new HashMap<Text, InnerStruct>();
+
+   // stores the arguments as given; only the String is converted (to Text)
+   BigRow(Boolean boolValue, Byte byteValue, Short shortValue, Integer intValue,
+          Long longValue, Float floatValue, Double doubleValue,
+          BytesWritable bytesValue, String stringValue, MiddleStruct middleValue,
+          List<InnerStruct> listValue, Map<Text, InnerStruct> mapValue) {
+     boolean1 = boolValue;
+     byte1 = byteValue;
+     short1 = shortValue;
+     int1 = intValue;
+     long1 = longValue;
+     float1 = floatValue;
+     double1 = doubleValue;
+     bytes1 = bytesValue;
+     // a null string stays null rather than being wrapped in a Text
+     string1 = (stringValue == null) ? null : new Text(stringValue);
+     middle = middleValue;
+     // these replace the empty collections created by the field initializers
+     list = listValue;
+     map = mapValue;
+   }
+ }
+
+ private static InnerStruct inner(int number, String label) {
+   return new InnerStruct(number, label); // shorthand for building test rows
+ }
+
+ private static Map<Text, InnerStruct> map(InnerStruct... items) {
+ Map<Text, InnerStruct> result = new HashMap<Text, InnerStruct>();
+ for(InnerStruct i: items) {
+ result.put(new Text(i.string1), i);
+ }
+ return result;
+ }
+
+ private static List<InnerStruct> list(InnerStruct... items) {
+ List<InnerStruct> result = new ArrayList<InnerStruct>();
+ for(InnerStruct s: items) {
+ result.add(s);
+ }
+ return result;
+ }
+
+ private static BytesWritable bytes(int... items) {
+ BytesWritable result = new BytesWritable();
+ result.setSize(items.length);
+ for(int i=0; i < items.length; ++i) {
+ result.getBytes()[i] = (byte) items[i];
+ }
+ return result;
+ }
+
+ private static ByteBuffer byteBuf(int... items) {
+ ByteBuffer result = ByteBuffer.allocate(items.length);
+ for(int item: items) {
+ result.put((byte) item);
+ }
+ return result;
+ }
+
+ Path workDir = new Path(System.getProperty("test.tmp.dir",
+ "target" + File.separator + "test" + File.separator + "tmp")); // fallback used when test.tmp.dir is not set
+
+ Configuration conf; // recreated before every test by openFileSystem
+ FileSystem fs; // local file system obtained from conf
+ Path testFilePath; // per-test ORC file under workDir
+
+ @Rule
+ public TestName testCaseName = new TestName(); // supplies the running test's method name for the output file name
+
+ @Before
+ public void openFileSystem () throws Exception {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ testFilePath = new Path(workDir, "TestOrcFile." +
+ testCaseName.getMethodName() + ".orc");
+ fs.delete(testFilePath, false);
+ }
+
+ /**
+  * Writes two BigRow rows, then checks file statistics, reader inspectors and every field.
+  */
+ @Test
+ public void test1() throws Exception {
+   ObjectInspector inspector;
+   synchronized (TestOrcFile.class) {
+     inspector = ObjectInspectorFactory.getReflectionObjectInspector
+         (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   }
+   Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+       100000, CompressionKind.ZLIB, 10000, 10000);
+   writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
+       Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0,1,2,3,4), "hi",
+       new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+       list(inner(3, "good"), inner(4, "bad")),
+       map()));
+   writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536,
+       Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
+       new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+       list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
+       map(inner(5,"chani"), inner(1,"mauddib"))));
+   writer.close();
+   Reader reader = OrcFile.createReader(fs, testFilePath);
+
+   // check the stats
+   ColumnStatistics[] stats = reader.getStatistics();
+   assertEquals(2, stats[1].getNumberOfValues());
+   assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
+   assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
+   assertEquals("count: 2 true: 1", stats[1].toString());
+
+   assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
+   assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
+   assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
+   assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
+   assertEquals("count: 2 min: 1024 max: 2048 sum: 3072", stats[3].toString());
+
+   assertEquals(Long.MAX_VALUE,
+       ((IntegerColumnStatistics) stats[5]).getMaximum());
+   assertEquals(Long.MAX_VALUE,
+       ((IntegerColumnStatistics) stats[5]).getMinimum());
+   assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
+   assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
+       stats[5].toString());
+
+   // doubles are compared with a tolerance, the same way as the sum below
+   assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum(), 0.00001);
+   assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum(), 0.00001);
+   assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
+   assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0", stats[7].toString());
+
+   assertEquals("count: 2 min: bye max: hi", stats[9].toString());
+
+   // check the inspectors
+   StructObjectInspector readerInspector =
+       (StructObjectInspector) reader.getObjectInspector();
+   assertEquals(ObjectInspector.Category.STRUCT,
+       readerInspector.getCategory());
+   assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
+       + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
+       + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
+       + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
+       + "map:map<string,struct<int1:int,string1:string>>>",
+       readerInspector.getTypeName());
+   List<? extends StructField> fields =
+       readerInspector.getAllStructFieldRefs();
+   BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.
+       getStructFieldRef("boolean1").getFieldObjectInspector();
+   ByteObjectInspector by = (ByteObjectInspector) readerInspector.
+       getStructFieldRef("byte1").getFieldObjectInspector();
+   ShortObjectInspector sh = (ShortObjectInspector) readerInspector.
+       getStructFieldRef("short1").getFieldObjectInspector();
+   IntObjectInspector in = (IntObjectInspector) readerInspector.
+       getStructFieldRef("int1").getFieldObjectInspector();
+   LongObjectInspector lo = (LongObjectInspector) readerInspector.
+       getStructFieldRef("long1").getFieldObjectInspector();
+   FloatObjectInspector fl = (FloatObjectInspector) readerInspector.
+       getStructFieldRef("float1").getFieldObjectInspector();
+   DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.
+       getStructFieldRef("double1").getFieldObjectInspector();
+   BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
+       getStructFieldRef("bytes1").getFieldObjectInspector();
+   StringObjectInspector st = (StringObjectInspector) readerInspector.
+       getStructFieldRef("string1").getFieldObjectInspector();
+   StructObjectInspector mid = (StructObjectInspector) readerInspector.
+       getStructFieldRef("middle").getFieldObjectInspector();
+   List<? extends StructField> midFields = mid.getAllStructFieldRefs();
+   ListObjectInspector midli =
+       (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
+   StructObjectInspector inner = (StructObjectInspector)
+       midli.getListElementObjectInspector();
+   List<? extends StructField> inFields = inner.getAllStructFieldRefs();
+   ListObjectInspector li = (ListObjectInspector) readerInspector.
+       getStructFieldRef("list").getFieldObjectInspector();
+   MapObjectInspector ma = (MapObjectInspector) readerInspector.
+       getStructFieldRef("map").getFieldObjectInspector();
+   StructObjectInspector lc = (StructObjectInspector)
+       li.getListElementObjectInspector();
+   StringObjectInspector mk = (StringObjectInspector)
+       ma.getMapKeyObjectInspector();
+   StructObjectInspector mv = (StructObjectInspector)
+       ma.getMapValueObjectInspector();
+   RecordReader rows = reader.rows(null);
+   Object row = rows.next(null);
+   assertNotNull(row);
+   // check the contents of the first row
+   assertEquals(false,
+       bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+   assertEquals(1, by.get(readerInspector.getStructFieldData(row,
+       fields.get(1))));
+   assertEquals(1024, sh.get(readerInspector.getStructFieldData(row,
+       fields.get(2))));
+   assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
+       fields.get(3))));
+   assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
+       getStructFieldData(row, fields.get(4))));
+   assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row,
+       fields.get(5))), 0.00001);
+   assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row,
+       fields.get(6))), 0.00001);
+   assertEquals(bytes(0,1,2,3,4), bi.getPrimitiveWritableObject(
+       readerInspector.getStructFieldData(row, fields.get(7))));
+   assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
+       getStructFieldData(row, fields.get(8))));
+   List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.
+       getStructFieldData(row, fields.get(9)), midFields.get(0)));
+   assertNotNull(midRow);
+   assertEquals(2, midRow.size());
+   assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
+       inFields.get(0))));
+   assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
+       (midRow.get(0), inFields.get(1))));
+   assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
+       inFields.get(0))));
+   assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
+       (midRow.get(1), inFields.get(1))));
+   List<?> list = li.getList(readerInspector.getStructFieldData(row,
+       fields.get(10)));
+   assertEquals(2, list.size());
+   assertEquals(3, in.get(inner.getStructFieldData(list.get(0),
+       inFields.get(0))));
+   assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData
+       (list.get(0), inFields.get(1))));
+   assertEquals(4, in.get(inner.getStructFieldData(list.get(1),
+       inFields.get(0))));
+   assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData
+       (list.get(1), inFields.get(1))));
+   Map<?,?> map = ma.getMap(readerInspector.getStructFieldData(row,
+       fields.get(11)));
+   assertEquals(0, map.size());
+
+   // check the contents of second row
+   assertEquals(true, rows.hasNext());
+   row = rows.next(row);
+   assertEquals(true,
+       bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+   assertEquals(100, by.get(readerInspector.getStructFieldData(row,
+       fields.get(1))));
+   assertEquals(2048, sh.get(readerInspector.getStructFieldData(row,
+       fields.get(2))));
+   assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
+       fields.get(3))));
+   assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
+       getStructFieldData(row, fields.get(4))));
+   assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row,
+       fields.get(5))), 0.00001);
+   assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row,
+       fields.get(6))), 0.00001);
+   assertEquals(bytes(), bi.getPrimitiveWritableObject(
+       readerInspector.getStructFieldData(row, fields.get(7))));
+   assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.
+       getStructFieldData(row, fields.get(8))));
+   midRow = midli.getList(mid.getStructFieldData(readerInspector.
+       getStructFieldData(row, fields.get(9)), midFields.get(0)));
+   assertNotNull(midRow);
+   assertEquals(2, midRow.size());
+   assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
+       inFields.get(0))));
+   assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
+       (midRow.get(0), inFields.get(1))));
+   assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
+       inFields.get(0))));
+   assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
+       (midRow.get(1), inFields.get(1))));
+   list = li.getList(readerInspector.getStructFieldData(row,
+       fields.get(10)));
+   assertEquals(3, list.size());
+   assertEquals(100000000, in.get(inner.getStructFieldData(list.get(0),
+       inFields.get(0))));
+   assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData
+       (list.get(0), inFields.get(1))));
+   assertEquals(-100000, in.get(inner.getStructFieldData(list.get(1),
+       inFields.get(0))));
+   assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData
+       (list.get(1), inFields.get(1))));
+   assertEquals(1234, in.get(inner.getStructFieldData(list.get(2),
+       inFields.get(0))));
+   assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData
+       (list.get(2), inFields.get(1))));
+   map = ma.getMap(readerInspector.getStructFieldData(row,
+       fields.get(11)));
+   assertEquals(2, map.size());
+   boolean[] found = new boolean[2];
+   for(Object key: map.keySet()) {
+     String str = mk.getPrimitiveJavaObject(key);
+     if (str.equals("chani")) {
+       assertEquals(false, found[0]);
+       assertEquals(5, in.get(inner.getStructFieldData(map.get(key),
+           inFields.get(0))));
+       assertEquals(str, st.getPrimitiveJavaObject(
+           inner.getStructFieldData(map.get(key), inFields.get(1))));
+       found[0] = true;
+     } else if (str.equals("mauddib")) {
+       assertEquals(false, found[1]);
+       assertEquals(1, in.get(inner.getStructFieldData(map.get(key),
+           inFields.get(0))));
+       assertEquals(str, st.getPrimitiveJavaObject(
+           inner.getStructFieldData(map.get(key), inFields.get(1))));
+       found[1] = true;
+     } else {
+       throw new IllegalArgumentException("Unknown key " + str);
+     }
+   }
+   assertEquals(true, found[0]);
+   assertEquals(true, found[1]);
+
+   // handle the close up
+   assertEquals(false, rows.hasNext());
+   rows.close();
+ }
+
+ @Test
+ public void columnProjection() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (InnerStruct.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+ 1000, CompressionKind.NONE, 100, 1000);
+ Random r1 = new Random(1);
+ Random r2 = new Random(2);
+ int x;
+ int minInt=0, maxInt=0;
+ String y;
+ String minStr = null, maxStr = null;
+ for(int i=0; i < 21000; ++i) {
+ x = r1.nextInt();
+ y = Long.toHexString(r2.nextLong());
+ if (i == 0 || x < minInt) {
+ minInt = x;
+ }
+ if (i == 0 || x > maxInt) {
+ maxInt = x;
+ }
+ if (i == 0 || y.compareTo(minStr) < 0) {
+ minStr = y;
+ }
+ if (i == 0 || y.compareTo(maxStr) > 0) {
+ maxStr = y;
+ }
+ writer.addRow(inner(x, y));
+ }
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, testFilePath);
+
+ // check out the statistics
+ ColumnStatistics[] stats = reader.getStatistics();
+ assertEquals(3, stats.length);
+ for(ColumnStatistics s: stats) {
+ assertEquals(21000, s.getNumberOfValues());
+ if (s instanceof IntegerColumnStatistics) {
+ assertEquals(minInt, ((IntegerColumnStatistics) s).getMinimum());
+ assertEquals(maxInt, ((IntegerColumnStatistics) s).getMaximum());
+ } else if (s instanceof StringColumnStatistics) {
+ assertEquals(maxStr, ((StringColumnStatistics) s).getMaximum());
+ assertEquals(minStr, ((StringColumnStatistics) s).getMinimum());
+ }
+ }
+
+ // check out the types
+ List<OrcProto.Type> types = reader.getTypes();
+ assertEquals(3, types.size());
+ assertEquals(OrcProto.Type.Kind.STRUCT, types.get(0).getKind());
+ assertEquals(2, types.get(0).getSubtypesCount());
+ assertEquals(1, types.get(0).getSubtypes(0));
+ assertEquals(2, types.get(0).getSubtypes(1));
+ assertEquals(OrcProto.Type.Kind.INT, types.get(1).getKind());
+ assertEquals(0, types.get(1).getSubtypesCount());
+ assertEquals(OrcProto.Type.Kind.STRING, types.get(2).getKind());
+ assertEquals(0, types.get(2).getSubtypesCount());
+
+ // read the contents and make sure they match
+ RecordReader rows1 = reader.rows(new boolean[]{true, true, false});
+ RecordReader rows2 = reader.rows(new boolean[]{true, false, true});
+ r1 = new Random(1);
+ r2 = new Random(2);
+ OrcStruct row1 = null;
+ OrcStruct row2 = null;
+ for(int i = 0; i < 21000; ++i) {
+ assertEquals(true, rows1.hasNext());
+ assertEquals(true, rows2.hasNext());
+ row1 = (OrcStruct) rows1.next(row1);
+ row2 = (OrcStruct) rows2.next(row2);
+ assertEquals(r1.nextInt(), ((IntWritable) row1.getFieldValue(0)).get());
+ assertEquals(Long.toHexString(r2.nextLong()),
+ row2.getFieldValue(1).toString());
+ }
+ assertEquals(false, rows1.hasNext());
+ assertEquals(false, rows2.hasNext());
+ rows1.close();
+ rows2.close();
+ }
+
+ /**
+  * Writes a file that contains no rows and checks what the reader reports
+  * for it. The row reader is closed explicitly so the test does not leave
+  * it open, matching the other tests in this file.
+  */
+ @Test
+ public void emptyFile() throws Exception {
+   ObjectInspector inspector;
+   synchronized (TestOrcFile.class) {
+     inspector = ObjectInspectorFactory.getReflectionObjectInspector
+         (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   }
+   Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+       1000, CompressionKind.NONE, 100, 10000);
+   writer.close();
+   Reader reader = OrcFile.createReader(fs, testFilePath);
+   RecordReader rows = reader.rows(null);
+   assertEquals(false, rows.hasNext());
+   rows.close();
+   assertEquals(CompressionKind.NONE, reader.getCompression());
+   assertEquals(0, reader.getNumberOfRows());
+   assertEquals(0, reader.getCompressionSize());
+   assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
+   assertEquals(3, reader.getContentLength());
+   assertEquals(false, reader.getStripes().iterator().hasNext());
+ }
+
+ /**
+  * Checks that user metadata round-trips: a key that is set several times
+  * reads back with the value set last, a 40000-byte value reads back equal
+  * to what was written, and looking up a key that was never written throws
+  * IllegalArgumentException.
+  */
+ @Test
+ public void metaData() throws Exception {
+   ObjectInspector inspector;
+   synchronized (TestOrcFile.class) {
+     inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+         BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   }
+   Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+       1000, CompressionKind.NONE, 100, 10000);
+   writer.addUserMetadata("my.meta",
+       byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128));
+   writer.addUserMetadata("clobber", byteBuf(1, 2, 3));
+   writer.addUserMetadata("clobber", byteBuf(4, 3, 2, 1));
+   ByteBuffer largeValue = ByteBuffer.allocate(40000);
+   Random filler = new Random(0);
+   filler.nextBytes(largeValue.array());
+   writer.addUserMetadata("big", largeValue);
+   // put the position back to the start before the buffer is compared below
+   largeValue.position(0);
+   writer.addRow(new BigRow(true, (byte) 127, (short) 1024, 42,
+       42L * 1024 * 1024 * 1024, (float) 3.1415, -2.713, null,
+       null, null, null, null));
+   // set "clobber" once more, this time after a row has been added
+   writer.addUserMetadata("clobber", byteBuf(5, 7, 11, 13, 17, 19));
+   writer.close();
+
+   Reader reader = OrcFile.createReader(fs, testFilePath);
+   assertEquals(byteBuf(5, 7, 11, 13, 17, 19),
+       reader.getMetadataValue("clobber"));
+   assertEquals(byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128),
+       reader.getMetadataValue("my.meta"));
+   assertEquals(largeValue, reader.getMetadataValue("big"));
+   try {
+     reader.getMetadataValue("unknown");
+     assertTrue(false);
+   } catch (IllegalArgumentException iae) {
+     // PASS
+   }
+
+   // exactly the three keys written above must be listed
+   int knownKeys = 0;
+   for (String key : reader.getMetadataKeys()) {
+     boolean recognised = "my.meta".equals(key)
+         || "clobber".equals(key)
+         || "big".equals(key);
+     if (!recognised) {
+       throw new IllegalArgumentException("unknown key " + key);
+     }
+     knownKeys += 1;
+   }
+   assertEquals(3, knownKeys);
+ }
+
+ /**
+  * We test union and timestamp separately since we need to build the
+  * object inspector manually. (Hive's reflection-based object inspector
+  * doesn't handle them properly.)
+  *
+  * The test writes 1309 rows of (timestamp, union of int and string),
+  * including null timestamps, null unions and unions holding null, then
+  * checks the stripe layout and reads every row back.
+  */
+ @Test
+ public void testUnionAndTimestamp() throws Exception {
+   // type 0 is the root struct; 1 = timestamp, 2 = union of 3 (int), 4 (string)
+   List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
+   types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT).
+       addFieldNames("time").addFieldNames("union").
+       addSubtypes(1).addSubtypes(2).build());
+   types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP).
+       build());
+   types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.UNION).
+       addSubtypes(3).addSubtypes(4).build());
+   types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).
+       build());
+   types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).
+       build());
+
+   ObjectInspector inspector;
+   synchronized (TestOrcFile.class) {
+     inspector = OrcStruct.createObjectInspector(0, types);
+   }
+   Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+       1000, CompressionKind.NONE, 100, 10000);
+   OrcStruct row = new OrcStruct(2);
+   OrcUnion union = new OrcUnion();
+   row.setFieldValue(1, union);
+   row.setFieldValue(0, Timestamp.valueOf("2000-03-12 15:00:00"));
+   union.set((byte) 0, new IntWritable(42));
+   writer.addRow(row);
+   row.setFieldValue(0, Timestamp.valueOf("2000-03-20 12:00:00.123456789"));
+   union.set((byte)1, new Text("hello"));
+   writer.addRow(row);
+   // a row where both fields are null
+   row.setFieldValue(0, null);
+   row.setFieldValue(1, null);
+   writer.addRow(row);
+   // rows where the union is present but holds null, once per tag
+   row.setFieldValue(1, union);
+   union.set((byte) 0, null);
+   writer.addRow(row);
+   union.set((byte) 1, null);
+   writer.addRow(row);
+   union.set((byte) 0, new IntWritable(200000));
+   row.setFieldValue(0, Timestamp.valueOf("1900-01-01 00:00:00"));
+   writer.addRow(row);
+   // alternate the union tag: even years carry an int, odd years a string
+   for(int i=1900; i < 2200; ++i) {
+     row.setFieldValue(0, Timestamp.valueOf(i + "-05-05 12:34:56." + i));
+     if ((i & 1) == 0) {
+       union.set((byte) 0, new IntWritable(i*i));
+     } else {
+       union.set((byte) 1, new Text(Integer.toString(i * i)));
+     }
+     writer.addRow(row);
+   }
+   // let's add a lot of constant rows to test the rle
+   row.setFieldValue(0, null);
+   union.set((byte) 0, new IntWritable(1732050807));
+   for(int i=0; i < 1000; ++i) {
+     writer.addRow(row);
+   }
+   union.set((byte) 0, new IntWritable(0));
+   writer.addRow(row);
+   union.set((byte) 0, new IntWritable(10));
+   writer.addRow(row);
+   union.set((byte) 0, new IntWritable(138));
+   writer.addRow(row);
+   writer.close();
+   Reader reader = OrcFile.createReader(fs, testFilePath);
+   assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
+   assertEquals(1309, reader.getNumberOfRows());
+   // each stripe must begin exactly where the previous one ended
+   int stripeCount = 0;
+   int rowCount = 0;
+   long currentOffset = -1;
+   for(StripeInformation stripe: reader.getStripes()) {
+     stripeCount += 1;
+     rowCount += stripe.getNumberOfRows();
+     if (currentOffset < 0) {
+       currentOffset = stripe.getOffset() + stripe.getIndexLength() +
+           stripe.getDataLength() + stripe.getFooterLength();
+     } else {
+       assertEquals(currentOffset, stripe.getOffset());
+       currentOffset += stripe.getIndexLength() +
+           stripe.getDataLength() + stripe.getFooterLength();
+     }
+   }
+   assertEquals(reader.getNumberOfRows(), rowCount);
+   assertEquals(2, stripeCount);
+   assertEquals(reader.getContentLength(), currentOffset);
+   RecordReader rows = reader.rows(null);
+   assertEquals(0, rows.getRowNumber());
+   assertEquals(0.0, rows.getProgress(), 0.000001);
+   assertEquals(true, rows.hasNext());
+   row = (OrcStruct) rows.next(null);
+   inspector = reader.getObjectInspector();
+   assertEquals("struct<time:timestamp,union:union{int, string}>",
+       inspector.getTypeName());
+   assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
+       row.getFieldValue(0));
+   union = (OrcUnion) row.getFieldValue(1);
+   assertEquals(0, union.getTag());
+   assertEquals(new IntWritable(42), union.getObject());
+   // NOTE(review): from here on the union is only re-fetched after a row
+   // whose union field was null; the assertions presumably rely on the
+   // reader updating the same OrcUnion when the row is reused - confirm.
+   row = (OrcStruct) rows.next(row);
+   assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
+       row.getFieldValue(0));
+   assertEquals(1, union.getTag());
+   assertEquals(new Text("hello"), union.getObject());
+   row = (OrcStruct) rows.next(row);
+   assertEquals(null, row.getFieldValue(0));
+   assertEquals(null, row.getFieldValue(1));
+   row = (OrcStruct) rows.next(row);
+   assertEquals(null, row.getFieldValue(0));
+   union = (OrcUnion) row.getFieldValue(1);
+   assertEquals(0, union.getTag());
+   assertEquals(null, union.getObject());
+   row = (OrcStruct) rows.next(row);
+   assertEquals(null, row.getFieldValue(0));
+   assertEquals(1, union.getTag());
+   assertEquals(null, union.getObject());
+   row = (OrcStruct) rows.next(row);
+   assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"),
+       row.getFieldValue(0));
+   assertEquals(new IntWritable(200000), union.getObject());
+   for(int i=1900; i < 2200; ++i) {
+     row = (OrcStruct) rows.next(row);
+     assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
+         row.getFieldValue(0));
+     if ((i & 1) == 0) {
+       assertEquals(0, union.getTag());
+       assertEquals(new IntWritable(i*i), union.getObject());
+     } else {
+       assertEquals(1, union.getTag());
+       assertEquals(new Text(Integer.toString(i * i)), union.getObject());
+     }
+   }
+   for(int i=0; i < 1000; ++i) {
+     row = (OrcStruct) rows.next(row);
+     assertEquals(new IntWritable(1732050807), union.getObject());
+   }
+   row = (OrcStruct) rows.next(row);
+   assertEquals(new IntWritable(0), union.getObject());
+   row = (OrcStruct) rows.next(row);
+   assertEquals(new IntWritable(10), union.getObject());
+   row = (OrcStruct) rows.next(row);
+   assertEquals(new IntWritable(138), union.getObject());
+   assertEquals(false, rows.hasNext());
+   assertEquals(1.0, rows.getProgress(), 0.00001);
+   assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
+   rows.close();
+ }
+
+ /**
+ * Read and write a randomly generated snappy file.
+ * @throws Exception
+ */
+ @Test
+ public void testSnappy() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (InnerStruct.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+ 1000, CompressionKind.SNAPPY, 100, 10000);
+ Random rand = new Random(12);
+ for(int i=0; i < 10000; ++i) {
+ writer.addRow(new InnerStruct(rand.nextInt(),
+ Integer.toHexString(rand.nextInt())));
+ }
+ writer.close();
+ Reader reader = OrcFile.createReader(fs, testFilePath);
+ RecordReader rows = reader.rows(null);
+ rand = new Random(12);
+ OrcStruct row = null;
+ for(int i=0; i < 10000; ++i) {
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(rand.nextInt(), ((IntWritable) row.getFieldValue(0)).get());
+ assertEquals(Integer.toHexString(rand.nextInt()),
+ row.getFieldValue(1).toString());
+ }
+ assertEquals(false, rows.hasNext());
+ rows.close();
+ }
+
+ /**
+  * Writes a SNAPPY-compressed file whose reader reports a row index stride
+  * of 0 and a first stripe with no index bytes, then reads every row back.
+  * Each randomly generated row is written five times in a row.
+  * @throws Exception
+  */
+ @Test
+ public void testWithoutIndex() throws Exception {
+   ObjectInspector inspector;
+   synchronized (TestOrcFile.class) {
+     inspector = ObjectInspectorFactory.getReflectionObjectInspector(
+         InnerStruct.class,
+         ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   }
+   final int distinctRows = 10000;
+   final int repeats = 5;
+   final long seed = 24;
+   Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+       5000, CompressionKind.SNAPPY, 1000, 0);
+   Random source = new Random(seed);
+   for (int n = 0; n < distinctRows; ++n) {
+     int intVal = source.nextInt();
+     String strVal = Integer.toBinaryString(source.nextInt());
+     InnerStruct written = new InnerStruct(intVal, strVal);
+     for (int copy = 0; copy < repeats; ++copy) {
+       writer.addRow(written);
+     }
+   }
+   writer.close();
+
+   Reader reader = OrcFile.createReader(fs, testFilePath);
+   assertEquals(50000, reader.getNumberOfRows());
+   assertEquals(0, reader.getRowIndexStride());
+   StripeInformation firstStripe = reader.getStripes().iterator().next();
+   assertEquals(true, firstStripe.getDataLength() != 0);
+   assertEquals(0, firstStripe.getIndexLength());
+
+   RecordReader rows = reader.rows(null);
+   source = new Random(seed);
+   OrcStruct current = null;
+   for (int n = 0; n < distinctRows; ++n) {
+     int expectedInt = source.nextInt();
+     String expectedStr = Integer.toBinaryString(source.nextInt());
+     for (int copy = 0; copy < repeats; ++copy) {
+       assertEquals(true, rows.hasNext());
+       current = (OrcStruct) rows.next(current);
+       assertEquals(expectedInt,
+           ((IntWritable) current.getFieldValue(0)).get());
+       assertEquals(expectedStr, current.getFieldValue(1).toString());
+     }
+   }
+   assertEquals(false, rows.hasNext());
+   rows.close();
+ }
+
+ /**
+  * Writes 32768 pseudo-random BigRow values with ZLIB compression, then
+  * seeks to every row from the last to the first and compares what is read
+  * with a freshly regenerated copy of the expected row. The row reader is
+  * closed when the test is done.
+  */
+ @Test
+ @SuppressWarnings("unchecked") // getFieldValue returns Object; the list casts are by construction
+ public void testSeek() throws Exception {
+   ObjectInspector inspector;
+   synchronized (TestOrcFile.class) {
+     inspector = ObjectInspectorFactory.getReflectionObjectInspector
+         (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+   }
+   Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+       200000, CompressionKind.ZLIB, 65536, 1000);
+   Random rand = new Random(42);
+   final int COUNT=32768;
+   long[] intValues= new long[COUNT];
+   double[] doubleValues = new double[COUNT];
+   String[] stringValues = new String[COUNT];
+   BytesWritable[] byteValues = new BytesWritable[COUNT];
+   String[] words = new String[128];
+   for(int i=0; i < words.length; ++i) {
+     words[i] = Integer.toHexString(rand.nextInt());
+   }
+   // every value is stored twice in a row, so adjacent rows share their
+   // integer and string values
+   for(int i=0; i < COUNT/2; ++i) {
+     intValues[2*i] = rand.nextLong();
+     intValues[2*i+1] = intValues[2*i];
+     stringValues[2*i] = words[rand.nextInt(words.length)];
+     stringValues[2*i+1] = stringValues[2*i];
+   }
+   for(int i=0; i < COUNT; ++i) {
+     doubleValues[i] = rand.nextDouble();
+     byte[] buf = new byte[20];
+     rand.nextBytes(buf);
+     byteValues[i] = new BytesWritable(buf);
+   }
+   for(int i=0; i < COUNT; ++i) {
+     writer.addRow(createRandomRow(intValues, doubleValues, stringValues,
+         byteValues, words, i));
+   }
+   writer.close();
+   Reader reader = OrcFile.createReader(fs, testFilePath);
+   assertEquals(COUNT, reader.getNumberOfRows());
+   RecordReader rows = reader.rows(null);
+   OrcStruct row = null;
+   // walk backwards so that every read is preceded by a real seek
+   for(int i=COUNT-1; i >= 0; --i) {
+     rows.seekToRow(i);
+     row = (OrcStruct) rows.next(row);
+     BigRow expected = createRandomRow(intValues, doubleValues,
+         stringValues, byteValues, words, i);
+     assertEquals(expected.boolean1.booleanValue(),
+         ((BooleanWritable) row.getFieldValue(0)).get());
+     assertEquals(expected.byte1.byteValue(),
+         ((ByteWritable) row.getFieldValue(1)).get());
+     assertEquals(expected.short1.shortValue(),
+         ((ShortWritable) row.getFieldValue(2)).get());
+     assertEquals(expected.int1.intValue(),
+         ((IntWritable) row.getFieldValue(3)).get());
+     assertEquals(expected.long1.longValue(),
+         ((LongWritable) row.getFieldValue(4)).get());
+     assertEquals(expected.float1.floatValue(),
+         ((FloatWritable) row.getFieldValue(5)).get(), 0.0001);
+     assertEquals(expected.double1.doubleValue(),
+         ((DoubleWritable) row.getFieldValue(6)).get(), 0.0001);
+     assertEquals(expected.bytes1, row.getFieldValue(7));
+     assertEquals(expected.string1, row.getFieldValue(8));
+     List<InnerStruct> expectedList = expected.middle.list;
+     List<OrcStruct> actualList = (List<OrcStruct>)
+         ((OrcStruct) row.getFieldValue(9)).getFieldValue(0);
+     compareList(expectedList, actualList);
+     compareList(expected.list, (List<OrcStruct>) row.getFieldValue(10));
+   }
+   rows.close();
+ }
+
+ /**
+  * Asserts that a struct that was read matches the struct that was written.
+  * When either side is null the two references are compared directly;
+  * otherwise field 0 is compared with int1 and field 1 with string1.
+  */
+ private void compareInner(InnerStruct expect,
+     OrcStruct actual) throws Exception {
+   if (expect != null && actual != null) {
+     IntWritable actualInt = (IntWritable) actual.getFieldValue(0);
+     assertEquals(expect.int1, actualInt.get());
+     assertEquals(expect.string1, actual.getFieldValue(1));
+     return;
+   }
+   assertEquals(expect, actual);
+ }
+
+ /**
+  * Asserts that both lists have the same length and that the elements at
+  * each position match according to compareInner.
+  */
+ private void compareList(List<InnerStruct> expect,
+     List<OrcStruct> actual) throws Exception {
+   assertEquals(expect.size(), actual.size());
+   int position = 0;
+   while (position < expect.size()) {
+     compareInner(expect.get(position), actual.get(position));
+     position += 1;
+   }
+ }
+
+ /**
+  * Builds the BigRow for position i from the pre-generated value arrays.
+  * The boolean, byte, short, int and long fields are all derived from
+  * intValues[i]; the float and double fields from doubleValues[i].
+  */
+ private BigRow createRandomRow(long[] intValues, double[] doubleValues,
+     String[] stringValues,
+     BytesWritable[] byteValues,
+     String[] words, int i) {
+   long bits = intValues[i];
+   double real = doubleValues[i];
+   String text = stringValues[i];
+   // low 32 bits feed the first inner struct, high 32 bits the second
+   InnerStruct lowInner = new InnerStruct((int) bits, text);
+   InnerStruct highInner = new InnerStruct((int) (bits >> 32),
+       words[i % words.length] + "-x");
+   boolean evenBits = (bits & 1) == 0;
+   return new BigRow(evenBits, (byte) bits,
+       (short) bits, (int) bits, bits,
+       (float) real, real, byteValues[i], text,
+       new MiddleStruct(lowInner, highInner), list(),
+       map(lowInner, highInner));
+ }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestOrcStruct {
+
+ @Test
+ public void testStruct() throws Exception {
+ OrcStruct st1 = new OrcStruct(4);
+ OrcStruct st2 = new OrcStruct(4);
+ OrcStruct st3 = new OrcStruct(3);
+ st1.setFieldValue(0, "hop");
+ st1.setFieldValue(1, "on");
+ st1.setFieldValue(2, "pop");
+ st1.setFieldValue(3, 42);
+ assertEquals(false, st1.equals(null));
+ st2.setFieldValue(0, "hop");
+ st2.setFieldValue(1, "on");
+ st2.setFieldValue(2, "pop");
+ st2.setFieldValue(3, 42);
+ assertEquals(st1, st2);
+ st3.setFieldValue(0, "hop");
+ st3.setFieldValue(1, "on");
+ st3.setFieldValue(2, "pop");
+ assertEquals(false, st1.equals(st3));
+ assertEquals(11241, st1.hashCode());
+ assertEquals(st1.hashCode(), st2.hashCode());
+ assertEquals(11204, st3.hashCode());
+ assertEquals("{hop, on, pop, 42}", st1.toString());
+ st1.setFieldValue(3, null);
+ assertEquals(false, st1.equals(st2));
+ assertEquals(false, st2.equals(st1));
+ st2.setFieldValue(3, null);
+ assertEquals(st1, st2);
+ }
+
+ @Test
+ public void testInspectorFromTypeInfo() throws Exception {
+ TypeInfo typeInfo =
+ TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:tinyint" +
+ ",c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary," +
+ "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>" +
+ ",c13:array<timestamp>>");
+ StructObjectInspector inspector = (StructObjectInspector)
+ OrcStruct.createObjectInspector(typeInfo);
+ assertEquals("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:" +
+ "bigint,c6:float,c7:double,c8:binary,c9:string,c10:struct<" +
+ "c1:int>,c11:map<int,int>,c12:union{int},c13:array<timestamp>>",
+ inspector.getTypeName());
+ assertEquals(null,
+ inspector.getAllStructFieldRefs().get(0).getFieldComment());
+ assertEquals(null, inspector.getStructFieldRef("UNKNOWN"));
+ OrcStruct s1 = new OrcStruct(13);
+ for(int i=0; i < 13; ++i) {
+ s1.setFieldValue(i, i);
+ }
+
+ List<Object> list = new ArrayList<Object>();
+ list.addAll(Arrays.asList(0,1,2,3,4,5,6,7,8,9,10,11,12));
+ assertEquals(list, inspector.getStructFieldsDataAsList(s1));
+ ListObjectInspector listOI = (ListObjectInspector)
+ inspector.getAllStructFieldRefs().get(12).getFieldObjectInspector();
+ assertEquals(ObjectInspector.Category.LIST, listOI.getCategory());
+ assertEquals(10, listOI.getListElement(list, 10));
+ assertEquals(13, listOI.getListLength(list));
+
+ Map<Integer, Integer> map = new HashMap<Integer,Integer>();
+ map.put(1,2);
+ map.put(2,4);
+ map.put(3,6);
+ MapObjectInspector mapOI = (MapObjectInspector)
+ inspector.getAllStructFieldRefs().get(10).getFieldObjectInspector();
+ assertEquals(3, mapOI.getMapSize(map));
+ assertEquals(4, mapOI.getMapValueElement(map, 2));
+ }
+
+ @Test
+ public void testUnion() throws Exception {
+ OrcUnion un1 = new OrcUnion();
+ OrcUnion un2 = new OrcUnion();
+ un1.set((byte) 0, "hi");
+ un2.set((byte) 0, "hi");
+ assertEquals(un1, un2);
+ assertEquals(un1.hashCode(), un2.hashCode());
+ un2.set((byte) 0, null);
+ assertEquals(false, un1.equals(un2));
+ assertEquals(false, un2.equals(un1));
+ un1.set((byte) 0, null);
+ assertEquals(un1, un2);
+ un2.set((byte) 0, "hi");
+ un1.set((byte) 1, "hi");
+ assertEquals(false, un1.equals(un2));
+ assertEquals(false, un1.hashCode() == un2.hashCode());
+ un2.set((byte) 1, "byte");
+ assertEquals(false, un1.equals(un2));
+ assertEquals("union(1, hi)", un1.toString());
+ assertEquals(false, un1.equals(null));
+ }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+
+import static junit.framework.Assert.assertEquals;
+
+public class TestRunLengthByteReader {
+
+ @Test
+ public void testUncompressedSeek() throws Exception {
+ TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+ RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
+ null, collect));
+ TestInStream.PositionCollector[] positions =
+ new TestInStream.PositionCollector[2048];
+ for(int i=0; i < 2048; ++i) {
+ positions[i] = new TestInStream.PositionCollector();
+ out.getPosition(positions[i]);
+ if (i < 1024) {
+ out.write((byte) (i/4));
+ } else {
+ out.write((byte) i);
+ }
+ }
+ out.flush();
+ ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+ collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+ inBuf.flip();
+ RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
+ inBuf, null, 100));
+ for(int i=0; i < 2048; ++i) {
+ int x = in.next() & 0xff;
+ if (i < 1024) {
+ assertEquals((i/4) & 0xff, x);
+ } else {
+ assertEquals(i & 0xff, x);
+ }
+ }
+ for(int i=2047; i >= 0; --i) {
+ in.seek(positions[i]);
+ int x = in.next() & 0xff;
+ if (i < 1024) {
+ assertEquals((i/4) & 0xff, x);
+ } else {
+ assertEquals(i & 0xff, x);
+ }
+ }
+ }
+
+ @Test
+ public void testCompressedSeek() throws Exception {
+ CompressionCodec codec = new SnappyCodec();
+ TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+ RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 500,
+ codec, collect));
+ TestInStream.PositionCollector[] positions =
+ new TestInStream.PositionCollector[2048];
+ for(int i=0; i < 2048; ++i) {
+ positions[i] = new TestInStream.PositionCollector();
+ out.getPosition(positions[i]);
+ if (i < 1024) {
+ out.write((byte) (i/4));
+ } else {
+ out.write((byte) i);
+ }
+ }
+ out.flush();
+ ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+ collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+ inBuf.flip();
+ RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
+ inBuf, codec, 500));
+ for(int i=0; i < 2048; ++i) {
+ int x = in.next() & 0xff;
+ if (i < 1024) {
+ assertEquals((i/4) & 0xff, x);
+ } else {
+ assertEquals(i & 0xff, x);
+ }
+ }
+ for(int i=2047; i >= 0; --i) {
+ in.seek(positions[i]);
+ int x = in.next() & 0xff;
+ if (i < 1024) {
+ assertEquals((i/4) & 0xff, x);
+ } else {
+ assertEquals(i & 0xff, x);
+ }
+ }
+ }
+
+ @Test
+ public void testSkips() throws Exception {
+ TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+ RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
+ null, collect));
+ for(int i=0; i < 2048; ++i) {
+ if (i < 1024) {
+ out.write((byte) (i/16));
+ } else {
+ out.write((byte) i);
+ }
+ }
+ out.flush();
+ ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+ collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+ inBuf.flip();
+ RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
+ inBuf, null, 100));
+ for(int i=0; i < 2048; i += 10) {
+ int x = in.next() & 0xff;
+ if (i < 1024) {
+ assertEquals((i/16) & 0xff, x);
+ } else {
+ assertEquals(i & 0xff, x);
+ }
+ if (i < 2038) {
+ in.skip(9);
+ }
+ in.skip(0);
+ }
+ }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import static junit.framework.Assert.assertEquals;
+
+public class TestRunLengthIntegerReader {
+
+  /** Number of values written by the seek test. */
+  private static final int SEEK_VALUE_COUNT = 4096;
+
+  /**
+   * Returns the value the seek test writes at index i: repeated values
+   * (runs) for the first 1024 indexes, steadily increasing values for the
+   * next 1024, and the pre-generated random values after that.
+   */
+  private static int seekValue(int i, int[] junk) {
+    if (i < 1024) {
+      return i / 4;
+    } else if (i < 2048) {
+      return 2 * i;
+    } else {
+      return junk[i - 2048];
+    }
+  }
+
+  /**
+   * Writes SEEK_VALUE_COUNT values while recording the writer position
+   * before each one, reads all of them back in order, and then seeks to
+   * every recorded position from last to first and re-reads the value
+   * there. All three sections of the data (runs, increasing values and
+   * random values) are verified in both passes.
+   *
+   * @param codec the codec handed to both streams, or null for none
+   */
+  public void runSeekTest(CompressionCodec codec) throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+        new OutStream("test", 1000, codec, collect), true);
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[SEEK_VALUE_COUNT];
+    Random random = new Random(99);
+    int[] junk = new int[2048];
+    for(int i=0; i < junk.length; ++i) {
+      junk[i] = random.nextInt();
+    }
+    for(int i=0; i < SEEK_VALUE_COUNT; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      // test runs, incrementing runs, non-runs
+      out.write(seekValue(i, junk));
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+        ("test", inBuf, codec, 1000), true);
+    // sequential pass over everything that was written
+    for(int i=0; i < SEEK_VALUE_COUNT; ++i) {
+      int x = (int) in.next();
+      assertEquals(seekValue(i, junk), x);
+    }
+    // random-access pass, walking backwards so every read needs a seek
+    for(int i=SEEK_VALUE_COUNT - 1; i >= 0; --i) {
+      in.seek(positions[i]);
+      int x = (int) in.next();
+      assertEquals(seekValue(i, junk), x);
+    }
+  }
+
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    runSeekTest(null);
+  }
+
+  @Test
+  public void testCompressedSeek() throws Exception {
+    runSeekTest(new ZlibCodec());
+  }
+
+  /**
+   * Reads every tenth value of an uncompressed stream, skipping the nine
+   * values in between, and also checks that a skip of zero is harmless.
+   */
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+        new OutStream("test", 100, null, collect), true);
+    for(int i=0; i < 2048; ++i) {
+      if (i < 1024) {
+        out.write(i);
+      } else {
+        out.write(256 * i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+        ("test", inBuf, null, 100), true);
+    for(int i=0; i < 2048; i += 10) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i, x);
+      } else {
+        assertEquals(256 * i, x);
+      }
+      // fewer than nine values are left after the last sample, so don't skip
+      if (i < 2038) {
+        in.skip(9);
+      }
+      in.skip(0);
+    }
+  }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestSerializationUtils {
+
+ @Test
+ public void TestDoubles() throws Exception {
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ SerializationUtils.writeDouble(buffer, 1343822337.759);
+ assertEquals(1343822337.759,
+ SerializationUtils.readDouble(new
+ ByteArrayInputStream(buffer.toByteArray())), 0.0001);
+ }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestStreamName {
+
+ @Test
+ public void test1() throws Exception {
+ StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA);
+ StreamName s2 = new StreamName(3,
+ OrcProto.Stream.Kind.DICTIONARY_DATA);
+ StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA);
+ StreamName s4 = new StreamName(5,
+ OrcProto.Stream.Kind.DICTIONARY_DATA);
+ StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA);
+ assertEquals(true, s1.equals(s1));
+ assertEquals(false, s1.equals(s2));
+ assertEquals(false, s1.equals(s3));
+ assertEquals(true, s1.equals(s1p));
+ assertEquals(true, s1.compareTo(null) < 0);
+ assertEquals(false, s1.equals(null));
+ assertEquals(true, s1.compareTo(s2) < 0);
+ assertEquals(true, s2.compareTo(s3) < 0);
+ assertEquals(true, s3.compareTo(s4) < 0);
+ assertEquals(true, s4.compareTo(s1p) > 0);
+ assertEquals(0, s1p.compareTo(s1));
+ }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,296 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+
+import static junit.framework.Assert.assertEquals;
+
+/**
+ * Test the red-black tree with string keys.
+ */
+public class TestStringRedBlackTree {
+
+ /**
+  * Recursively verifies the red-black invariants for one subtree. On any
+  * violation the whole tree is printed to stderr and an
+  * IllegalStateException is thrown.
+  *
+  * Invariants checked:
+  * 1. A red node has no red child.
+  * 2. The left and right subtrees of every node have equal black height.
+  *
+  * @param tree The tree being validated.
+  * @param node The id of the root of the subtree to check.
+  * @param count Running tally, incremented once for every non-NULL node
+  * that is visited.
+  * @return The black height of the subtree; a NULL subtree counts as 1.
+  */
+ private int checkSubtree(RedBlackTree tree, int node, IntWritable count
+                          ) throws IOException {
+   if (node == RedBlackTree.NULL) {
+     return 1;
+   }
+   count.set(count.get() + 1);
+
+   boolean red = tree.isRed(node);
+   int leftChild = tree.getLeft(node);
+   int rightChild = tree.getRight(node);
+
+   // invariant 1: no red node may have a red child
+   if (red && tree.isRed(leftChild)) {
+     printTree(tree, "", tree.root);
+     throw new IllegalStateException("Left node of " + node + " is " +
+         leftChild + " and both are red.");
+   }
+   if (red && tree.isRed(rightChild)) {
+     printTree(tree, "", tree.root);
+     throw new IllegalStateException("Right node of " + node + " is " +
+         rightChild + " and both are red.");
+   }
+
+   // invariant 2: both sides must report the same black height
+   int leftHeight = checkSubtree(tree, leftChild, count);
+   int rightHeight = checkSubtree(tree, rightChild, count);
+   if (leftHeight != rightHeight) {
+     printTree(tree, "", tree.root);
+     throw new IllegalStateException("Lopsided tree at node " + node +
+         " with depths " + leftHeight + " and " + rightHeight);
+   }
+
+   // only black nodes add to the black height
+   return red ? leftHeight : leftHeight + 1;
+ }
+
+ /**
+  * Validates a complete tree: the root must not be red, every subtree must
+  * satisfy the red-black invariants, and the number of nodes reached must
+  * equal the tree's recorded size. The tree is printed to stderr before any
+  * failure is thrown.
+  *
+  * @param tree The tree to validate.
+  */
+ void checkTree(RedBlackTree tree) throws IOException {
+   IntWritable visited = new IntWritable(0);
+   boolean redRoot = tree.isRed(tree.root);
+   if (redRoot) {
+     printTree(tree, "", tree.root);
+     throw new IllegalStateException("root is red");
+   }
+
+   checkSubtree(tree, tree.root, visited);
+
+   int seen = visited.get();
+   if (seen != tree.size) {
+     printTree(tree, "", tree.root);
+     throw new IllegalStateException("Broken tree! visited= " + seen +
+         " size=" + tree.size);
+   }
+ }
+
+ /**
+  * Dumps a subtree to stderr in pre-order, one node per line, with children
+  * printed at a deeper indent than their parent.
+  *
+  * @param tree The tree to print.
+  * @param indent Prefix written before this node's line.
+  * @param node The id of the root of the subtree to print.
+  */
+ void printTree(RedBlackTree tree, String indent, int node
+                ) throws IOException {
+   if (node == RedBlackTree.NULL) {
+     System.err.println(indent + "NULL");
+     return;
+   }
+   String color = tree.isRed(node) ? "red" : "black";
+   System.err.println(indent + "Node " + node + " color " + color +
+       " count " + tree.getCount(node));
+   String childIndent = indent + " ";
+   printTree(tree, childIndent, tree.getLeft(node));
+   printTree(tree, childIndent, tree.getRight(node));
+ }
+
+ /**
+  * Visitor that compares every visited entry, in visiting order, against
+  * the expected word, count and original position.
+  */
+ private static class MyVisitor implements StringRedBlackTree.Visitor {
+   private final String[] words;
+   private final int[] counts;
+   private final int[] order;
+   // scratch buffer reused to capture the bytes written for each entry
+   private final DataOutputBuffer buffer = new DataOutputBuffer();
+   // index of the next expected entry, i.e. the number of entries seen so far
+   int current = 0;
+
+   /**
+    * @param args Expected words, in the order they should be visited.
+    * @param counts Expected count for each word, parallel to args.
+    * @param order Expected original position for each word, parallel to args.
+    */
+   MyVisitor(String[] args, int[] counts, int[] order) {
+     words = args;
+     this.counts = counts;
+     this.order = order;
+   }
+
+   /**
+    * Asserts that the entry matches the next expected word, count and
+    * original position, and that the bytes it writes decode back to the
+    * same word.
+    *
+    * @param context The entry currently being visited.
+    * @throws IOException if writing the entry's bytes to the buffer fails.
+    */
+   @Override
+   public void visit(StringRedBlackTree.VisitorContext context
+                     ) throws IOException {
+     String word = context.getText().toString();
+     assertEquals("in word " + current, words[current], word);
+     assertEquals("in word " + current, counts[current], context.getCount());
+     assertEquals("in word " + current, order[current],
+         context.getOriginalPosition());
+     buffer.reset();
+     context.writeBytes(buffer);
+     // Decode explicitly as UTF-8 rather than with the platform default
+     // charset, so the comparison does not depend on the JVM's locale.
+     // NOTE(review): assumes writeBytes emits the UTF-8 bytes held by the
+     // Text returned from getText() - confirm against StringRedBlackTree.
+     assertEquals(word,
+         new String(buffer.getData(), 0, buffer.getLength(), "UTF-8"));
+     current += 1;
+   }
+ }
+
+ /**
+  * Visits the tree and checks each entry against the expected values, then
+  * verifies that the traversal reached every expected word.
+  *
+  * @param tree The tree to inspect.
+  * @param counts Expected count for each word, parallel to params.
+  * @param order Expected original position for each word, parallel to params.
+  * @param params Expected words, in visiting order.
+  */
+ void checkContents(StringRedBlackTree tree, int[] counts, int[] order,
+                    String... params
+                    ) throws IOException {
+   MyVisitor visitor = new MyVisitor(params, counts, order);
+   tree.visit(visitor);
+   // The visitor only checks entries it is handed, so a traversal that
+   // stops early would otherwise go unnoticed.
+   assertEquals("number of words visited", params.length, visitor.current);
+ }
+
+ /**
+  * Builds a tree by adding the given words one at a time, validating the
+  * tree structure after every insertion.
+  *
+  * @param params The words to add, in insertion order.
+  * @return The populated tree.
+  */
+ StringRedBlackTree buildTree(String... params) throws IOException {
+   StringRedBlackTree built = new StringRedBlackTree();
+   for (int i = 0; i < params.length; i++) {
+     built.add(params[i]);
+     checkTree(built);
+   }
+   return built;
+ }
+
+ @Test
+ public void test1() throws Exception {
+ StringRedBlackTree tree = new StringRedBlackTree(5);
+ assertEquals(0, tree.getByteSize());
+ checkTree(tree);
+ assertEquals(0, tree.add("owen"));
+ checkTree(tree);
+ assertEquals(1, tree.add("ashutosh"));
+ checkTree(tree);
+ assertEquals(0, tree.add("owen"));
+ checkTree(tree);
+ assertEquals(2, tree.add("alan"));
+ checkTree(tree);
+ assertEquals(2, tree.add("alan"));
+ checkTree(tree);
+ assertEquals(1, tree.add("ashutosh"));
+ checkTree(tree);
+ assertEquals(3, tree.add("greg"));
+ checkTree(tree);
+ assertEquals(4, tree.add("eric"));
+ checkTree(tree);
+ assertEquals(5, tree.add("arun"));
+ checkTree(tree);
+ assertEquals(6, tree.size());
+ checkTree(tree);
+ assertEquals(6, tree.add("eric14"));
+ checkTree(tree);
+ assertEquals(7, tree.add("o"));
+ checkTree(tree);
+ assertEquals(8, tree.add("ziggy"));
+ checkTree(tree);
+ assertEquals(9, tree.add("z"));
+ checkTree(tree);
+ checkContents(tree, new int[]{2,1,2,1,1,1,1,2,1,1},
+ new int[]{2,5,1,4,6,3,7,0,9,8},
+ "alan", "arun", "ashutosh", "eric", "eric14", "greg",
+ "o", "owen", "z", "ziggy");
+ assertEquals(10*5*4 + 8 + 6 + 5 + 5 * 4 + 2 * 1, tree.getByteSize());
+ // check that adding greg again bumps the count
+ assertEquals(1, tree.getCount(3));
+ assertEquals(3, tree.add("greg"));
+ assertEquals(2, tree.getCount(3));
+ assertEquals(41, tree.getCharacterSize());
+ // add some more strings to test the different branches of the
+ // rebalancing
+ assertEquals(10, tree.add("zak"));
+ checkTree(tree);
+ assertEquals(11, tree.add("eric1"));
+ checkTree(tree);
+ assertEquals(12, tree.add("ash"));
+ checkTree(tree);
+ assertEquals(13, tree.add("harry"));
+ checkTree(tree);
+ assertEquals(14, tree.add("john"));
+ checkTree(tree);
+ tree.clear();
+ checkTree(tree);
+ assertEquals(0, tree.getByteSize());
+ assertEquals(0, tree.getCharacterSize());
+ }
+
+ @Test
+ public void test2() throws Exception {
+ StringRedBlackTree tree =
+ buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
+ "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+ assertEquals(26, tree.size());
+ checkContents(tree, new int[]{1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1,
+ 1,1,1, 1,1,1, 1,1}, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
+ 15,16,17, 18,19,20, 21,22,23, 24,25},
+ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
+ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+ }
+
+ @Test
+ public void test3() throws Exception {
+ StringRedBlackTree tree =
+ buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
+ "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
+ assertEquals(26, tree.size());
+ checkContents(tree, new int[]{1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1,
+ 1,1,1, 1,1,1, 1,1}, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
+ 13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
+ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
+ "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+ }
+
+ /**
+  * Runs the listed test methods of this and the sibling test classes
+  * directly, one after another, without a test runner. The first exception
+  * thrown aborts the run.
+  *
+  * @param args Ignored.
+  */
+ public static void main(String[] args) throws Exception {
+   // red-black tree
+   TestStringRedBlackTree treeTests = new TestStringRedBlackTree();
+   treeTests.test1();
+   treeTests.test2();
+   treeTests.test3();
+
+   // low-level utilities
+   TestSerializationUtils serializationTests = new TestSerializationUtils();
+   serializationTests.TestDoubles();
+   TestDynamicArray dynamicArrayTests = new TestDynamicArray();
+   dynamicArrayTests.testByteArray();
+   dynamicArrayTests.testIntArray();
+   TestZlib zlibTests = new TestZlib();
+   zlibTests.testCorrupt();
+   zlibTests.testNoOverflow();
+
+   // streams and readers
+   TestInStream inStreamTests = new TestInStream();
+   inStreamTests.testUncompressed();
+   inStreamTests.testCompressed();
+   inStreamTests.testCorruptStream();
+   TestRunLengthByteReader byteRleTests = new TestRunLengthByteReader();
+   byteRleTests.testUncompressedSeek();
+   byteRleTests.testCompressedSeek();
+   byteRleTests.testSkips();
+   TestRunLengthIntegerReader intRleTests = new TestRunLengthIntegerReader();
+   intRleTests.testUncompressedSeek();
+   intRleTests.testCompressedSeek();
+   intRleTests.testSkips();
+   TestBitFieldReader bitFieldTests = new TestBitFieldReader();
+   bitFieldTests.testUncompressedSeek();
+   bitFieldTests.testCompressedSeek();
+   bitFieldTests.testBiggerItems();
+   bitFieldTests.testSkips();
+
+   // file-level tests
+   TestOrcFile orcFileTests = new TestOrcFile();
+   orcFileTests.test1();
+   orcFileTests.emptyFile();
+   orcFileTests.metaData();
+   orcFileTests.testUnionAndTimestamp();
+   orcFileTests.columnProjection();
+   orcFileTests.testSnappy();
+   orcFileTests.testWithoutIndex();
+   orcFileTests.testSeek();
+   TestFileDump fileDumpTests = new TestFileDump();
+   fileDumpTests.testDump();
+   TestStreamName streamNameTests = new TestStreamName();
+   streamNameTests.test1();
+
+   // input/output format and struct tests
+   TestInputOutputFormat inputOutputTests = new TestInputOutputFormat();
+   inputOutputTests.testInOutFormat();
+   inputOutputTests.testMROutput();
+   inputOutputTests.testEmptyFile();
+   inputOutputTests.testDefaultTypes();
+   TestOrcStruct orcStructTests = new TestOrcStruct();
+   orcStructTests.testStruct();
+   orcStructTests.testInspectorFromTypeInfo();
+   orcStructTests.testUnion();
+ }
+}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java Tue Mar 5 20:44:50 2013
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.fail;
+
+public class TestZlib {
+
+ @Test
+ public void testNoOverflow() throws Exception {
+ ByteBuffer in = ByteBuffer.allocate(10);
+ ByteBuffer out = ByteBuffer.allocate(10);
+ in.put(new byte[]{1,2,3,4,5,6,7,10});
+ in.flip();
+ CompressionCodec codec = new ZlibCodec();
+ assertEquals(false, codec.compress(in, out, null));
+ }
+
+ @Test
+ public void testCorrupt() throws Exception {
+ ByteBuffer buf = ByteBuffer.allocate(1000);
+ buf.put(new byte[]{127,-128,0,99,98,-1});
+ buf.flip();
+ CompressionCodec codec = new ZlibCodec();
+ ByteBuffer out = ByteBuffer.allocate(1000);
+ try {
+ codec.decompress(buf, out);
+ fail();
+ } catch (IOException ioe) {
+ // EXPECTED
+ }
+ }
+}