You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ke...@apache.org on 2013/03/05 21:44:52 UTC
svn commit: r1452992 [7/8] - in /hive/trunk: ./ ivy/ ql/ ql/src/gen/protobuf/ ql/src/gen/protobuf/gen-java/ ql/src/gen/protobuf/gen-java/org/ ql/src/gen/protobuf/gen-java/org/apache/ ql/src/gen/protobuf/gen-java/org/apache/hadoop/ ql/src/gen/protobuf/g...

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,360 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestInputOutputFormat {
+
+  // Scratch directory for the ORC files these tests write; taken from the
+  // "test.tmp.dir" system property, falling back to "target/test/tmp".
+  Path workDir = new Path(System.getProperty("test.tmp.dir","target/test/tmp"));
+
+  /**
+   * Two-int row type used as test data. It implements Writable only
+   * nominally: both Writable methods throw, so instances are never
+   * serialized through the Writable protocol in this test.
+   */
+  public static class MyRow implements Writable {
+    // read back by the tests below as int columns "x" and "y"
+    int x;
+    int y;
+    MyRow(int x, int y) {
+      this.x = x;
+      this.y = y;
+    }
+
+    // Not supported: always throws UnsupportedOperationException.
+    @Override
+    public void write(DataOutput dataOutput) throws IOException {
+      throw new UnsupportedOperationException("no write");
+    }
+
+    // Not supported: always throws UnsupportedOperationException.
+    @Override
+    public void readFields(DataInput dataInput) throws IOException {
+     throw new UnsupportedOperationException("no read");
+    }
+  }
+
+  // Exposes the name of the running test method; openFileSystem() uses it to
+  // build a per-test file name.
+  @Rule
+  public TestName testCaseName = new TestName();
+  // The three fields below are (re)assigned by openFileSystem() before each test.
+  JobConf conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Before
+  public void openFileSystem () throws Exception {
+    conf = new JobConf();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestInputOutputFormat." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  /**
+   * Round-trips three MyRow records through OrcOutputFormat and
+   * OrcInputFormat, exercises InputFormatChecker.validateInput on the
+   * result, and then reads the file twice: once with every column and once
+   * with only column 0 selected.
+   */
+  @Test
+  public void testInOutFormat() throws Exception {
+    Properties properties = new Properties();
+    StructObjectInspector inspector;
+    // NOTE(review): presumably serializes reflection-inspector creation
+    // across the ORC test classes - confirm why the lock is needed
+    synchronized (TestOrcFile.class) {
+      inspector = (StructObjectInspector)
+          ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    SerDe serde = new OrcSerde();
+    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
+    FileSinkOperator.RecordWriter writer =
+        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
+            properties, Reporter.NULL);
+    // rows are (1,2), (2,2), (3,2): x counts up from 1 while y stays 2
+    writer.write(serde.serialize(new MyRow(1,2), inspector));
+    writer.write(serde.serialize(new MyRow(2,2), inspector));
+    writer.write(serde.serialize(new MyRow(3,2), inspector));
+    writer.close(true);
+    // fresh serde for the read side, initialized from the column properties
+    serde = new OrcSerde();
+    properties.setProperty("columns", "x,y");
+    properties.setProperty("columns.types", "int:int");
+    serde.initialize(conf, properties);
+    assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
+    inspector = (StructObjectInspector) serde.getObjectInspector();
+    assertEquals("struct<x:int,y:int>", inspector.getTypeName());
+    InputFormat<?,?> in = new OrcInputFormat();
+    FileInputFormat.setInputPaths(conf, testFilePath.toString());
+    InputSplit[] splits = in.getSplits(conf, 1);
+    assertEquals(1, splits.length);
+
+    // test the validate input method
+    ArrayList<FileStatus> fileList = new ArrayList<FileStatus>();
+    // an empty file list is rejected
+    assertEquals(false,
+        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
+    // the ORC file written above is accepted
+    fileList.add(fs.getFileStatus(testFilePath));
+    assertEquals(true,
+        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
+    // once the work directory itself is added, the list is rejected again
+    fileList.add(fs.getFileStatus(workDir));
+    assertEquals(false,
+        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
+
+
+    // read the whole file
+    org.apache.hadoop.mapred.RecordReader reader =
+        in.getRecordReader(splits[0], conf, Reporter.NULL);
+    Object key = reader.createKey();
+    Writable value = (Writable) reader.createValue();
+    int rowNum = 0;
+    List<? extends StructField> fields =inspector.getAllStructFieldRefs();
+    // both columns are int, so the inspector of field 0 is reused for field 1
+    IntObjectInspector intInspector =
+        (IntObjectInspector) fields.get(0).getFieldObjectInspector();
+    assertEquals(0.0, reader.getProgress(), 0.00001);
+    assertEquals(0, reader.getPos());
+    while (reader.next(key, value)) {
+      // x must match the 1-based row number, y must always be 2
+      assertEquals(++rowNum, intInspector.get(inspector.
+          getStructFieldData(serde.deserialize(value), fields.get(0))));
+      assertEquals(2, intInspector.get(inspector.
+          getStructFieldData(serde.deserialize(value), fields.get(1))));
+    }
+    assertEquals(3, rowNum);
+    assertEquals(1.0, reader.getProgress(), 0.00001);
+    reader.close();
+
+    // read just the first column
+    conf.set("hive.io.file.readcolumn.ids", "0");
+    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
+    key = reader.createKey();
+    value = (Writable) reader.createValue();
+    rowNum = 0;
+    fields = inspector.getAllStructFieldRefs();
+    while (reader.next(key, value)) {
+      assertEquals(++rowNum, intInspector.get(inspector.
+          getStructFieldData(value, fields.get(0))));
+      // column 1 was not selected, so its field is expected to be null
+      assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
+    }
+    assertEquals(3, rowNum);
+    reader.close();
+  }
+
+  /**
+   * Test row holding an int and a nested MyRow struct. As with MyRow, the
+   * Writable methods are stubs that always throw.
+   */
+  static class NestedRow implements Writable {
+    // read back by testMROutput as columns "z" (int) and "r" (struct<x,y>)
+    int z;
+    MyRow r;
+    // x and y populate the nested MyRow; z is stored directly
+    NestedRow(int x, int y, int z) {
+      this.z = z;
+      this.r = new MyRow(x,y);
+    }
+
+    // Not supported: always throws UnsupportedOperationException.
+    @Override
+    public void write(DataOutput dataOutput) throws IOException {
+      throw new UnsupportedOperationException("unsupported");
+    }
+
+    // Not supported: always throws UnsupportedOperationException.
+    @Override
+    public void readFields(DataInput dataInput) throws IOException {
+      throw new UnsupportedOperationException("unsupported");
+    }
+  }
+
+  /**
+   * Writes three NestedRow records through the mapred OutputFormat API
+   * (getRecordWriter) and reads them back with only column 1 (the nested
+   * struct "r") selected, checking that the unselected column "z" is null.
+   */
+  @Test
+  public void testMROutput() throws Exception {
+    Properties properties = new Properties();
+    StructObjectInspector inspector;
+    // NOTE(review): presumably serializes reflection-inspector creation
+    // across the ORC test classes - confirm why the lock is needed
+    synchronized (TestOrcFile.class) {
+      inspector = (StructObjectInspector)
+          ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    SerDe serde = new OrcSerde();
+    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
+    RecordWriter writer =
+        outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
+            Reporter.NULL);
+    // NestedRow(x, y, z): row k (0-based) carries x = 3k+1, y = 3k+2, z = 3k+3
+    writer.write(NullWritable.get(),
+        serde.serialize(new NestedRow(1,2,3), inspector));
+    writer.write(NullWritable.get(),
+        serde.serialize(new NestedRow(4,5,6), inspector));
+    writer.write(NullWritable.get(),
+        serde.serialize(new NestedRow(7,8,9), inspector));
+    writer.close(Reporter.NULL);
+    // fresh serde for the read side, initialized from the column properties
+    serde = new OrcSerde();
+    properties.setProperty("columns", "z,r");
+    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
+    serde.initialize(conf, properties);
+    inspector = (StructObjectInspector) serde.getObjectInspector();
+    InputFormat<?,?> in = new OrcInputFormat();
+    FileInputFormat.setInputPaths(conf, testFilePath.toString());
+    InputSplit[] splits = in.getSplits(conf, 1);
+    assertEquals(1, splits.length);
+    // select only column 1 ("r") for reading
+    conf.set("hive.io.file.readcolumn.ids", "1");
+    org.apache.hadoop.mapred.RecordReader reader =
+        in.getRecordReader(splits[0], conf, Reporter.NULL);
+    Object key = reader.createKey();
+    Object value = reader.createValue();
+    int rowNum = 0;
+    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
+    StructObjectInspector inner = (StructObjectInspector)
+        fields.get(1).getFieldObjectInspector();
+    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
+    // z, x and y are all int, so the inspector of field 0 is reused for x and y
+    IntObjectInspector intInspector =
+        (IntObjectInspector) fields.get(0).getFieldObjectInspector();
+    while (reader.next(key, value)) {
+      // "z" was not selected, so it is expected to be null
+      assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
+      Object sub = inspector.getStructFieldData(value, fields.get(1));
+      assertEquals(3*rowNum+1, intInspector.get(inner.getStructFieldData(sub,
+          inFields.get(0))));
+      assertEquals(3*rowNum+2, intInspector.get(inner.getStructFieldData(sub,
+          inFields.get(1))));
+      rowNum += 1;
+    }
+    assertEquals(3, rowNum);
+    reader.close();
+  }
+
+  /**
+   * Writes an ORC file with no rows and checks that it still yields exactly
+   * one split, that the reader starts at position 0 with progress 0 and
+   * returns no rows, and that the serde reports no stats.
+   */
+  @Test
+  public void testEmptyFile() throws Exception {
+    Properties properties = new Properties();
+    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
+    FileSinkOperator.RecordWriter writer =
+        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
+            properties, Reporter.NULL);
+    // close immediately without writing any row
+    writer.close(true);
+    properties.setProperty("columns", "x,y");
+    properties.setProperty("columns.types", "int:int");
+    SerDe serde = new OrcSerde();
+    serde.initialize(conf, properties);
+    InputFormat<?,?> in = new OrcInputFormat();
+    FileInputFormat.setInputPaths(conf, testFilePath.toString());
+    InputSplit[] splits = in.getSplits(conf, 1);
+    assertEquals(1, splits.length);
+
+    // read the whole file
+    conf.set("hive.io.file.readcolumn.ids", "0,1");
+    org.apache.hadoop.mapred.RecordReader reader =
+        in.getRecordReader(splits[0], conf, Reporter.NULL);
+    Object key = reader.createKey();
+    Object value = reader.createValue();
+    assertEquals(0.0, reader.getProgress(), 0.00001);
+    assertEquals(0, reader.getPos());
+    assertEquals(false, reader.next(key, value));
+    reader.close();
+    assertEquals(null, serde.getSerDeStats());
+  }
+
+  /**
+   * Test row with two String fields that both receive the same value. The
+   * Writable methods are stubs that always throw.
+   */
+  static class StringRow implements Writable {
+    // read back by testDefaultTypes as columns "str" and "str2"
+    String str;
+    String str2;
+    StringRow(String s) {
+      str = s;
+      str2 = s;
+    }
+    // Not supported: always throws UnsupportedOperationException.
+    @Override
+    public void write(DataOutput dataOutput) throws IOException {
+      throw new UnsupportedOperationException("no write");
+    }
+
+    // Not supported: always throws UnsupportedOperationException.
+    @Override
+    public void readFields(DataInput dataInput) throws IOException {
+      throw new UnsupportedOperationException("no read");
+    }
+  }
+
+  /**
+   * Writes six StringRow records and reads them back through a serde that
+   * was initialized with column names only (no "columns.types"); the type
+   * name assertion below pins both columns to string in that case.
+   */
+  @Test
+  public void testDefaultTypes() throws Exception {
+    // values written, in order; the read loop expects them back in this order
+    String[] expected = {"owen", "beth", "laurel", "hazen", "colin", "miles"};
+    Properties properties = new Properties();
+    StructObjectInspector inspector;
+    // NOTE(review): presumably serializes reflection-inspector creation
+    // across the ORC test classes - confirm why the lock is needed
+    synchronized (TestOrcFile.class) {
+      inspector = (StructObjectInspector)
+          ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    SerDe serde = new OrcSerde();
+    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
+    FileSinkOperator.RecordWriter writer =
+        outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
+            true, properties, Reporter.NULL);
+    for (String name : expected) {
+      writer.write(serde.serialize(new StringRow(name), inspector));
+    }
+    writer.close(true);
+    // fresh serde for the read side; only the column names are supplied
+    serde = new OrcSerde();
+    properties.setProperty("columns", "str,str2");
+    serde.initialize(conf, properties);
+    inspector = (StructObjectInspector) serde.getObjectInspector();
+    assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
+    InputFormat<?,?> in = new OrcInputFormat();
+    FileInputFormat.setInputPaths(conf, testFilePath.toString());
+    InputSplit[] splits = in.getSplits(conf, 1);
+    assertEquals(1, splits.length);
+
+    // read the whole file
+    org.apache.hadoop.mapred.RecordReader reader =
+        in.getRecordReader(splits[0], conf, Reporter.NULL);
+    Object key = reader.createKey();
+    Writable value = (Writable) reader.createValue();
+    List<? extends StructField> fields =inspector.getAllStructFieldRefs();
+    StringObjectInspector strInspector = (StringObjectInspector)
+        fields.get(0).getFieldObjectInspector();
+    for (String name : expected) {
+      assertEquals(true, reader.next(key, value));
+      assertEquals(name, strInspector.getPrimitiveJavaObject(inspector.
+          getStructFieldData(value, fields.get(0))));
+    }
+    // nothing may follow the last expected row
+    assertEquals(false, reader.next(key, value));
+    reader.close();
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,896 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import static junit.framework.Assert.*;
+import static junit.framework.Assert.assertEquals;
+
+/**
+ * Tests for the top level reader/streamFactory of ORC files.
+ */
+public class TestOrcFile {
+
+  /**
+   * Leaf struct of the test schema: an int plus a Text. test1 expects it to
+   * appear in the file's type name as struct&lt;int1:int,string1:string&gt;.
+   */
+  public static class InnerStruct {
+    int int1;
+    Text string1 = new Text();
+    InnerStruct(int int1, String string1) {
+      this.int1 = int1;
+      // copy the String into the pre-allocated Text
+      this.string1.set(string1);
+    }
+  }
+
+  /**
+   * Struct whose only field is a list of InnerStruct values, used as the
+   * "middle" column of BigRow.
+   */
+  public static class MiddleStruct {
+    List<InnerStruct> list = new ArrayList<InnerStruct>();
+
+    // Fills the list with the given items, preserving their order.
+    MiddleStruct(InnerStruct... items) {
+      list.clear();
+      for (int idx = 0; idx < items.length; ++idx) {
+        list.add(items[idx]);
+      }
+    }
+  }
+
+  /**
+   * Wide test row with one field per type exercised by test1: boxed
+   * primitives, binary, string, a nested struct, a list and a map.
+   */
+  public static class BigRow {
+    Boolean boolean1;
+    Byte byte1;
+    Short short1;
+    Integer int1;
+    Long long1;
+    Float float1;
+    Double double1;
+    BytesWritable bytes1;
+    Text string1;
+    MiddleStruct middle;
+    List<InnerStruct> list = new ArrayList<InnerStruct>();
+    Map<Text, InnerStruct> map = new HashMap<Text, InnerStruct>();
+
+    // Stores every argument as given; only s2 is converted (String to Text).
+    BigRow(Boolean b1, Byte b2, Short s1, Integer i1, Long l1, Float f1,
+           Double d1,
+           BytesWritable b3, String s2, MiddleStruct m1,
+           List<InnerStruct> l2, Map<Text, InnerStruct> m2) {
+      boolean1 = b1;
+      byte1 = b2;
+      short1 = s1;
+      int1 = i1;
+      long1 = l1;
+      float1 = f1;
+      double1 = d1;
+      bytes1 = b3;
+      // a null String stays null instead of being wrapped in a Text
+      string1 = (s2 == null) ? null : new Text(s2);
+      middle = m1;
+      list = l2;
+      map = m2;
+    }
+  }
+
+  // Shorthand for constructing an InnerStruct in the row literals below.
+  private static InnerStruct inner(int i, String s) {
+    InnerStruct created = new InnerStruct(i, s);
+    return created;
+  }
+
+  // Builds a map that keys each item by a copy of its own string1 value.
+  private static Map<Text, InnerStruct> map(InnerStruct... items)  {
+    Map<Text, InnerStruct> byName = new HashMap<Text, InnerStruct>();
+    for (int idx = 0; idx < items.length; ++idx) {
+      InnerStruct entry = items[idx];
+      byName.put(new Text(entry.string1), entry);
+    }
+    return byName;
+  }
+
+  // Collects the given items into a new mutable list, preserving their order.
+  private static List<InnerStruct> list(InnerStruct... items) {
+    List<InnerStruct> collected = new ArrayList<InnerStruct>();
+    for (int idx = 0; idx < items.length; ++idx) {
+      collected.add(items[idx]);
+    }
+    return collected;
+  }
+
+  // Builds a BytesWritable whose content is the given ints, each narrowed to
+  // a byte.
+  private static BytesWritable bytes(int... items) {
+    BytesWritable result = new BytesWritable();
+    result.setSize(items.length);
+    // the size is fixed above, so the backing array can be fetched once
+    byte[] backing = result.getBytes();
+    int pos = 0;
+    for (int item : items) {
+      backing[pos++] = (byte) item;
+    }
+    return result;
+  }
+
+  /**
+   * Builds a ByteBuffer containing the given ints, each narrowed to a byte.
+   * The buffer is flipped before it is returned, so its position is 0 and
+   * its limit is items.length. Without the flip the position would sit at
+   * the limit and every consumer (including ByteBuffer.equals, which only
+   * compares remaining bytes) would see an empty buffer.
+   */
+  private static ByteBuffer byteBuf(int... items) {
+    ByteBuffer result = ByteBuffer.allocate(items.length);
+    for(int item: items) {
+      result.put((byte) item);
+    }
+    // switch from writing to reading: limit = bytes written, position = 0
+    result.flip();
+    return result;
+  }
+
+  // Scratch directory for the ORC files these tests write; taken from the
+  // "test.tmp.dir" system property, falling back to target/test/tmp.
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  // The three fields below are (re)assigned by openFileSystem() before each test.
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  // Exposes the name of the running test method; openFileSystem() uses it to
+  // build a per-test file name.
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem () throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void test1() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+        100000, CompressionKind.ZLIB, 10000, 10000);
+    writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
+        Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0,1,2,3,4), "hi",
+        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+        list(inner(3, "good"), inner(4, "bad")),
+        map()));
+    writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536,
+        Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
+        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+        list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
+        map(inner(5,"chani"), inner(1,"mauddib"))));
+    writer.close();
+    Reader reader = OrcFile.createReader(fs, testFilePath);
+
+    // check the stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(2, stats[1].getNumberOfValues());
+    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
+    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
+    assertEquals("count: 2 true: 1", stats[1].toString());
+
+    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
+    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
+    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
+    assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
+    assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
+        stats[3].toString());
+
+    assertEquals(Long.MAX_VALUE,
+        ((IntegerColumnStatistics) stats[5]).getMaximum());
+    assertEquals(Long.MAX_VALUE,
+        ((IntegerColumnStatistics) stats[5]).getMinimum());
+    assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
+    assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
+        stats[5].toString());
+
+    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
+    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
+    assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
+    assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
+        stats[7].toString());
+
+    assertEquals("count: 2 min: bye max: hi", stats[9].toString());
+
+    // check the inspectors
+    StructObjectInspector readerInspector =
+        (StructObjectInspector) reader.getObjectInspector();
+    assertEquals(ObjectInspector.Category.STRUCT,
+        readerInspector.getCategory());
+    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
+        + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
+        + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
+        + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
+        + "map:map<string,struct<int1:int,string1:string>>>",
+        readerInspector.getTypeName());
+    List<? extends StructField> fields =
+        readerInspector.getAllStructFieldRefs();
+    BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.
+        getStructFieldRef("boolean1").getFieldObjectInspector();
+    ByteObjectInspector by = (ByteObjectInspector) readerInspector.
+        getStructFieldRef("byte1").getFieldObjectInspector();
+    ShortObjectInspector sh = (ShortObjectInspector) readerInspector.
+        getStructFieldRef("short1").getFieldObjectInspector();
+    IntObjectInspector in = (IntObjectInspector) readerInspector.
+        getStructFieldRef("int1").getFieldObjectInspector();
+    LongObjectInspector lo = (LongObjectInspector) readerInspector.
+        getStructFieldRef("long1").getFieldObjectInspector();
+    FloatObjectInspector fl = (FloatObjectInspector) readerInspector.
+        getStructFieldRef("float1").getFieldObjectInspector();
+    DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.
+        getStructFieldRef("double1").getFieldObjectInspector();
+    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
+        getStructFieldRef("bytes1").getFieldObjectInspector();
+    StringObjectInspector st = (StringObjectInspector) readerInspector.
+        getStructFieldRef("string1").getFieldObjectInspector();
+    StructObjectInspector mid = (StructObjectInspector) readerInspector.
+        getStructFieldRef("middle").getFieldObjectInspector();
+    List<? extends StructField> midFields =
+        mid.getAllStructFieldRefs();
+    ListObjectInspector midli =
+        (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
+    StructObjectInspector inner = (StructObjectInspector)
+        midli.getListElementObjectInspector();
+    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
+    ListObjectInspector li = (ListObjectInspector) readerInspector.
+        getStructFieldRef("list").getFieldObjectInspector();
+    MapObjectInspector ma = (MapObjectInspector) readerInspector.
+        getStructFieldRef("map").getFieldObjectInspector();
+    StructObjectInspector lc = (StructObjectInspector)
+        li.getListElementObjectInspector();
+    StringObjectInspector mk = (StringObjectInspector)
+        ma.getMapKeyObjectInspector();
+    StructObjectInspector mv = (StructObjectInspector)
+        ma.getMapValueObjectInspector();
+    RecordReader rows = reader.rows(null);
+    Object row = rows.next(null);
+    assertNotNull(row);
+    // check the contents of the first row
+    assertEquals(false,
+        bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+    assertEquals(1, by.get(readerInspector.getStructFieldData(row,
+        fields.get(1))));
+    assertEquals(1024, sh.get(readerInspector.getStructFieldData(row,
+        fields.get(2))));
+    assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
+        fields.get(3))));
+    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
+        getStructFieldData(row, fields.get(4))));
+    assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row,
+        fields.get(5))), 0.00001);
+    assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row,
+        fields.get(6))), 0.00001);
+    assertEquals(bytes(0,1,2,3,4), bi.getPrimitiveWritableObject(
+        readerInspector.getStructFieldData(row, fields.get(7))));
+    assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
+        getStructFieldData(row, fields.get(8))));
+    List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.
+        getStructFieldData(row, fields.get(9)), midFields.get(0)));
+    assertNotNull(midRow);
+    assertEquals(2, midRow.size());
+    assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
+        inFields.get(0))));
+    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
+        (midRow.get(0), inFields.get(1))));
+    assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
+        inFields.get(0))));
+    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
+        (midRow.get(1), inFields.get(1))));
+    List<?> list = li.getList(readerInspector.getStructFieldData(row,
+        fields.get(10)));
+    assertEquals(2, list.size());
+    assertEquals(3, in.get(inner.getStructFieldData(list.get(0),
+        inFields.get(0))));
+    assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData
+        (list.get(0), inFields.get(1))));
+    assertEquals(4, in.get(inner.getStructFieldData(list.get(1),
+        inFields.get(0))));
+    assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData
+        (list.get(1), inFields.get(1))));
+    Map<?,?> map = ma.getMap(readerInspector.getStructFieldData(row,
+        fields.get(11)));
+    assertEquals(0, map.size());
+
+    // check the contents of second row
+    assertEquals(true, rows.hasNext());
+    row = rows.next(row);
+    assertEquals(true,
+        bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+    assertEquals(100, by.get(readerInspector.getStructFieldData(row,
+        fields.get(1))));
+    assertEquals(2048, sh.get(readerInspector.getStructFieldData(row,
+        fields.get(2))));
+    assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
+        fields.get(3))));
+    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
+        getStructFieldData(row, fields.get(4))));
+    assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row,
+        fields.get(5))), 0.00001);
+    assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row,
+        fields.get(6))), 0.00001);
+    assertEquals(bytes(), bi.getPrimitiveWritableObject(
+        readerInspector.getStructFieldData(row, fields.get(7))));
+    assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.
+        getStructFieldData(row, fields.get(8))));
+    midRow = midli.getList(mid.getStructFieldData(readerInspector.
+        getStructFieldData(row, fields.get(9)), midFields.get(0)));
+    assertNotNull(midRow);
+    assertEquals(2, midRow.size());
+    assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
+        inFields.get(0))));
+    assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
+        (midRow.get(0), inFields.get(1))));
+    assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
+        inFields.get(0))));
+    assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
+        (midRow.get(1), inFields.get(1))));
+    list = li.getList(readerInspector.getStructFieldData(row,
+        fields.get(10)));
+    assertEquals(3, list.size());
+    assertEquals(100000000, in.get(inner.getStructFieldData(list.get(0),
+        inFields.get(0))));
+    assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData
+        (list.get(0), inFields.get(1))));
+    assertEquals(-100000, in.get(inner.getStructFieldData(list.get(1),
+        inFields.get(0))));
+    assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData
+        (list.get(1), inFields.get(1))));
+    assertEquals(1234, in.get(inner.getStructFieldData(list.get(2),
+        inFields.get(0))));
+    assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData
+        (list.get(2), inFields.get(1))));
+    map = ma.getMap(readerInspector.getStructFieldData(row,
+        fields.get(11)));
+    assertEquals(2, map.size());
+    boolean[] found = new boolean[2];
+    for(Object key: map.keySet()) {
+      String str = mk.getPrimitiveJavaObject(key);
+      if (str.equals("chani")) {
+        assertEquals(false, found[0]);
+        assertEquals(5, in.get(inner.getStructFieldData(map.get(key),
+            inFields.get(0))));
+        assertEquals(str, st.getPrimitiveJavaObject(
+            inner.getStructFieldData(map.get(key), inFields.get(1))));
+        found[0] = true;
+      } else if (str.equals("mauddib")) {
+        assertEquals(false, found[1]);
+        assertEquals(1, in.get(inner.getStructFieldData(map.get(key),
+            inFields.get(0))));
+        assertEquals(str, st.getPrimitiveJavaObject(
+            inner.getStructFieldData(map.get(key), inFields.get(1))));
+        found[1] = true;
+      } else {
+        throw new IllegalArgumentException("Unknown key " + str);
+      }
+    }
+    assertEquals(true, found[0]);
+    assertEquals(true, found[1]);
+
+    // handle the close up
+    assertEquals(false, rows.hasNext());
+    rows.close();
+  }
+
+  @Test // writes 21000 (int, string) rows, checks stats and types, then reads through two projected readers
+  public void columnProjection() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (InnerStruct.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+        1000, CompressionKind.NONE, 100, 1000);
+    Random r1 = new Random(1); // fixed seeds so the written sequence can be replayed when reading back
+    Random r2 = new Random(2);
+    int x;
+    int minInt=0, maxInt=0; // running min/max of x, compared with the integer column statistics below
+    String y;
+    String minStr = null, maxStr = null; // running min/max of y, compared with the string column statistics below
+    for(int i=0; i < 21000; ++i) {
+      x = r1.nextInt();
+      y = Long.toHexString(r2.nextLong());
+      if (i == 0 || x < minInt) { // i == 0 seeds the min/max with the first value
+        minInt = x;
+      }
+      if (i == 0 || x > maxInt) {
+        maxInt = x;
+      }
+      if (i == 0 || y.compareTo(minStr) < 0) { // i == 0 short-circuits before minStr (still null) is dereferenced
+        minStr = y;
+      }
+      if (i == 0 || y.compareTo(maxStr) > 0) {
+        maxStr = y;
+      }
+      writer.addRow(inner(x, y));
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(fs, testFilePath);
+
+    // check out the statistics
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(3, stats.length); // presumably one entry per type listed below (struct, int, string) - confirm
+    for(ColumnStatistics s: stats) {
+      assertEquals(21000, s.getNumberOfValues());
+      if (s instanceof IntegerColumnStatistics) {
+        assertEquals(minInt, ((IntegerColumnStatistics) s).getMinimum());
+        assertEquals(maxInt, ((IntegerColumnStatistics) s).getMaximum());
+      } else if (s instanceof  StringColumnStatistics) {
+        assertEquals(maxStr, ((StringColumnStatistics) s).getMaximum());
+        assertEquals(minStr, ((StringColumnStatistics) s).getMinimum());
+      }
+    }
+
+    // check out the types
+    List<OrcProto.Type> types = reader.getTypes();
+    assertEquals(3, types.size());
+    assertEquals(OrcProto.Type.Kind.STRUCT, types.get(0).getKind());
+    assertEquals(2, types.get(0).getSubtypesCount());
+    assertEquals(1, types.get(0).getSubtypes(0)); // the struct's subtypes refer to type ids 1 and 2
+    assertEquals(2, types.get(0).getSubtypes(1));
+    assertEquals(OrcProto.Type.Kind.INT, types.get(1).getKind());
+    assertEquals(0, types.get(1).getSubtypesCount());
+    assertEquals(OrcProto.Type.Kind.STRING, types.get(2).getKind());
+    assertEquals(0, types.get(2).getSubtypesCount());
+
+    // read the contents and make sure they match
+    RecordReader rows1 = reader.rows(new boolean[]{true, true, false}); // only field 0 is read from rows1 below
+    RecordReader rows2 = reader.rows(new boolean[]{true, false, true}); // only field 1 is read from rows2 below
+    r1 = new Random(1); // re-seed to regenerate the values that were written
+    r2 = new Random(2);
+    OrcStruct row1 = null;
+    OrcStruct row2 = null;
+    for(int i = 0; i < 21000; ++i) {
+      assertEquals(true, rows1.hasNext());
+      assertEquals(true, rows2.hasNext());
+      row1 = (OrcStruct) rows1.next(row1);
+      row2 = (OrcStruct) rows2.next(row2);
+      assertEquals(r1.nextInt(), ((IntWritable) row1.getFieldValue(0)).get());
+      assertEquals(Long.toHexString(r2.nextLong()),
+          row2.getFieldValue(1).toString());
+    }
+    assertEquals(false, rows1.hasNext());
+    assertEquals(false, rows2.hasNext());
+    rows1.close();
+    rows2.close();
+  }
+
+  @Test // writes a file with no rows and checks what the reader reports for it
+  public void emptyFile() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+        1000, CompressionKind.NONE, 100, 10000);
+    writer.close(); // closed immediately: no rows and no user metadata were added
+    Reader reader = OrcFile.createReader(fs, testFilePath);
+    assertEquals(false, reader.rows(null).hasNext());
+    assertEquals(CompressionKind.NONE, reader.getCompression());
+    assertEquals(0, reader.getNumberOfRows());
+    assertEquals(0, reader.getCompressionSize());
+    assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
+    assertEquals(3, reader.getContentLength()); // NOTE(review): presumably the length of the file header/magic - confirm
+    assertEquals(false, reader.getStripes().iterator().hasNext());
+  }
+
+  @Test // round-trips user metadata, including a key that is overwritten several times
+  public void metaData() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+        1000, CompressionKind.NONE, 100, 10000);
+    writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128));
+    writer.addUserMetadata("clobber", byteBuf(1,2,3)); // "clobber" is set three times; the last value is the one asserted below
+    writer.addUserMetadata("clobber", byteBuf(4,3,2,1));
+    ByteBuffer bigBuf = ByteBuffer.allocate(40000);
+    Random random = new Random(0);
+    random.nextBytes(bigBuf.array()); // fills the buffer's backing array in place
+    writer.addUserMetadata("big", bigBuf);
+    bigBuf.position(0); // NOTE(review): presumably rewinds in case addUserMetadata moved the position - confirm
+    writer.addRow(new BigRow(true, (byte) 127, (short) 1024, 42,
+        42L * 1024 * 1024 * 1024, (float) 3.1415, -2.713, null,
+        null, null, null, null));
+    writer.addUserMetadata("clobber", byteBuf(5,7,11,13,17,19)); // set after a row was added, still expected to win
+    writer.close();
+    Reader reader = OrcFile.createReader(fs, testFilePath);
+    assertEquals(byteBuf(5,7,11,13,17,19), reader.getMetadataValue("clobber"));
+    assertEquals(byteBuf(1,2,3,4,5,6,7,-1,-2,127,-128),
+        reader.getMetadataValue("my.meta"));
+    assertEquals(bigBuf, reader.getMetadataValue("big"));
+    try {
+      reader.getMetadataValue("unknown");
+      assertTrue(false); // only reached if no exception was thrown for the unknown key
+    } catch (IllegalArgumentException iae) {
+      // PASS
+    }
+    int i = 0; // number of expected keys seen while iterating
+    for(String key: reader.getMetadataKeys()) {
+      if ("my.meta".equals(key) ||
+          "clobber".equals(key) ||
+          "big".equals(key)) {
+        i += 1;
+      } else {
+        throw new IllegalArgumentException("unknown key " + key);
+      }
+    }
+    assertEquals(3, i);
+  }
+
+  /**
+   * We test union and timestamp separately since we need to make the
+   * object inspector manually. (Hive's reflection-based object inspector
+   * doesn't handle them properly.)
+   */
+  @Test
+  public void testUnionAndTimestamp() throws Exception {
+    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>(); // hand-built schema; subtype numbers are indexes into this list
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT).
+        addFieldNames("time").addFieldNames("union").
+        addSubtypes(1).addSubtypes(2).build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP).
+        build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.UNION).
+        addSubtypes(3).addSubtypes(4).build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).
+        build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).
+        build());
+
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = OrcStruct.createObjectInspector(0, types);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+        1000, CompressionKind.NONE, 100, 10000);
+    OrcStruct row = new OrcStruct(2); // the same row and union objects are mutated and re-added for every row
+    OrcUnion union = new OrcUnion();
+    row.setFieldValue(1, union);
+    row.setFieldValue(0, Timestamp.valueOf("2000-03-12 15:00:00"));
+    union.set((byte) 0, new IntWritable(42));
+    writer.addRow(row);
+    row.setFieldValue(0, Timestamp.valueOf("2000-03-20 12:00:00.123456789"));
+    union.set((byte)1, new Text("hello"));
+    writer.addRow(row);
+    row.setFieldValue(0, null); // row with both fields null
+    row.setFieldValue(1, null);
+    writer.addRow(row);
+    row.setFieldValue(1, union); // union present but holding null, once per tag
+    union.set((byte) 0, null);
+    writer.addRow(row);
+    union.set((byte) 1, null);
+    writer.addRow(row);
+    union.set((byte) 0, new IntWritable(200000));
+    row.setFieldValue(0, Timestamp.valueOf("1900-01-01 00:00:00"));
+    writer.addRow(row);
+    for(int i=1900; i < 2200; ++i) { // 300 rows; even years use the int branch, odd years the string branch
+      row.setFieldValue(0, Timestamp.valueOf(i + "-05-05 12:34:56." + i));
+      if ((i & 1) == 0) {
+        union.set((byte) 0, new IntWritable(i*i));
+      } else {
+        union.set((byte) 1, new Text(Integer.toString(i*i))); // static toString avoids allocating an Integer
+      }
+      writer.addRow(row);
+    }
+    // let's add a lot of constant rows to test the rle
+    row.setFieldValue(0, null);
+    union.set((byte) 0, new IntWritable(1732050807));
+    for(int i=0; i < 1000; ++i) {
+      writer.addRow(row);
+    }
+    union.set((byte) 0, new IntWritable(0));
+    writer.addRow(row);
+    union.set((byte) 0, new IntWritable(10));
+    writer.addRow(row);
+    union.set((byte) 0, new IntWritable(138));
+    writer.addRow(row);
+    writer.close();
+    Reader reader = OrcFile.createReader(fs, testFilePath);
+    assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
+    assertEquals(1309, reader.getNumberOfRows()); // 6 individual rows + 300 + 1000 + 3
+    int stripeCount = 0;
+    int rowCount = 0;
+    long currentOffset = -1; // negative until the first stripe has been seen
+    for(StripeInformation stripe: reader.getStripes()) {
+      stripeCount += 1;
+      rowCount += stripe.getNumberOfRows();
+      if (currentOffset < 0) {
+        currentOffset = stripe.getOffset() + stripe.getIndexLength() +
+            stripe.getDataLength() + stripe.getFooterLength();
+      } else {
+        assertEquals(currentOffset, stripe.getOffset()); // each stripe must start where the previous one ended
+        currentOffset += stripe.getIndexLength() +
+            stripe.getDataLength() + stripe.getFooterLength();
+      }
+    }
+    assertEquals(reader.getNumberOfRows(), rowCount);
+    assertEquals(2, stripeCount);
+    assertEquals(reader.getContentLength(), currentOffset);
+    RecordReader rows = reader.rows(null);
+    assertEquals(0, rows.getRowNumber());
+    assertEquals(0.0, rows.getProgress(), 0.000001);
+    assertEquals(true, rows.hasNext());
+    row = (OrcStruct) rows.next(null);
+    inspector = reader.getObjectInspector();
+    assertEquals("struct<time:timestamp,union:union{int, string}>",
+        inspector.getTypeName());
+    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
+        row.getFieldValue(0));
+    union = (OrcUnion) row.getFieldValue(1);
+    assertEquals(0, union.getTag());
+    assertEquals(new IntWritable(42), union.getObject());
+    row = (OrcStruct) rows.next(row);
+    assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
+        row.getFieldValue(0));
+    assertEquals(1, union.getTag()); // union is not re-fetched: relies on next(row) updating the same OrcUnion object
+    assertEquals(new Text("hello"), union.getObject());
+    row = (OrcStruct) rows.next(row);
+    assertEquals(null, row.getFieldValue(0));
+    assertEquals(null, row.getFieldValue(1));
+    row = (OrcStruct) rows.next(row);
+    assertEquals(null, row.getFieldValue(0));
+    union = (OrcUnion) row.getFieldValue(1); // re-fetched because the previous row's field 1 was null
+    assertEquals(0, union.getTag());
+    assertEquals(null, union.getObject());
+    row = (OrcStruct) rows.next(row);
+    assertEquals(null, row.getFieldValue(0));
+    assertEquals(1, union.getTag());
+    assertEquals(null, union.getObject());
+    row = (OrcStruct) rows.next(row);
+    assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"),
+        row.getFieldValue(0));
+    assertEquals(new IntWritable(200000), union.getObject());
+    for(int i=1900; i < 2200; ++i) {
+      row = (OrcStruct) rows.next(row);
+      assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
+          row.getFieldValue(0));
+      if ((i & 1) == 0) {
+        assertEquals(0, union.getTag());
+        assertEquals(new IntWritable(i*i), union.getObject());
+      } else {
+        assertEquals(1, union.getTag());
+        assertEquals(new Text(Integer.toString(i*i)), union.getObject());
+      }
+    }
+    for(int i=0; i < 1000; ++i) {
+      row = (OrcStruct) rows.next(row);
+      assertEquals(new IntWritable(1732050807), union.getObject());
+    }
+    row = (OrcStruct) rows.next(row);
+    assertEquals(new IntWritable(0), union.getObject());
+    row = (OrcStruct) rows.next(row);
+    assertEquals(new IntWritable(10), union.getObject());
+    row = (OrcStruct) rows.next(row);
+    assertEquals(new IntWritable(138), union.getObject());
+    assertEquals(false, rows.hasNext());
+    assertEquals(1.0, rows.getProgress(), 0.00001);
+    assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
+    rows.close();
+  }
+
+  /**
+   * Read and write a randomly generated snappy file.
+   * @throws Exception
+   */
+  @Test
+  public void testSnappy() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (InnerStruct.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+        1000, CompressionKind.SNAPPY, 100, 10000);
+    Random rand = new Random(12); // fixed seed so the data can be regenerated for verification
+    for(int i=0; i < 10000; ++i) {
+      writer.addRow(new InnerStruct(rand.nextInt(),
+          Integer.toHexString(rand.nextInt())));
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(fs, testFilePath);
+    RecordReader rows = reader.rows(null);
+    rand = new Random(12); // replay the same sequence that was written
+    OrcStruct row = null;
+    for(int i=0; i < 10000; ++i) {
+      assertEquals(true, rows.hasNext());
+      row = (OrcStruct) rows.next(row);
+      assertEquals(rand.nextInt(), ((IntWritable) row.getFieldValue(0)).get()); // two nextInt() calls per row, same order as when writing
+      assertEquals(Integer.toHexString(rand.nextInt()),
+          row.getFieldValue(1).toString());
+    }
+    assertEquals(false, rows.hasNext());
+    rows.close();
+  }
+
+  /**
+   * Write a snappy file with no row index (stride 0) and read it back.
+   * @throws Exception
+   */
+  @Test
+  public void testWithoutIndex() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (InnerStruct.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+        5000, CompressionKind.SNAPPY, 1000, 0); // NOTE(review): trailing 0 is presumably the row index stride asserted below - confirm
+    Random rand = new Random(24);
+    for(int i=0; i < 10000; ++i) {
+      InnerStruct row = new InnerStruct(rand.nextInt(),
+          Integer.toBinaryString(rand.nextInt()));
+      for(int j=0; j< 5; ++j) { // each generated row is written five times
+        writer.addRow(row);
+      }
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(fs, testFilePath);
+    assertEquals(50000, reader.getNumberOfRows()); // 10000 distinct rows x 5 copies
+    assertEquals(0, reader.getRowIndexStride());
+    StripeInformation stripe = reader.getStripes().iterator().next(); // only the first stripe is inspected
+    assertEquals(true, stripe.getDataLength() != 0);
+    assertEquals(0, stripe.getIndexLength());
+    RecordReader rows = reader.rows(null);
+    rand = new Random(24); // replay the same sequence that was written
+    OrcStruct row = null;
+    for(int i=0; i < 10000; ++i) {
+      int intVal = rand.nextInt();
+      String strVal = Integer.toBinaryString(rand.nextInt());
+      for(int j=0; j < 5; ++j) {
+        assertEquals(true, rows.hasNext());
+        row = (OrcStruct) rows.next(row);
+        assertEquals(intVal, ((IntWritable) row.getFieldValue(0)).get());
+        assertEquals(strVal, row.getFieldValue(1).toString());
+      }
+    }
+    assertEquals(false, rows.hasNext());
+    rows.close();
+  }
+
+  @Test // writes COUNT rows, then seeks to every row in reverse order and checks its contents
+  public void testSeek() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
+        200000, CompressionKind.ZLIB, 65536, 1000);
+    Random rand = new Random(42);
+    final int COUNT=32768;
+    long[] intValues= new long[COUNT];
+    double[] doubleValues = new double[COUNT];
+    String[] stringValues = new String[COUNT];
+    BytesWritable[] byteValues = new BytesWritable[COUNT];
+    String[] words = new String[128];
+    for(int i=0; i < words.length; ++i) {
+      words[i] = Integer.toHexString(rand.nextInt());
+    }
+    for(int i=0; i < COUNT/2; ++i) { // integer and string values are generated in identical adjacent pairs
+      intValues[2*i] = rand.nextLong();
+      intValues[2*i+1] = intValues[2*i];
+      stringValues[2*i] = words[rand.nextInt(words.length)];
+      stringValues[2*i+1] = stringValues[2*i];
+    }
+    for(int i=0; i < COUNT; ++i) { // doubles and byte arrays are distinct for every row
+      doubleValues[i] = rand.nextDouble();
+      byte[] buf = new byte[20];
+      rand.nextBytes(buf);
+      byteValues[i] = new BytesWritable(buf);
+    }
+    for(int i=0; i < COUNT; ++i) {
+      writer.addRow(createRandomRow(intValues, doubleValues, stringValues,
+          byteValues, words, i));
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(fs, testFilePath);
+    assertEquals(COUNT, reader.getNumberOfRows());
+    RecordReader rows = reader.rows(null);
+    OrcStruct row = null;
+    for(int i=COUNT-1; i >= 0; --i) { // walk backwards so every read needs an explicit seek
+      rows.seekToRow(i);
+      row = (OrcStruct) rows.next(row);
+      BigRow expected = createRandomRow(intValues, doubleValues,
+          stringValues, byteValues, words, i); // rebuild the row that was written at index i
+      assertEquals(expected.boolean1.booleanValue(),
+          ((BooleanWritable) row.getFieldValue(0)).get());
+      assertEquals(expected.byte1.byteValue(),
+          ((ByteWritable) row.getFieldValue(1)).get());
+      assertEquals(expected.short1.shortValue(),
+          ((ShortWritable) row.getFieldValue(2)).get());
+      assertEquals(expected.int1.intValue(),
+          ((IntWritable) row.getFieldValue(3)).get());
+      assertEquals(expected.long1.longValue(),
+          ((LongWritable) row.getFieldValue(4)).get());
+      assertEquals(expected.float1.floatValue(),
+          ((FloatWritable) row.getFieldValue(5)).get(), 0.0001);
+      assertEquals(expected.double1.doubleValue(),
+          ((DoubleWritable) row.getFieldValue(6)).get(), 0.0001);
+      assertEquals(expected.bytes1, row.getFieldValue(7));
+      assertEquals(expected.string1, row.getFieldValue(8));
+      List<InnerStruct> expectedList = expected.middle.list;
+      List<OrcStruct> actualList =
+          (List) ((OrcStruct) row.getFieldValue(9)).getFieldValue(0);
+      compareList(expectedList, actualList);
+      compareList(expected.list, (List) row.getFieldValue(10));
+    }
+    rows.close(); // release the reader, as the other tests in this file do
+  }
+
+  private void compareInner(InnerStruct expect, // asserts that one read-back struct matches the expected InnerStruct
+                            OrcStruct actual) throws Exception {
+    if (expect == null || actual == null) {
+      assertEquals(expect, actual); // passes only when both are null
+    } else {
+      assertEquals(expect.int1, ((IntWritable) actual.getFieldValue(0)).get()); // field 0 is compared with int1
+      assertEquals(expect.string1, actual.getFieldValue(1)); // field 1 is compared with string1
+    }
+  }
+
+  private void compareList(List<InnerStruct> expect, // asserts equal sizes, then compares element by element
+                           List<OrcStruct> actual) throws Exception {
+    assertEquals(expect.size(), actual.size());
+    for(int j=0; j < expect.size(); ++j) {
+      compareInner(expect.get(j), actual.get(j));
+    }
+  }
+
+  private BigRow createRandomRow(long[] intValues, double[] doubleValues, // builds the BigRow for index i from the given arrays
+                                 String[] stringValues,
+                                 BytesWritable[] byteValues,
+                                 String[] words, int i) {
+    InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]); // (int) cast keeps the low 32 bits
+    InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32), // high 32 bits of the same long
+        words[i % words.length] + "-x");
+    return new BigRow((intValues[i] & 1) == 0, (byte) intValues[i], // boolean field is true when the value is even
+        (short) intValues[i], (int) intValues[i], intValues[i],
+        (float) doubleValues[i], doubleValues[i], byteValues[i],stringValues[i],
+        new MiddleStruct(inner, inner2), list(), map(inner,inner2)); // list() and map() are helpers defined elsewhere in this file
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestOrcStruct { // tests OrcStruct/OrcUnion equals, hashCode and toString, and inspector creation from a TypeInfo
+
+  @Test
+  public void testStruct() throws Exception {
+    OrcStruct st1 = new OrcStruct(4);
+    OrcStruct st2 = new OrcStruct(4);
+    OrcStruct st3 = new OrcStruct(3); // one field fewer than st1 and st2
+    st1.setFieldValue(0, "hop");
+    st1.setFieldValue(1, "on");
+    st1.setFieldValue(2, "pop");
+    st1.setFieldValue(3, 42);
+    assertEquals(false, st1.equals(null));
+    st2.setFieldValue(0, "hop");
+    st2.setFieldValue(1, "on");
+    st2.setFieldValue(2, "pop");
+    st2.setFieldValue(3, 42);
+    assertEquals(st1, st2);
+    st3.setFieldValue(0, "hop");
+    st3.setFieldValue(1, "on");
+    st3.setFieldValue(2, "pop");
+    assertEquals(false, st1.equals(st3)); // same leading values but a different field count
+    assertEquals(11241, st1.hashCode()); // pins the exact hash; the value depends on OrcStruct.hashCode, not shown here
+    assertEquals(st1.hashCode(), st2.hashCode());
+    assertEquals(11204, st3.hashCode());
+    assertEquals("{hop, on, pop, 42}", st1.toString());
+    st1.setFieldValue(3, null);
+    assertEquals(false, st1.equals(st2)); // null vs non-null field must differ in both directions
+    assertEquals(false, st2.equals(st1));
+    st2.setFieldValue(3, null);
+    assertEquals(st1, st2); // null fields compare equal to each other
+  }
+
+  @Test
+  public void testInspectorFromTypeInfo() throws Exception {
+    TypeInfo typeInfo =
+        TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:tinyint" +
+            ",c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary," +
+            "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>" +
+            ",c13:array<timestamp>>");
+    StructObjectInspector inspector = (StructObjectInspector)
+        OrcStruct.createObjectInspector(typeInfo);
+    assertEquals("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:" + // input says uniontype<int>, expected name says union{int}
+        "bigint,c6:float,c7:double,c8:binary,c9:string,c10:struct<" +
+        "c1:int>,c11:map<int,int>,c12:union{int},c13:array<timestamp>>",
+        inspector.getTypeName());
+    assertEquals(null,
+        inspector.getAllStructFieldRefs().get(0).getFieldComment());
+    assertEquals(null, inspector.getStructFieldRef("UNKNOWN"));
+    OrcStruct s1 = new OrcStruct(13);
+    for(int i=0; i < 13; ++i) {
+      s1.setFieldValue(i, i); // each field just holds its own index (autoboxed), regardless of the declared column type
+    }
+
+    List<Object> list = new ArrayList<Object>();
+    list.addAll(Arrays.asList(0,1,2,3,4,5,6,7,8,9,10,11,12));
+    assertEquals(list, inspector.getStructFieldsDataAsList(s1));
+    ListObjectInspector listOI = (ListObjectInspector)
+        inspector.getAllStructFieldRefs().get(12).getFieldObjectInspector(); // index 12 is c13:array<timestamp>
+    assertEquals(ObjectInspector.Category.LIST, listOI.getCategory());
+    assertEquals(10, listOI.getListElement(list, 10)); // the list inspector is exercised on a plain java.util.List
+    assertEquals(13, listOI.getListLength(list));
+
+    Map<Integer, Integer> map = new HashMap<Integer,Integer>();
+    map.put(1,2);
+    map.put(2,4);
+    map.put(3,6);
+    MapObjectInspector mapOI = (MapObjectInspector)
+        inspector.getAllStructFieldRefs().get(10).getFieldObjectInspector(); // index 10 is c11:map<int,int>
+    assertEquals(3, mapOI.getMapSize(map));
+    assertEquals(4, mapOI.getMapValueElement(map, 2));
+  }
+
+  @Test
+  public void testUnion() throws Exception {
+    OrcUnion un1 = new OrcUnion();
+    OrcUnion un2 = new OrcUnion();
+    un1.set((byte) 0, "hi");
+    un2.set((byte) 0, "hi");
+    assertEquals(un1, un2);
+    assertEquals(un1.hashCode(), un2.hashCode());
+    un2.set((byte) 0, null);
+    assertEquals(false, un1.equals(un2)); // same tag, null vs non-null object
+    assertEquals(false, un2.equals(un1));
+    un1.set((byte) 0, null);
+    assertEquals(un1, un2);
+    un2.set((byte) 0, "hi");
+    un1.set((byte) 1, "hi");
+    assertEquals(false, un1.equals(un2)); // same object, different tag
+    assertEquals(false, un1.hashCode() == un2.hashCode());
+    un2.set((byte) 1, "byte");
+    assertEquals(false, un1.equals(un2)); // same tag, different object
+    assertEquals("union(1, hi)", un1.toString());
+    assertEquals(false, un1.equals(null));
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+
+import static junit.framework.Assert.assertEquals;
+
+public class TestRunLengthByteReader { // round-trips bytes through RunLengthByteWriter/Reader, exercising seek and skip
+
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
+        null, collect)); // null codec: the stream is written uncompressed
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[2048];
+    for(int i=0; i < 2048; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]); // record the position before each value so it can be sought later
+      if (i < 1024) {
+        out.write((byte) (i/4)); // first half: each value repeats four times
+      } else {
+        out.write((byte) i); // second half: value changes on every write
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
+        inBuf, null, 100));
+    for(int i=0; i < 2048; ++i) { // sequential read of all values
+      int x = in.next() & 0xff; // mask to compare as unsigned
+      if (i < 1024) {
+        assertEquals((i/4) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+    }
+    for(int i=2047; i >= 0; --i) { // seek to every recorded position in reverse order
+      in.seek(positions[i]);
+      int x = in.next() & 0xff;
+      if (i < 1024) {
+        assertEquals((i/4) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+    }
+  }
+
+  @Test
+  public void testCompressedSeek() throws Exception {
+    CompressionCodec codec = new SnappyCodec(); // same scenario as testUncompressedSeek, but through a Snappy codec
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 500,
+        codec, collect));
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[2048];
+    for(int i=0; i < 2048; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]); // record the position before each value so it can be sought later
+      if (i < 1024) {
+        out.write((byte) (i/4)); // first half: each value repeats four times
+      } else {
+        out.write((byte) i); // second half: value changes on every write
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
+        inBuf, codec, 500)); // read with the same codec and size argument used for writing
+    for(int i=0; i < 2048; ++i) { // sequential read of all values
+      int x = in.next() & 0xff;
+      if (i < 1024) {
+        assertEquals((i/4) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+    }
+    for(int i=2047; i >= 0; --i) { // seek to every recorded position in reverse order
+      in.seek(positions[i]);
+      int x = in.next() & 0xff;
+      if (i < 1024) {
+        assertEquals((i/4) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+    }
+  }
+
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
+        null, collect));
+    for(int i=0; i < 2048; ++i) {
+      if (i < 1024) {
+        out.write((byte) (i/16)); // first half: each value repeats sixteen times
+      } else {
+        out.write((byte) i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
+        inBuf, null, 100));
+    for(int i=0; i < 2048; i += 10) { // read every tenth value: one next() followed by skip(9)
+      int x = in.next() & 0xff;
+      if (i < 1024) {
+        assertEquals((i/16) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+      if (i < 2038) { // after the last read (i == 2040) only 7 values remain, so no skip(9)
+        in.skip(9);
+      }
+      in.skip(0); // exercises a zero-length skip; presumably expected to be a no-op
+    }
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import static junit.framework.Assert.assertEquals;
+
+public class TestRunLengthIntegerReader {
+
+  /**
+   * Writes 4096 integers, reads them all back in order and then seeks to
+   * every recorded position from the last value to the first.
+   *
+   * @param codec the codec passed to both streams; null runs uncompressed
+   */
+  public void runSeekTest(CompressionCodec codec) throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+        new OutStream("test", 1000, codec, collect), true);
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[4096];
+    int[] expected = new int[positions.length];
+    Random random = new Random(99);
+    for(int i=0; i < expected.length; ++i) {
+      // test runs, incrementing runs, non-runs
+      if (i < 1024) {
+        expected[i] = i/4;
+      } else if (i < 2048) {
+        expected[i] = 2*i;
+      } else {
+        expected[i] = random.nextInt();
+      }
+      // record where each value starts so the reader can seek back to it
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      out.write(expected[i]);
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+        ("test", inBuf, codec, 1000), true);
+    // Check every written value, including the random non-run half of the
+    // stream (indexes 2048 and up), both sequentially and after a seek.
+    for(int i=0; i < expected.length; ++i) {
+      assertEquals("value " + i, expected[i], (int) in.next());
+    }
+    for(int i=expected.length - 1; i >= 0; --i) {
+      in.seek(positions[i]);
+      assertEquals("value after seek to " + i, expected[i], (int) in.next());
+    }
+  }
+
+  /** Runs the seek test with no compression codec. */
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    runSeekTest(null);
+  }
+
+  /** Runs the seek test with the zlib codec. */
+  @Test
+  public void testCompressedSeek() throws Exception {
+    runSeekTest(new ZlibCodec());
+  }
+
+  /**
+   * Writes 2048 integers without compression, then reads every tenth value
+   * and skips over the nine values in between.
+   */
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+        new OutStream("test", 100, null, collect), true);
+    for(int i=0; i < 2048; ++i) {
+      if (i < 1024) {
+        out.write(i);
+      } else {
+        out.write(256 * i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+        ("test", inBuf, null, 100), true);
+    for(int i=0; i < 2048; i += 10) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i, x);
+      } else {
+        assertEquals(256 * i, x);
+      }
+      // the last sample (i == 2040) has only 7 values after it, so no skip
+      if (i < 2038) {
+        in.skip(9);
+      }
+      in.skip(0);
+    }
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestSerializationUtils {
+  /** Writes one double and checks it reads back to within 0.0001. */
+  @Test
+  public void TestDoubles() throws Exception {
+    double original = 1343822337.759;
+    ByteArrayOutputStream sink = new ByteArrayOutputStream();
+    SerializationUtils.writeDouble(sink, original);
+    ByteArrayInputStream source = new ByteArrayInputStream(sink.toByteArray());
+    assertEquals(original, SerializationUtils.readDouble(source), 0.0001);
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestStreamName {
+  /** Exercises equals and compareTo on names built from two ids and kinds. */
+  @Test
+  public void test1() throws Exception {
+    OrcProto.Stream.Kind data = OrcProto.Stream.Kind.DATA;
+    OrcProto.Stream.Kind dict = OrcProto.Stream.Kind.DICTIONARY_DATA;
+    StreamName data3 = new StreamName(3, data);
+    StreamName dict3 = new StreamName(3, dict);
+    StreamName data5 = new StreamName(5, data);
+    StreamName dict5 = new StreamName(5, dict);
+    StreamName data3Copy = new StreamName(3, data);
+    assertEquals(true, data3.equals(data3));
+    assertEquals(false, data3.equals(dict3));
+    assertEquals(false, data3.equals(data5));
+    assertEquals(true, data3.equals(data3Copy));
+    assertEquals(true, data3.compareTo(null) < 0);
+    assertEquals(false, data3.equals(null));
+    assertEquals(true, data3.compareTo(dict3) < 0);
+    assertEquals(true, dict3.compareTo(data5) < 0);
+    assertEquals(true, data5.compareTo(dict5) < 0);
+    assertEquals(true, dict5.compareTo(data3Copy) > 0);
+    assertEquals(0, data3Copy.compareTo(data3));
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,296 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+
+import static junit.framework.Assert.assertEquals;
+
+/**
+ * Test the red-black tree with string keys.
+ */
+public class TestStringRedBlackTree {
+
+  /**
+   * Checks the red-black tree rules to make sure that we have correctly built
+   * a valid tree.
+   *
+   * Properties:
+   *   1. Red nodes must have black children
+   *   2. Each node must have the same black height on both sides.
+   *
+   * @param tree The tree that contains the nodes.
+   * @param node The id of the root of the subtree to check for the red-black
+   *        tree properties.
+   * @param count Incremented once for each non-NULL node that is visited.
+   * @return The black-height of the subtree.
+   */
+  private int checkSubtree(RedBlackTree tree, int node, IntWritable count
+                          ) throws IOException {
+    if (node == RedBlackTree.NULL) {
+      return 1;
+    }
+    count.set(count.get() + 1);
+    boolean isRed = tree.isRed(node);
+    int left = tree.getLeft(node);
+    int right = tree.getRight(node);
+    if (isRed) {
+      if (tree.isRed(left)) {
+        printTree(tree, "", tree.root);
+        throw new IllegalStateException("Left node of " + node + " is " + left +
+          " and both are red.");
+      }
+      if (tree.isRed(right)) {
+        printTree(tree, "", tree.root);
+        throw new IllegalStateException("Right node of " + node + " is " +
+          right + " and both are red.");
+      }
+    }
+    int leftDepth = checkSubtree(tree, left, count);
+    int rightDepth = checkSubtree(tree, right, count);
+    if (leftDepth != rightDepth) {
+      printTree(tree, "", tree.root);
+      throw new IllegalStateException("Lopsided tree at node " + node +
+        " with depths " + leftDepth + " and " + rightDepth);
+    }
+    // only black nodes add to the black-height
+    return isRed ? leftDepth : leftDepth + 1;
+  }
+
+  /**
+   * Checks the validity of the entire tree. Also ensures that the number of
+   * nodes visited is the same as the size of the set.
+   */
+  void checkTree(RedBlackTree tree) throws IOException {
+    IntWritable count = new IntWritable(0);
+    if (tree.isRed(tree.root)) {
+      printTree(tree, "", tree.root);
+      throw new IllegalStateException("root is red");
+    }
+    checkSubtree(tree, tree.root, count);
+    if (count.get() != tree.size) {
+      printTree(tree, "", tree.root);
+      throw new IllegalStateException("Broken tree! visited= " + count.get() +
+        " size=" + tree.size);
+    }
+  }
+
+  /** Prints the subtree rooted at node to stderr, one node per line. */
+  void printTree(RedBlackTree tree, String indent, int node
+                ) throws IOException {
+    if (node == RedBlackTree.NULL) {
+      System.err.println(indent + "NULL");
+    } else {
+      System.err.println(indent + "Node " + node + " color " +
+        (tree.isRed(node) ? "red" : "black") + " count " + tree.getCount(node));
+      printTree(tree, indent + "  ", tree.getLeft(node));
+      printTree(tree, indent + "  ", tree.getRight(node));
+    }
+  }
+
+  /** Checks each visited entry against the expected values, in order. */
+  private static class MyVisitor implements StringRedBlackTree.Visitor {
+    private final String[] words;
+    private final int[] counts;
+    private final int[] order;
+    private final DataOutputBuffer buffer = new DataOutputBuffer();
+    int current = 0;
+
+    MyVisitor(String[] args, int[] counts, int[] order) {
+      words = args;
+      this.counts = counts;
+      this.order = order;
+    }
+
+    @Override
+    public void visit(StringRedBlackTree.VisitorContext context
+                     ) throws IOException {
+      String word = context.getText().toString();
+      assertEquals("in word " + current, words[current], word);
+      assertEquals("in word " + current, counts[current], context.getCount());
+      assertEquals("in word " + current, order[current],
+        context.getOriginalPosition());
+      buffer.reset();
+      context.writeBytes(buffer);
+      // decode as UTF-8 rather than with the platform default charset
+      assertEquals(word,
+        new String(buffer.getData(), 0, buffer.getLength(), "UTF-8"));
+      current += 1;
+    }
+  }
+
+  /**
+   * Visits the tree and checks each entry against the expected count,
+   * original position and word, all listed in visiting order.
+   */
+  void checkContents(StringRedBlackTree tree, int[] counts, int[] order,
+                     String... params) throws IOException {
+    MyVisitor visitor = new MyVisitor(params, counts, order);
+    tree.visit(visitor);
+    // a visit that stops early must not pass as a match
+    assertEquals("visited entries", params.length, visitor.current);
+  }
+
+  /** Adds the words in order, validating the tree after each insertion. */
+  StringRedBlackTree buildTree(String... params) throws IOException {
+    StringRedBlackTree result = new StringRedBlackTree();
+    for(String word: params) {
+      result.add(word);
+      checkTree(result);
+    }
+    return result;
+  }
+
+  /** Adds a word, checks the position returned and revalidates the tree. */
+  private void addAndCheck(StringRedBlackTree tree, int position, String word
+                          ) throws IOException {
+    assertEquals(position, tree.add(word));
+    checkTree(tree);
+  }
+
+  /** Mixes new and repeated words; checks positions, counts and sizes. */
+  @Test
+  public void test1() throws Exception {
+    StringRedBlackTree tree = new StringRedBlackTree(5);
+    assertEquals(0, tree.getByteSize());
+    checkTree(tree);
+    // a repeated word returns the position it was given when first added
+    addAndCheck(tree, 0, "owen");
+    addAndCheck(tree, 1, "ashutosh");
+    addAndCheck(tree, 0, "owen");
+    addAndCheck(tree, 2, "alan");
+    addAndCheck(tree, 2, "alan");
+    addAndCheck(tree, 1, "ashutosh");
+    addAndCheck(tree, 3, "greg");
+    addAndCheck(tree, 4, "eric");
+    addAndCheck(tree, 5, "arun");
+    assertEquals(6, tree.size());
+    checkTree(tree);
+    addAndCheck(tree, 6, "eric14");
+    addAndCheck(tree, 7, "o");
+    addAndCheck(tree, 8, "ziggy");
+    addAndCheck(tree, 9, "z");
+    checkContents(tree, new int[]{2,1,2,1,1,1,1,2,1,1},
+      new int[]{2,5,1,4,6,3,7,0,9,8},
+      "alan", "arun", "ashutosh", "eric", "eric14", "greg",
+      "o", "owen", "z", "ziggy");
+    assertEquals(10*5*4 + 8 + 6 + 5 + 5 * 4 + 2 * 1, tree.getByteSize());
+    // check that adding greg again bumps the count
+    assertEquals(1, tree.getCount(3));
+    assertEquals(3, tree.add("greg"));
+    assertEquals(2, tree.getCount(3));
+    assertEquals(41, tree.getCharacterSize());
+    // add more strings to test the different branches of the rebalancing
+    addAndCheck(tree, 10, "zak");
+    addAndCheck(tree, 11, "eric1");
+    addAndCheck(tree, 12, "ash");
+    addAndCheck(tree, 13, "harry");
+    addAndCheck(tree, 14, "john");
+    tree.clear();
+    checkTree(tree);
+    assertEquals(0, tree.getByteSize());
+    assertEquals(0, tree.getCharacterSize());
+  }
+
+  /** Inserts a through z in ascending order. */
+  @Test
+  public void test2() throws Exception {
+    StringRedBlackTree tree =
+      buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
+        "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+    assertEquals(26, tree.size());
+    checkContents(tree, new int[]{1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1,
+      1,1,1, 1,1,1, 1,1}, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
+      15,16,17, 18,19,20, 21,22,23, 24,25},
+      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
+      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+  }
+
+  /** Inserts z through a, i.e. in descending order. */
+  @Test
+  public void test3() throws Exception {
+    StringRedBlackTree tree =
+      buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
+        "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
+    assertEquals(26, tree.size());
+    checkContents(tree, new int[]{1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1,
+      1,1,1, 1,1,1, 1,1}, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
+      13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
+      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
+      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+  }
+
+  /** Runs the tests of this package directly, without a JUnit runner. */
+  public static void main(String[] args) throws Exception {
+    TestStringRedBlackTree test = new TestStringRedBlackTree();
+    test.test1();
+    test.test2();
+    test.test3();
+    new TestSerializationUtils().TestDoubles();
+    TestDynamicArray test6 = new TestDynamicArray();
+    test6.testByteArray();
+    test6.testIntArray();
+    TestZlib zlib = new TestZlib();
+    zlib.testCorrupt();
+    zlib.testNoOverflow();
+    TestInStream inStreamTest = new TestInStream();
+    inStreamTest.testUncompressed();
+    inStreamTest.testCompressed();
+    inStreamTest.testCorruptStream();
+    TestRunLengthByteReader rleByte = new TestRunLengthByteReader();
+    rleByte.testUncompressedSeek();
+    rleByte.testCompressedSeek();
+    rleByte.testSkips();
+    TestRunLengthIntegerReader rleInt = new TestRunLengthIntegerReader();
+    rleInt.testUncompressedSeek();
+    rleInt.testCompressedSeek();
+    rleInt.testSkips();
+    TestBitFieldReader bit = new TestBitFieldReader();
+    bit.testUncompressedSeek();
+    bit.testCompressedSeek();
+    bit.testBiggerItems();
+    bit.testSkips();
+    TestOrcFile test1 = new TestOrcFile();
+    test1.test1();
+    test1.emptyFile();
+    test1.metaData();
+    test1.testUnionAndTimestamp();
+    test1.columnProjection();
+    test1.testSnappy();
+    test1.testWithoutIndex();
+    test1.testSeek();
+    new TestFileDump().testDump();
+    new TestStreamName().test1();
+    TestInputOutputFormat test4 = new TestInputOutputFormat();
+    test4.testInOutFormat();
+    test4.testMROutput();
+    test4.testEmptyFile();
+    test4.testDefaultTypes();
+    TestOrcStruct test5 = new TestOrcStruct();
+    test5.testStruct();
+    test5.testInspectorFromTypeInfo();
+    test5.testUnion();
+  }
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.fail;
+
+public class TestZlib {
+
+  /** Expects compress to return false for 8 bytes and a 10 byte output. */
+  @Test
+  public void testNoOverflow() throws Exception {
+    ByteBuffer source = ByteBuffer.allocate(10);
+    source.put(new byte[]{1,2,3,4,5,6,7,10}).flip();
+    ByteBuffer target = ByteBuffer.allocate(10);
+    CompressionCodec codec = new ZlibCodec();
+    assertEquals(false, codec.compress(source, target, null));
+  }
+
+  /** Expects decompress to throw an IOException for six arbitrary bytes. */
+  @Test
+  public void testCorrupt() throws Exception {
+    ByteBuffer garbage = ByteBuffer.allocate(1000);
+    garbage.put(new byte[]{127,-128,0,99,98,-1}).flip();
+    ByteBuffer target = ByteBuffer.allocate(1000);
+    CompressionCodec codec = new ZlibCodec();
+    try {
+      codec.decompress(garbage, target);
+    } catch (IOException expected) {
+      return; // EXPECTED
+    }
+    fail(); // decompress returned without throwing
+  }
+}