You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2017/01/10 00:31:21 UTC

[1/2] orc git commit: ORC-128. Add getStatistics to Writer API to allow user to get statistics as the file is written.

Repository: orc
Updated Branches:
  refs/heads/master 0e92c5c2e -> 4984cb2a2


ORC-128. Add getStatistics to Writer API to allow user to get statistics as the
file is written.

Fixes #78

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/1e8b5986
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/1e8b5986
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/1e8b5986

Branch: refs/heads/master
Commit: 1e8b5986a16eb014f5cb9ab074ec1059f93f99a6
Parents: 0e92c5c
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jan 6 10:22:11 2017 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Jan 9 16:28:40 2017 -0800

----------------------------------------------------------------------
 java/core/src/java/org/apache/orc/Writer.java   |  12 +
 .../apache/orc/impl/ColumnStatisticsImpl.java   | 316 +++++++++++++++++++
 .../java/org/apache/orc/impl/ReaderImpl.java    |   6 +-
 .../java/org/apache/orc/impl/WriterImpl.java    |  12 +
 .../org/apache/orc/TestOrcNullOptimization.java |  18 +-
 .../test/org/apache/orc/TestVectorOrcFile.java  |  22 +-
 6 files changed, 379 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/java/org/apache/orc/Writer.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/Writer.java b/java/core/src/java/org/apache/orc/Writer.java
index 4492062..596e14e 100644
--- a/java/core/src/java/org/apache/orc/Writer.java
+++ b/java/core/src/java/org/apache/orc/Writer.java
@@ -111,4 +111,16 @@ public interface Writer {
    * @param userMetadata - user metadata
    */
   public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata);
+
+  /**
+   * Get the statistics about the columns in the file. The output of this is
+   * based on the time at which it is called. It shall use all of the currently
+   * written data to provide the statistics.
+   *
+   * Please note there are costs involved with invoking this method and should
+   * be used judiciously.
+   *
+   * @return the information about the column
+   */
+  ColumnStatistics[] getStatistics() throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index 745ed9a..7e1826a 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -39,6 +39,34 @@ import org.apache.orc.TypeDescription;
 
 public class ColumnStatisticsImpl implements ColumnStatistics {
 
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof ColumnStatisticsImpl)) {
+      return false;
+    }
+
+    ColumnStatisticsImpl that = (ColumnStatisticsImpl) o;
+
+    if (count != that.count) {
+      return false;
+    }
+    if (hasNull != that.hasNull) {
+      return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public int hashCode() {
+    int result = (int) (count ^ (count >>> 32));
+    result = 31 * result + (hasNull ? 1 : 0);
+    return result;
+  }
+
   private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
       implements BooleanColumnStatistics {
     private long trueCount = 0;
@@ -102,6 +130,34 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
     public String toString() {
       return super.toString() + " true: " + trueCount;
     }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof BooleanStatisticsImpl)) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+
+      BooleanStatisticsImpl that = (BooleanStatisticsImpl) o;
+
+      if (trueCount != that.trueCount) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      int result = super.hashCode();
+      result = 31 * result + (int) (trueCount ^ (trueCount >>> 32));
+      return result;
+    }
   }
 
   private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
@@ -247,6 +303,50 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       }
       return buf.toString();
     }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof IntegerStatisticsImpl)) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+
+      IntegerStatisticsImpl that = (IntegerStatisticsImpl) o;
+
+      if (minimum != that.minimum) {
+        return false;
+      }
+      if (maximum != that.maximum) {
+        return false;
+      }
+      if (sum != that.sum) {
+        return false;
+      }
+      if (hasMinimum != that.hasMinimum) {
+        return false;
+      }
+      if (overflow != that.overflow) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      int result = super.hashCode();
+      result = 31 * result + (int) (minimum ^ (minimum >>> 32));
+      result = 31 * result + (int) (maximum ^ (maximum >>> 32));
+      result = 31 * result + (int) (sum ^ (sum >>> 32));
+      result = 31 * result + (hasMinimum ? 1 : 0);
+      result = 31 * result + (overflow ? 1 : 0);
+      return result;
+    }
   }
 
   private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
@@ -364,6 +464,50 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       buf.append(sum);
       return buf.toString();
     }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof DoubleStatisticsImpl)) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+
+      DoubleStatisticsImpl that = (DoubleStatisticsImpl) o;
+
+      if (hasMinimum != that.hasMinimum) {
+        return false;
+      }
+      if (Double.compare(that.minimum, minimum) != 0) {
+        return false;
+      }
+      if (Double.compare(that.maximum, maximum) != 0) {
+        return false;
+      }
+      if (Double.compare(that.sum, sum) != 0) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      int result = super.hashCode();
+      long temp;
+      result = 31 * result + (hasMinimum ? 1 : 0);
+      temp = Double.doubleToLongBits(minimum);
+      result = 31 * result + (int) (temp ^ (temp >>> 32));
+      temp = Double.doubleToLongBits(maximum);
+      result = 31 * result + (int) (temp ^ (temp >>> 32));
+      temp = Double.doubleToLongBits(sum);
+      result = 31 * result + (int) (temp ^ (temp >>> 32));
+      return result;
+    }
   }
 
   protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
@@ -498,6 +642,42 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       }
       return buf.toString();
     }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof StringStatisticsImpl)) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+
+      StringStatisticsImpl that = (StringStatisticsImpl) o;
+
+      if (sum != that.sum) {
+        return false;
+      }
+      if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
+        return false;
+      }
+      if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      int result = super.hashCode();
+      result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
+      result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
+      result = 31 * result + (int) (sum ^ (sum >>> 32));
+      return result;
+    }
   }
 
   protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
@@ -569,6 +749,34 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       }
       return buf.toString();
     }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof BinaryStatisticsImpl)) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+
+      BinaryStatisticsImpl that = (BinaryStatisticsImpl) o;
+
+      if (sum != that.sum) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      int result = super.hashCode();
+      result = 31 * result + (int) (sum ^ (sum >>> 32));
+      return result;
+    }
   }
 
   private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
@@ -694,6 +902,42 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       }
       return buf.toString();
     }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof DecimalStatisticsImpl)) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+
+      DecimalStatisticsImpl that = (DecimalStatisticsImpl) o;
+
+      if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
+        return false;
+      }
+      if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
+        return false;
+      }
+      if (sum != null ? !sum.equals(that.sum) : that.sum != null) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      int result = super.hashCode();
+      result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
+      result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
+      result = 31 * result + (sum != null ? sum.hashCode() : 0);
+      return result;
+    }
   }
 
   private static final class DateStatisticsImpl extends ColumnStatisticsImpl
@@ -815,6 +1059,46 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       }
       return buf.toString();
     }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof DateStatisticsImpl)) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+
+      DateStatisticsImpl that = (DateStatisticsImpl) o;
+
+      if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
+        return false;
+      }
+      if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
+        return false;
+      }
+      if (minDate != null ? !minDate.equals(that.minDate) : that.minDate != null) {
+        return false;
+      }
+      if (maxDate != null ? !maxDate.equals(that.maxDate) : that.maxDate != null) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      int result = super.hashCode();
+      result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
+      result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
+      result = 31 * result + (minDate != null ? minDate.hashCode() : 0);
+      result = 31 * result + (maxDate != null ? maxDate.hashCode() : 0);
+      return result;
+    }
   }
 
   private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
@@ -925,6 +1209,38 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       }
       return buf.toString();
     }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof TimestampStatisticsImpl)) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+
+      TimestampStatisticsImpl that = (TimestampStatisticsImpl) o;
+
+      if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
+        return false;
+      }
+      if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      int result = super.hashCode();
+      result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
+      result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
+      return result;
+    }
   }
 
   private long count = 0;

http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index ad3f8ba..c24920d 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -216,7 +216,11 @@ public class ReaderImpl implements Reader {
 
   @Override
   public ColumnStatistics[] getStatistics() {
-    ColumnStatistics[] result = new ColumnStatistics[types.size()];
+    return deserializeStats(fileStats);
+  }
+
+  static ColumnStatistics[] deserializeStats(List<OrcProto.ColumnStatistics> fileStats){
+    ColumnStatistics[] result = new ColumnStatistics[fileStats.size()];
     for(int i=0; i < result.length; ++i) {
       result[i] = ColumnStatisticsImpl.deserialize(fileStats.get(i));
     }

http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/java/org/apache/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index 940ef59..d3ab8d0 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -37,6 +37,7 @@ import io.airlift.compress.lzo.LzoCompressor;
 import io.airlift.compress.lzo.LzoDecompressor;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.orc.BinaryColumnStatistics;
+import org.apache.orc.ColumnStatistics;
 import org.apache.orc.util.BloomFilter;
 import org.apache.orc.util.BloomFilterIO;
 import org.apache.orc.CompressionCodec;
@@ -3059,4 +3060,15 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
       }
     }
   }
+
+  @Override
+  public ColumnStatistics[] getStatistics()
+      throws IOException {
+    // Generate the stats
+    OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
+
+    // add the column statistics
+    writeFileStatistics(builder, treeWriter);
+    return ReaderImpl.deserializeStats(builder.getStatisticsList());
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java b/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
index 0b605c9..45b69b2 100644
--- a/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
+++ b/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
@@ -18,13 +18,15 @@
 package org.apache.orc;
 
 import static junit.framework.Assert.assertEquals;
+import static org.apache.orc.TestVectorOrcFile.assertEmptyStats;
+import static org.junit.Assert.assertArrayEquals;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.List;
 import java.util.Random;
 
-import junit.framework.Assert;
+import org.junit.Assert;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -233,9 +235,19 @@ public class TestOrcNullOptimization {
                                          .compress(CompressionKind.NONE)
                                          .bufferSize(10000));
     Random rand = new Random(100);
-    VectorizedRowBatch batch = schema.createRowBatch();
+    int batchSize = 5000;
+    VectorizedRowBatch batch = schema.createRowBatch(batchSize);
+    ColumnStatistics[] writerStats = writer.getStatistics();
+    assertEmptyStats(writerStats);
+    int count = 0;
     for (int i = 1; i < 20000; i++) {
       addRow(writer, batch, rand.nextInt(1), "a", true, 100);
+      count++;
+      if (count % batchSize == 1) {
+        writerStats = writer.getStatistics();
+      } else {
+        assertArrayEquals(writerStats, writer.getStatistics());
+      }
     }
     addRow(writer, batch, 0, "b", true, 100);
     writer.addRowBatch(batch);
@@ -245,6 +257,7 @@ public class TestOrcNullOptimization {
         OrcFile.readerOptions(conf).filesystem(fs));
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
+    assertArrayEquals(stats, writer.getStatistics());
     assertEquals(20000, reader.getNumberOfRows());
     assertEquals(20000, stats[0].getNumberOfValues());
 
@@ -338,6 +351,7 @@ public class TestOrcNullOptimization {
         OrcFile.readerOptions(conf).filesystem(fs));
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
+    assertArrayEquals(stats, writer.getStatistics());
     assertEquals(8, reader.getNumberOfRows());
     assertEquals(8, stats[0].getNumberOfValues());
 

http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index af31b4d..2448cb7 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -20,7 +20,7 @@ package org.apache.orc;
 
 import com.google.common.collect.Lists;
 
-import junit.framework.Assert;
+import org.junit.Assert;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -69,6 +69,7 @@ import java.util.Random;
 
 import static junit.framework.TestCase.assertNotNull;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
@@ -438,6 +439,7 @@ public class TestVectorOrcFile {
 
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
+    assertArrayEquals(stats, writer.getStatistics());
     assertEquals(4, stats[0].getNumberOfValues());
     assertEquals("count: 4 hasNull: false", stats[0].toString());
 
@@ -914,7 +916,7 @@ public class TestVectorOrcFile {
             createInnerSchema()));
   }
 
-  static void assertArrayEquals(boolean[] expected, boolean[] actual) {
+  static void assertArrayBooleanEquals(boolean[] expected, boolean[] actual) {
     assertEquals(expected.length, actual.length);
     boolean diff = false;
     for(int i=0; i < expected.length; ++i) {
@@ -935,6 +937,7 @@ public class TestVectorOrcFile {
             .setSchema(schema)
             .stripeSize(100000)
             .bufferSize(10000));
+    assertEmptyStats(writer.getStatistics());
     VectorizedRowBatch batch = schema.createRowBatch();
     batch.size = 2;
     setBigRow(batch, 0, false, (byte) 1, (short) 1024, 65536,
@@ -948,7 +951,9 @@ public class TestVectorOrcFile {
         list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
         map(inner(5, "chani"), inner(1, "mauddib")));
     writer.addRowBatch(batch);
+    assertEmptyStats(writer.getStatistics());
     writer.close();
+    ColumnStatistics[] closeStatistics = writer.getStatistics();
     Reader reader = OrcFile.createReader(testFilePath,
         OrcFile.readerOptions(conf).filesystem(fs));
 
@@ -969,7 +974,7 @@ public class TestVectorOrcFile {
         true, true, true, true};
     included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
 
-    assertArrayEquals(expected, included);
+    assertArrayBooleanEquals(expected, included);
 
     expected = new boolean[] {false, true, false, false, false,
         false, false, false, false, true,
@@ -977,7 +982,7 @@ public class TestVectorOrcFile {
         false, false, false, false, true,
         true, true, true, true};
     included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
-    assertArrayEquals(expected, included);
+    assertArrayBooleanEquals(expected, included);
 
     expected = new boolean[] {false, true, true, true, true,
         true, true, true, true, true,
@@ -991,6 +996,7 @@ public class TestVectorOrcFile {
 
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
+    assertArrayEquals(stats, closeStatistics);
     assertEquals(2, stats[1].getNumberOfValues());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
@@ -1128,6 +1134,13 @@ public class TestVectorOrcFile {
     rows.close();
   }
 
+  static void assertEmptyStats(ColumnStatistics[] writerStatistics) {
+    for (ColumnStatistics columnStatistics : writerStatistics){
+      assertEquals(0, columnStatistics.getNumberOfValues());
+      assertFalse(columnStatistics.hasNull());
+    }
+  }
+
   @Test
   public void testColumnProjection() throws Exception {
     TypeDescription schema = createInnerSchema();
@@ -2366,6 +2379,7 @@ public class TestVectorOrcFile {
 
     // check the stats
     ColumnStatistics[] stats = reader.getStatistics();
+    assertArrayEquals(stats, writer.getStatistics());
     assertEquals(4096, stats[0].getNumberOfValues());
     assertEquals(false, stats[0].hasNull());
     for(TypeDescription colType: schema.getChildren()) {


[2/2] orc git commit: ORC-129. Remove duplicate options between ReaderOptions and RowReaderOptions. (omalley reviewed by Deepak Majeti)

Posted by om...@apache.org.
ORC-129. Remove duplicate options between ReaderOptions and RowReaderOptions.
(omalley reviewed by Deepak Majeti)

Fixes #79

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/4984cb2a
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/4984cb2a
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/4984cb2a

Branch: refs/heads/master
Commit: 4984cb2a28e3d17fe993accf3bbfbe494d77362b
Parents: 1e8b598
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jan 6 14:12:05 2017 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Jan 9 16:30:10 2017 -0800

----------------------------------------------------------------------
 c++/include/orc/Reader.hh    | 30 ++++-----------
 c++/src/ColumnReader.cc      |  7 ++--
 c++/src/ColumnReader.hh      | 24 +++++++++---
 c++/src/Options.hh           | 23 ------------
 c++/src/Reader.cc            | 54 +++++++++++++++-----------
 c++/src/Reader.hh            | 30 ++++++---------
 c++/src/StripeStream.cc      | 27 ++++++++-----
 c++/src/StripeStream.hh      | 10 +++--
 c++/test/TestColumnReader.cc | 79 +++++++++++++++++++--------------------
 tools/src/FileContents.cc    |  4 +-
 tools/src/FileMemory.cc      |  3 +-
 tools/src/FileScan.cc        |  3 +-
 tools/test/TestMatch.cc      | 52 ++++++++++++--------------
 13 files changed, 165 insertions(+), 181 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/include/orc/Reader.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index 234c889..e946648 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -315,21 +315,6 @@ namespace orc {
     RowReaderOptions& forcedScaleOnHive11Decimal(int32_t forcedScale);
 
     /**
-     * Set the memory allocator.
-     */
-    RowReaderOptions& setMemoryPool(MemoryPool& pool);
-
-    /**
-     * Set the stream to use for printing warning or error messages.
-     */
-    RowReaderOptions& setErrorStream(std::ostream& stream);
-
-    /**
-     * Get the stream to write warnings or errors to.
-     */
-    std::ostream* getErrorStream() const;
-
-    /**
      * Were the field ids set?
      */
     bool getIndexesSet() const;
@@ -378,11 +363,6 @@ namespace orc {
      * What scale should all Hive 0.11 decimals be normalized to?
      */
     int32_t getForcedScaleOnHive11Decimal() const;
-
-    /**
-     * Get the memory allocator.
-     */
-    MemoryPool* getMemoryPool() const;
   };
 
 
@@ -546,11 +526,17 @@ namespace orc {
     virtual const Type& getType() const = 0;
 
     /**
+     * Create a RowReader based on this reader with the default options.
+     * @return a RowReader to read the rows
+     */
+    virtual ORC_UNIQUE_PTR<RowReader> createRowReader() const = 0;
+
+    /**
+     * Create a RowReader based on this reader.
      * @param options RowReader Options
      * @return a RowReader to read the rows
      */
-    virtual ORC_UNIQUE_PTR<RowReader>
-    getRowReader(const RowReaderOptions& options) const = 0;
+    virtual ORC_UNIQUE_PTR<RowReader> createRowReader(const RowReaderOptions& options) const = 0;
 
     /**
      * Get the name of the input stream.

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/ColumnReader.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index c3fd17b..467f8bb 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -1416,10 +1416,9 @@ namespace orc {
                     (const Type& type,
                      StripeStreams& stripe
                      ): Decimal64ColumnReader(type, stripe) {
-    const RowReaderOptions options = stripe.getRowReaderOptions();
-    scale = options.getForcedScaleOnHive11Decimal();
-    throwOnOverflow = options.getThrowOnHive11DecimalOverflow();
-    errorStream = options.getErrorStream();
+    scale = stripe.getForcedScaleOnHive11Decimal();
+    throwOnOverflow = stripe.getThrowOnHive11DecimalOverflow();
+    errorStream = stripe.getErrorStream();
   }
 
   DecimalHive11ColumnReader::~DecimalHive11ColumnReader() {

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/ColumnReader.hh
----------------------------------------------------------------------
diff --git a/c++/src/ColumnReader.hh b/c++/src/ColumnReader.hh
index 19a3ea6..52704fc 100644
--- a/c++/src/ColumnReader.hh
+++ b/c++/src/ColumnReader.hh
@@ -32,11 +32,6 @@ namespace orc {
     virtual ~StripeStreams();
 
     /**
-     * Get the reader options.
-     */
-    virtual const RowReaderOptions& getRowReaderOptions() const = 0;
-
-    /**
      * Get the array of booleans for which columns are selected.
      * @return the address of an array which contains true at the index of
      *    each columnId is selected.
@@ -69,6 +64,25 @@ namespace orc {
      * Get the writer's timezone, so that we can convert their dates correctly.
      */
     virtual const Timezone& getWriterTimezone() const = 0;
+
+    /**
+     * Get the error stream.
+     * @return a pointer to the stream that should get error messages
+     */
+    virtual std::ostream* getErrorStream() const = 0;
+
+    /**
+     * Should the reader throw when the scale overflows when reading Hive 0.11
+     * decimals.
+     * @return true if it should throw
+     */
+    virtual bool getThrowOnHive11DecimalOverflow() const = 0;
+
+    /**
+     * What is the scale forced on the Hive 0.11 decimals?
+     * @return the number of scale digits
+     */
+    virtual int32_t getForcedScaleOnHive11Decimal() const = 0;
   };
 
   /**

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/Options.hh
----------------------------------------------------------------------
diff --git a/c++/src/Options.hh b/c++/src/Options.hh
index 72beb3f..0c644af 100644
--- a/c++/src/Options.hh
+++ b/c++/src/Options.hh
@@ -129,8 +129,6 @@ namespace orc {
     uint64_t dataLength;
     bool throwOnHive11DecimalOverflow;
     int32_t forcedScaleOnHive11Decimal;
-    std::ostream* errorStream;
-    MemoryPool* memoryPool;
 
     RowReaderOptionsPrivate() {
       selection = ColumnSelection_NONE;
@@ -138,8 +136,6 @@ namespace orc {
       dataLength = std::numeric_limits<uint64_t>::max();
       throwOnHive11DecimalOverflow = true;
       forcedScaleOnHive11Decimal = 6;
-      errorStream = &std::cerr;
-      memoryPool = getDefaultPool();
     }
   };
 
@@ -200,15 +196,6 @@ namespace orc {
     return *this;
   }
 
-  RowReaderOptions& RowReaderOptions::setMemoryPool(MemoryPool& pool) {
-    privateBits->memoryPool = &pool;
-    return *this;
-  }
-
-  MemoryPool* RowReaderOptions::getMemoryPool() const{
-    return privateBits->memoryPool;
-  }
-
   bool RowReaderOptions::getIndexesSet() const {
     return privateBits->selection == ColumnSelection_FIELD_IDS;
   }
@@ -255,16 +242,6 @@ namespace orc {
   int32_t RowReaderOptions::getForcedScaleOnHive11Decimal() const {
     return privateBits->forcedScaleOnHive11Decimal;
   }
-
-  RowReaderOptions& RowReaderOptions::setErrorStream(std::ostream& stream) {
-    privateBits->errorStream = &stream;
-    return *this;
-  }
-
-  std::ostream* RowReaderOptions::getErrorStream() const {
-    return privateBits->errorStream;
-  }
-
 }
 
 #endif

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 501336b..1ddaebd 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -213,13 +213,13 @@ namespace orc {
   }
 
   RowReaderImpl::RowReaderImpl(std::shared_ptr<FileContents> _contents,
-                            const RowReaderOptions& opts
+                               const RowReaderOptions& opts
                          ): localTimezone(getLocalTimezone()),
                             contents(_contents),
-                            options(opts),
-                            memoryPool(*opts.getMemoryPool()),
+                            throwOnHive11DecimalOverflow(opts.getThrowOnHive11DecimalOverflow()),
+                            forcedScaleOnHive11Decimal(opts.getForcedScaleOnHive11Decimal()),
                             footer(contents->footer.get()),
-                            firstRowOfStripe(memoryPool, 0) {
+                            firstRowOfStripe(*contents->pool, 0) {
     uint64_t numberOfStripes;
     numberOfStripes = static_cast<uint64_t>(footer->stripes_size());
     currentStripe = numberOfStripes;
@@ -256,11 +256,7 @@ namespace orc {
     }
 
     ColumnSelector column_selector(contents.get());
-    column_selector.updateSelected(selectedColumns, options);
-  }
-
-  const RowReaderOptions& RowReaderImpl::getRowReaderOptions() const {
-    return options;
+    column_selector.updateSelected(selectedColumns, opts);
   }
 
   CompressionKind RowReaderImpl::getCompression() const {
@@ -329,6 +325,18 @@ namespace orc {
     reader->skip(currentRowInStripe);
   }
 
+  const FileContents& RowReaderImpl::getFileContents() const {
+    return *contents;
+  }
+
+  bool RowReaderImpl::getThrowOnHive11DecimalOverflow() const {
+    return throwOnHive11DecimalOverflow;
+  }
+
+  int32_t RowReaderImpl::getForcedScaleOnHive11Decimal() const {
+    return forcedScaleOnHive11Decimal;
+  }
+
   proto::StripeFooter RowReaderImpl::getStripeFooter
        (const proto::StripeInformation& info) {
     uint64_t stripeFooterStart = info.offset() + info.indexlength() +
@@ -340,9 +348,9 @@ namespace orc {
                          (new SeekableFileInputStream(contents->stream.get(),
                                                       stripeFooterStart,
                                                       stripeFooterLength,
-                                                      memoryPool)),
+                                                      *contents->pool)),
                          contents->blockSize,
-                         memoryPool);
+                         *contents->pool);
     proto::StripeFooter result;
     if (!result.ParseFromZeroCopyStream(pbStream.get())) {
       throw ParseError(std::string("bad StripeFooter from ") +
@@ -359,7 +367,6 @@ namespace orc {
                             options(opts),
                             fileLength(_fileLength),
                             postscriptLength(_postscriptLength),
-                            memoryPool(*opts.getMemoryPool()),
                             footer(contents->footer.get()) {
     isMetadataLoaded = false;
     checkOrcVersion();
@@ -424,7 +431,7 @@ namespace orc {
         stripeInfo.footerlength(),
         stripeInfo.numberofrows(),
         contents->stream.get(),
-        memoryPool,
+        *contents->pool,
         contents->compression,
         contents->blockSize));
   }
@@ -551,9 +558,9 @@ namespace orc {
                              (new SeekableFileInputStream(contents->stream.get(),
                                                           metadataStart,
                                                           metadataSize,
-                                                          memoryPool)),
+                                                          *contents->pool)),
                            contents->blockSize,
-                           memoryPool);
+                           *contents->pool);
       metadata.reset(new proto::Metadata());
       if (!metadata->ParseFromZeroCopyStream(pbStream.get())) {
         throw ParseError("Failed to parse the metadata");
@@ -576,7 +583,12 @@ namespace orc {
     }
   }
 
-  std::unique_ptr<RowReader> ReaderImpl::getRowReader(
+  std::unique_ptr<RowReader> ReaderImpl::createRowReader() const {
+    RowReaderOptions defaultOpts;
+    return createRowReader(defaultOpts);
+  }
+
+  std::unique_ptr<RowReader> ReaderImpl::createRowReader(
            const RowReaderOptions& opts) const {
     return std::unique_ptr<RowReader>(new RowReaderImpl(contents, opts));
   }
@@ -758,7 +770,6 @@ namespace orc {
     StripeStreamsImpl stripeStreams(*this, currentStripeFooter,
                                     currentStripeInfo.offset(),
                                     *(contents->stream.get()),
-                                    memoryPool,
                                     writerTimezone);
     reader = buildReader(*contents->schema.get(), stripeStreams);
   }
@@ -794,7 +805,7 @@ namespace orc {
 
   std::unique_ptr<ColumnVectorBatch> RowReaderImpl::createRowBatch
                                               (uint64_t capacity) const {
-    return getSelectedType().createRowBatch(capacity, memoryPool);
+    return getSelectedType().createRowBatch(capacity, *contents->pool);
   }
 
   void ensureOrcFooter(InputStream* stream,
@@ -883,8 +894,9 @@ namespace orc {
 
   std::unique_ptr<Reader> createReader(std::unique_ptr<InputStream> stream,
                                        const ReaderOptions& options) {
-    MemoryPool *memoryPool = options.getMemoryPool();
     std::shared_ptr<FileContents> contents = std::shared_ptr<FileContents>(new FileContents());
+    contents->pool = options.getMemoryPool();
+    contents->errorStream = options.getErrorStream();
     std::string serializedFooter = options.getSerializedFileTail();
     uint64_t fileLength;
     uint64_t postscriptLength;
@@ -908,7 +920,7 @@ namespace orc {
       if (readSize < 4) {
         throw ParseError("File size too small");
       }
-      DataBuffer<char> *buffer = new DataBuffer<char>(*memoryPool, readSize);
+      DataBuffer<char> *buffer = new DataBuffer<char>(*contents->pool, readSize);
       stream->read(buffer->data(), readSize, fileLength - readSize);
 
       postscriptLength = buffer->data()[readSize - 1] & 0xff;
@@ -927,7 +939,7 @@ namespace orc {
       }
 
       contents->footer = REDUNDANT_MOVE(readFooter(stream.get(), buffer,
-        footerOffset, *contents->postscript,  *memoryPool));
+        footerOffset, *contents->postscript,  *contents->pool));
       delete buffer;
     }
     contents->stream = std::move(stream);

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/Reader.hh
----------------------------------------------------------------------
diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index 3b2eac1..040d60a 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -30,10 +30,6 @@
 
 namespace orc {
 
-  class ReaderOptions;
-  class RowReaderOptions;
-  class StripeInformation;
-
   static const uint64_t DIRECTORY_SIZE_GUESS = 16 * 1024;
 
   /**
@@ -46,6 +42,8 @@ namespace orc {
     std::unique_ptr<Type> schema;
     uint64_t blockSize;
     CompressionKind compression;
+    MemoryPool *pool;
+    std::ostream *errorStream;
   };
 
   class ReaderImpl;
@@ -95,13 +93,11 @@ namespace orc {
 
     // contents
     std::shared_ptr<FileContents> contents;
+    const bool throwOnHive11DecimalOverflow;
+    const int32_t forcedScaleOnHive11Decimal;
 
     // inputs
     std::vector<bool> selectedColumns;
-    const RowReaderOptions& options;
-
-    // custom memory pool
-    MemoryPool& memoryPool;
 
     // footer
     proto::Footer* footer;
@@ -130,7 +126,7 @@ namespace orc {
     * @param options options for reading
     */
     RowReaderImpl(std::shared_ptr<FileContents> contents,
-           const RowReaderOptions& options);
+                  const RowReaderOptions& options);
 
     // Select the columns from the options object
     void updateSelected();
@@ -143,8 +139,6 @@ namespace orc {
 
     bool next(ColumnVectorBatch& data) override;
 
-    const RowReaderOptions& getRowReaderOptions() const;
-
     CompressionKind getCompression() const;
 
     uint64_t getCompressionSize() const;
@@ -153,8 +147,9 @@ namespace orc {
 
     void seekToRow(uint64_t rowNumber) override;
 
-    MemoryPool* getMemoryPool() const ;
-
+    const FileContents& getFileContents() const;
+    bool getThrowOnHive11DecimalOverflow() const;
+    int32_t getForcedScaleOnHive11Decimal() const;
   };
 
   class ReaderImpl : public Reader {
@@ -167,9 +162,6 @@ namespace orc {
     const uint64_t fileLength;
     const uint64_t postscriptLength;
 
-    // custom memory pool
-    MemoryPool& memoryPool;
-
     // footer
     proto::Footer* footer;
     uint64_t numberOfStripes;
@@ -227,8 +219,10 @@ namespace orc {
     std::unique_ptr<Statistics>
     getStripeStatistics(uint64_t stripeIndex) const override;
 
-    std::unique_ptr<RowReader> getRowReader(const RowReaderOptions& options
-                                           ) const override;
+    std::unique_ptr<RowReader> createRowReader() const override;
+
+    std::unique_ptr<RowReader> createRowReader(const RowReaderOptions& options
+                                               ) const override;
 
     uint64_t getContentLength() const override;
     uint64_t getStripeStatisticsLength() const override;

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/StripeStream.cc
----------------------------------------------------------------------
diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc
index 1be58f1..07ac995 100644
--- a/c++/src/StripeStream.cc
+++ b/c++/src/StripeStream.cc
@@ -29,13 +29,11 @@ namespace orc {
                                        const proto::StripeFooter& _footer,
                                        uint64_t _stripeStart,
                                        InputStream& _input,
-                                       MemoryPool& _memoryPool,
                                        const Timezone& _writerTimezone
                                        ): reader(_reader),
                                           footer(_footer),
                                           stripeStart(_stripeStart),
                                           input(_input),
-                                          memoryPool(_memoryPool),
                                           writerTimezone(_writerTimezone) {
     // PASS
   }
@@ -57,10 +55,6 @@ namespace orc {
     // PASS
   }
 
-  const RowReaderOptions& StripeStreamsImpl::getRowReaderOptions() const {
-    return reader.getRowReaderOptions();
-  }
-
   const std::vector<bool> StripeStreamsImpl::getSelectedColumns() const {
     return reader.getSelectedColumns();
   }
@@ -74,11 +68,16 @@ namespace orc {
     return writerTimezone;
   }
 
-  std::unique_ptr<SeekableInputStream>
+  std::ostream* StripeStreamsImpl::getErrorStream() const {
+    return reader.getFileContents().errorStream;
+  }
+
+    std::unique_ptr<SeekableInputStream>
   StripeStreamsImpl::getStream(uint64_t columnId,
                                proto::Stream_Kind kind,
                                bool shouldStream) const {
     uint64_t offset = stripeStart;
+    MemoryPool *pool = reader.getFileContents().pool;
     for(int i = 0; i < footer.streams_size(); ++i) {
       const proto::Stream& stream = footer.streams(i);
       if (stream.has_kind() &&
@@ -92,10 +91,10 @@ namespace orc {
                                    (&input,
                                     offset,
                                     stream.length(),
-                                    memoryPool,
+                                    *pool,
                                     myBlock)),
                                   reader.getCompressionSize(),
-                                  memoryPool);
+                                  *pool);
       }
       offset += stream.length();
     }
@@ -103,7 +102,15 @@ namespace orc {
   }
 
   MemoryPool& StripeStreamsImpl::getMemoryPool() const {
-    return memoryPool;
+    return *reader.getFileContents().pool;
+  }
+
+  bool StripeStreamsImpl::getThrowOnHive11DecimalOverflow() const {
+    return reader.getThrowOnHive11DecimalOverflow();
+  }
+
+  int32_t StripeStreamsImpl::getForcedScaleOnHive11Decimal() const {
+    return reader.getForcedScaleOnHive11Decimal();
   }
 
   void StripeInformationImpl::ensureStripeFooterLoaded() const {

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/StripeStream.hh
----------------------------------------------------------------------
diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh
index b3eb454..8f31397 100644
--- a/c++/src/StripeStream.hh
+++ b/c++/src/StripeStream.hh
@@ -40,7 +40,6 @@ namespace orc {
     const proto::StripeFooter& footer;
     const uint64_t stripeStart;
     InputStream& input;
-    MemoryPool& memoryPool;
     const Timezone& writerTimezone;
 
   public:
@@ -48,13 +47,10 @@ namespace orc {
                       const proto::StripeFooter& footer,
                       uint64_t stripeStart,
                       InputStream& input,
-                      MemoryPool& memoryPool,
                       const Timezone& writerTimezone);
 
     virtual ~StripeStreamsImpl();
 
-    virtual const RowReaderOptions& getRowReaderOptions() const override;
-
     virtual const std::vector<bool> getSelectedColumns() const override;
 
     virtual proto::ColumnEncoding getEncoding(uint64_t columnId
@@ -68,6 +64,12 @@ namespace orc {
     MemoryPool& getMemoryPool() const override;
 
     const Timezone& getWriterTimezone() const override;
+
+    std::ostream* getErrorStream() const override;
+
+    bool getThrowOnHive11DecimalOverflow() const override;
+
+    int32_t getForcedScaleOnHive11Decimal() const override;
   };
 
  /**

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/test/TestColumnReader.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc
index 7c41b57..d09ecca 100644
--- a/c++/test/TestColumnReader.cc
+++ b/c++/test/TestColumnReader.cc
@@ -41,11 +41,14 @@ public:
   std::unique_ptr<SeekableInputStream> getStream(uint64_t columnId,
                                                  proto::Stream_Kind kind,
                                                  bool stream) const override;
-  MOCK_CONST_METHOD0(getRowReaderOptions, const RowReaderOptions&());
   MOCK_CONST_METHOD0(getSelectedColumns, const std::vector<bool>());
   MOCK_CONST_METHOD1(getEncoding, proto::ColumnEncoding (uint64_t));
   MOCK_CONST_METHOD3(getStreamProxy, SeekableInputStream*
                      (uint64_t, proto::Stream_Kind, bool));
+  MOCK_CONST_METHOD0(getErrorStream, std::ostream*());
+  MOCK_CONST_METHOD0(getThrowOnHive11DecimalOverflow, bool());
+  MOCK_CONST_METHOD0(getForcedScaleOnHive11Decimal, int32_t());
+
   MemoryPool& getMemoryPool() const {
     return *getDefaultPool();
   }
@@ -3179,15 +3182,14 @@ TEST(DecimalColumnReader, testDecimal128Skip) {
 TEST(DecimalColumnReader, testDecimalHive11) {
   MockStripeStreams streams;
 
-  // set getRowReaderOptions()
-  RowReaderOptions rowReaderOptions;
-  EXPECT_CALL(streams, getRowReaderOptions())
-    .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
-
   // set getSelectedColumns()
   std::vector<bool> selectedColumns(2, true);
   EXPECT_CALL(streams, getSelectedColumns())
       .WillRepeatedly(testing::Return(selectedColumns));
+  EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+      .WillRepeatedly(testing::Return(true));
+  EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+      .WillRepeatedly(testing::Return(6));
 
   // set getEncoding
   proto::ColumnEncoding directEncoding;
@@ -3258,12 +3260,10 @@ TEST(DecimalColumnReader, testDecimalHive11) {
 TEST(DecimalColumnReader, testDecimalHive11Skip) {
   MockStripeStreams streams;
 
-  // set getRowReaderOptions()
-  RowReaderOptions rowReaderOptions;
-  rowReaderOptions.throwOnHive11DecimalOverflow(false)
-    .forcedScaleOnHive11Decimal(3);
-  EXPECT_CALL(streams, getRowReaderOptions())
-      .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+  EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+      .WillRepeatedly(testing::Return(false));
+  EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+      .WillRepeatedly(testing::Return(3));
 
   // set getSelectedColumns()
   std::vector<bool> selectedColumns(2, true);
@@ -3365,11 +3365,10 @@ TEST(DecimalColumnReader, testDecimalHive11Skip) {
 TEST(DecimalColumnReader, testDecimalHive11ScaleUp) {
   MockStripeStreams streams;
 
-  // set getRowReaderOptions()
-  RowReaderOptions rowReaderOptions;
-  rowReaderOptions.forcedScaleOnHive11Decimal(20);
-  EXPECT_CALL(streams, getRowReaderOptions())
-      .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+  EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+      .WillRepeatedly(testing::Return(true));
+  EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+      .WillRepeatedly(testing::Return(20));
 
   // set getSelectedColumns()
   std::vector<bool> selectedColumns(2, true);
@@ -3434,11 +3433,10 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleUp) {
 TEST(DecimalColumnReader, testDecimalHive11ScaleDown) {
   MockStripeStreams streams;
 
-  // set getRowReaderOptions()
-  RowReaderOptions rowReaderOptions;
-  rowReaderOptions.forcedScaleOnHive11Decimal(0);
-  EXPECT_CALL(streams, getRowReaderOptions())
-      .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+  EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+      .WillRepeatedly(testing::Return(true));
+  EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+      .WillRepeatedly(testing::Return(0));
 
   // set getSelectedColumns()
   std::vector<bool> selectedColumns(2, true);
@@ -3523,10 +3521,10 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleDown) {
 TEST(DecimalColumnReader, testDecimalHive11OverflowException) {
   MockStripeStreams streams;
 
-  // set getRowReaderOptions()
-  RowReaderOptions rowReaderOptions;
-  EXPECT_CALL(streams, getRowReaderOptions())
-      .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+  EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+      .WillRepeatedly(testing::Return(true));
+  EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+      .WillRepeatedly(testing::Return(6));
 
   // set getSelectedColumns()
   std::vector<bool> selectedColumns(2, true);
@@ -3581,10 +3579,10 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowException) {
 TEST(DecimalColumnReader, testDecimalHive11OverflowExceptionNull) {
   MockStripeStreams streams;
 
-  // set getRowReaderOptions()
-  RowReaderOptions rowReaderOptions;
-  EXPECT_CALL(streams, getRowReaderOptions())
-      .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+  EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+      .WillRepeatedly(testing::Return(true));
+  EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+      .WillRepeatedly(testing::Return(6));
 
   // set getSelectedColumns()
   std::vector<bool> selectedColumns(2, true);
@@ -3639,13 +3637,14 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowExceptionNull) {
 TEST(DecimalColumnReader, testDecimalHive11OverflowNull) {
   MockStripeStreams streams;
 
-  // set getRowReaderOptions()
-  RowReaderOptions rowReaderOptions;
   std::stringstream errStream;
-  rowReaderOptions.throwOnHive11DecimalOverflow(false)
-    .setErrorStream(errStream);
-  EXPECT_CALL(streams, getRowReaderOptions())
-      .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+  EXPECT_CALL(streams, getErrorStream())
+      .WillRepeatedly(testing::Return(&errStream));
+
+  EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+      .WillRepeatedly(testing::Return(false));
+  EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+      .WillRepeatedly(testing::Return(6));
 
   // set getSelectedColumns()
   std::vector<bool> selectedColumns(2, true);
@@ -3729,10 +3728,10 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowNull) {
 TEST(DecimalColumnReader, testDecimalHive11BigBatches) {
   MockStripeStreams streams;
 
-  // set getRowReaderOptions()
-  RowReaderOptions rowReaderOptions;
-  EXPECT_CALL(streams, getRowReaderOptions())
-      .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+  EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+      .WillRepeatedly(testing::Return(true));
+  EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+      .WillRepeatedly(testing::Return(6));
 
   // set getSelectedColumns()
   std::vector<bool> selectedColumns(2, true);

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/tools/src/FileContents.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc
index 971cb2a..9ff86a0 100644
--- a/tools/src/FileContents.cc
+++ b/tools/src/FileContents.cc
@@ -26,12 +26,12 @@
 #include <iostream>
 #include <string>
 
-void printContents(const char* filename, const orc::RowReaderOptions rowReaderOpts) {
+void printContents(const char* filename, const orc::RowReaderOptions& rowReaderOpts) {
   orc::ReaderOptions readerOpts;
   std::unique_ptr<orc::Reader> reader;
   std::unique_ptr<orc::RowReader> rowReader;
   reader = orc::createReader(orc::readLocalFile(std::string(filename)), readerOpts);
-  rowReader = reader->getRowReader(rowReaderOpts);
+  rowReader = reader->createRowReader(rowReaderOpts);
 
   std::unique_ptr<orc::ColumnVectorBatch> batch = rowReader->createRowBatch(1000);
   std::string line;

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/tools/src/FileMemory.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMemory.cc b/tools/src/FileMemory.cc
index 6d03025..c133501 100644
--- a/tools/src/FileMemory.cc
+++ b/tools/src/FileMemory.cc
@@ -69,11 +69,10 @@ void processFile(const char* filename,
   }
   std::unique_ptr<orc::MemoryPool> pool(new TestMemoryPool());
   readerOpts.setMemoryPool(*(pool.get()));
-  rowReaderOpts.setMemoryPool(*(pool.get()));
 
   std::unique_ptr<orc::Reader> reader =
                   orc::createReader(orc::readLocalFile(std::string(filename)), readerOpts);
-  std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+  std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader(rowReaderOpts);
 
   std::unique_ptr<orc::ColumnVectorBatch> batch =
       rowReader->createRowBatch(batchSize);

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/tools/src/FileScan.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileScan.cc b/tools/src/FileScan.cc
index 2f3f296..fd5a32b 100644
--- a/tools/src/FileScan.cc
+++ b/tools/src/FileScan.cc
@@ -28,10 +28,9 @@
 
 void scanFile(std::ostream & out, const char* filename, uint64_t batchSize) {
   orc::ReaderOptions readerOpts;
-  orc::RowReaderOptions rowReaderOpts;
   std::unique_ptr<orc::Reader> reader =
     orc::createReader(orc::readLocalFile(filename), readerOpts);
-  std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+  std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader();
   std::unique_ptr<orc::ColumnVectorBatch> batch =
     rowReader->createRowBatch(batchSize);
 

http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/tools/test/TestMatch.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc
index 41ddb23..7ab6150 100644
--- a/tools/test/TestMatch.cc
+++ b/tools/test/TestMatch.cc
@@ -100,10 +100,9 @@ namespace orc {
 
   TEST_P(FileParam, Metadata) {
     orc::ReaderOptions readerOpts;
-    orc::RowReaderOptions rowReaderOpts;
     std::unique_ptr<Reader> reader =
       createReader(readLocalFile(getFilename()), readerOpts);
-    std::unique_ptr<RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+    std::unique_ptr<RowReader> rowReader = reader->createRowReader();
 
     EXPECT_EQ(GetParam().compression, reader->getCompression());
     EXPECT_EQ(GetParam().compressionSize, reader->getCompressionSize());
@@ -130,9 +129,8 @@ namespace orc {
 
   TEST_P(FileParam, Contents) {
     orc::ReaderOptions readerOpts;
-    orc::RowReaderOptions rowReaderOpts;
     std::unique_ptr<RowReader> rowReader =
-         createReader(readLocalFile(getFilename()), readerOpts)->getRowReader(rowReaderOpts);
+         createReader(readLocalFile(getFilename()), readerOpts)->createRowReader();
 
     unsigned long rowCount = 0;
     std::unique_ptr<ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
@@ -540,7 +538,7 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
     std::string filename = findExample("demo-11-none.orc");
     std::unique_ptr<Reader> reader =
       createReader(readLocalFile(filename), readerOpts);
-    std::unique_ptr<RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+    std::unique_ptr<RowReader> rowReader = reader->createRowReader(rowReaderOpts);
 
     EXPECT_EQ(CompressionKind_NONE, reader->getCompression());
     EXPECT_EQ(256 * 1024, reader->getCompressionSize());
@@ -633,10 +631,10 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
     offsetOpts.range(80000, 130722);
     std::string filename = findExample("demo-11-none.orc");
     std::unique_ptr<Reader> reader = createReader(readLocalFile(filename), opts);
-    std::unique_ptr<RowReader> fullReader = reader->getRowReader(fullOpts);
-    std::unique_ptr<RowReader> lastReader = reader->getRowReader(lastOpts);
-    std::unique_ptr<RowReader> oobReader = reader->getRowReader(oobOpts);
-    std::unique_ptr<RowReader> offsetReader = reader->getRowReader(offsetOpts);
+    std::unique_ptr<RowReader> fullReader = reader->createRowReader(fullOpts);
+    std::unique_ptr<RowReader> lastReader = reader->createRowReader(lastOpts);
+    std::unique_ptr<RowReader> oobReader = reader->createRowReader(oobOpts);
+    std::unique_ptr<RowReader> offsetReader = reader->createRowReader(offsetOpts);
 
     std::unique_ptr<ColumnVectorBatch> oobBatch =
       oobReader->createRowBatch(5000);
@@ -816,11 +814,10 @@ TEST(TestMatch, seekToRow) {
   /* Test with a regular file */
   {
     orc::ReaderOptions readerOpts;
-    orc::RowReaderOptions rowReaderOpts;
     std::string filename = findExample("demo-11-none.orc");
     std::unique_ptr<orc::Reader> reader =
         orc::createReader(orc::readLocalFile(filename), readerOpts);
-    std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+    std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader();
     EXPECT_EQ(1920800, reader->getNumberOfRows());
 
     std::unique_ptr<orc::ColumnVectorBatch> batch =
@@ -857,7 +854,7 @@ TEST(TestMatch, seekToRow) {
 
     std::unique_ptr<orc::Reader> reader =
         orc::createReader(orc::readLocalFile(filename), readerOpts);
-    std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+    std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader(rowReaderOpts);
     EXPECT_EQ(1920800, reader->getNumberOfRows());
 
     std::unique_ptr<orc::ColumnVectorBatch> batch =
@@ -884,11 +881,10 @@ TEST(TestMatch, seekToRow) {
   /* Test with an empty file */
   {
     orc::ReaderOptions readerOpts;
-    orc::RowReaderOptions rowReaderOpts;
     std::string filename = findExample("TestOrcFile.emptyFile.orc");
     std::unique_ptr<orc::Reader> reader =
         orc::createReader(orc::readLocalFile(filename), readerOpts);
-    std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+    std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader();
     EXPECT_EQ(0, reader->getNumberOfRows());
 
     std::unique_ptr<orc::ColumnVectorBatch> batch =
@@ -929,7 +925,7 @@ TEST(TestMatch, selectColumns) {
     // All columns
     std::unique_ptr<orc::Reader> reader =
         orc::createReader(orc::readLocalFile(filename), readerOpts);
-    std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+    std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader(rowReaderOpts);
     std::vector<bool> c = rowReader->getSelectedColumns();
     EXPECT_EQ(24, c.size());
     for (unsigned int i=0; i < c.size(); i++) {
@@ -960,7 +956,7 @@ TEST(TestMatch, selectColumns) {
     std::list<uint64_t> cols;
     cols.push_back(1);
     rowReaderOpts.include(cols);
-    rowReader = reader->getRowReader(rowReaderOpts);
+    rowReader = reader->createRowReader(rowReaderOpts);
     c = rowReader->getSelectedColumns();
     for (unsigned int i=1; i < c.size(); i++) {
       if (i==2)
@@ -982,7 +978,7 @@ TEST(TestMatch, selectColumns) {
     cols.clear();
     cols.push_back(9);
     rowReaderOpts.include(cols);
-    rowReader = reader->getRowReader(rowReaderOpts);
+    rowReader = reader->createRowReader(rowReaderOpts);
     c = rowReader->getSelectedColumns();
     for (unsigned int i=1; i < c.size(); i++) {
       if (i>=10 && i<=14)
@@ -1006,7 +1002,7 @@ TEST(TestMatch, selectColumns) {
     cols.clear();
     cols.push_back(10);
     rowReaderOpts.include(cols);
-    rowReader = reader->getRowReader(rowReaderOpts);
+    rowReader = reader->createRowReader(rowReaderOpts);
     c = rowReader->getSelectedColumns();
     for (unsigned int i=1; i < c.size(); i++) {
       if (i>=15 && i<=18)
@@ -1027,7 +1023,7 @@ TEST(TestMatch, selectColumns) {
     cols.clear();
     cols.push_back(11);
     rowReaderOpts.include(cols);
-    rowReader = reader->getRowReader(rowReaderOpts);
+    rowReader = reader->createRowReader(rowReaderOpts);
     c = rowReader->getSelectedColumns();
     for (unsigned int i=1; i < c.size(); i++) {
       if (i>=19 && i<=23)
@@ -1056,7 +1052,7 @@ TEST(TestMatch, selectColumns) {
     cols.push_back(22);
     cols.push_back(23);
     rowReaderOpts.includeTypes(cols);
-    rowReader = reader->getRowReader(rowReaderOpts);
+    rowReader = reader->createRowReader(rowReaderOpts);
     c = rowReader->getSelectedColumns();
     for (unsigned int i=1; i < c.size(); i++) {
       if (i>=19 && i<=23)
@@ -1082,7 +1078,7 @@ TEST(TestMatch, selectColumns) {
     colNames.push_back("middle.list.int1");
     colNames.push_back("middle.list.string1");
     rowReaderOpts.include(colNames);
-    rowReader = reader->getRowReader(rowReaderOpts);
+    rowReader = reader->createRowReader(rowReaderOpts);
     c = rowReader->getSelectedColumns();
     for (unsigned int i=1; i < c.size(); i++) {
       if (i>=10 && i<=14)
@@ -1116,7 +1112,7 @@ TEST(Reader, memoryUse) {
   cols.push_back(1);
   rowReaderOpts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
-  rowReader = reader->getRowReader(rowReaderOpts);
+  rowReader = reader->createRowReader(rowReaderOpts);
   EXPECT_EQ(483517, reader->getMemoryUseByFieldId(cols));
   batch = rowReader->createRowBatch(1);
   EXPECT_EQ(10, batch->getMemoryUsage());
@@ -1129,7 +1125,7 @@ TEST(Reader, memoryUse) {
   cols.push_back(7);
   rowReaderOpts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
-  rowReader = reader->getRowReader(rowReaderOpts);
+  rowReader = reader->createRowReader(rowReaderOpts);
   EXPECT_EQ(835906, reader->getMemoryUseByFieldId(cols));
   batch = rowReader->createRowBatch(1);
   EXPECT_EQ(18, batch->getMemoryUsage());
@@ -1140,7 +1136,7 @@ TEST(Reader, memoryUse) {
   cols.push_back(8);
   rowReaderOpts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
-  rowReader = reader->getRowReader(rowReaderOpts);
+  rowReader = reader->createRowReader(rowReaderOpts);
   EXPECT_EQ(901442, reader->getMemoryUseByFieldId(cols));
   batch = rowReader->createRowBatch(1);
   EXPECT_EQ(18, batch->getMemoryUsage());
@@ -1151,7 +1147,7 @@ TEST(Reader, memoryUse) {
   cols.push_back(9);
   rowReaderOpts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
-  rowReader = reader->getRowReader(rowReaderOpts);
+  rowReader = reader->createRowReader(rowReaderOpts);
   EXPECT_EQ(1294658, reader->getMemoryUseByFieldId(cols));
   batch = rowReader->createRowBatch(1);
   EXPECT_EQ(46, batch->getMemoryUsage());
@@ -1162,7 +1158,7 @@ TEST(Reader, memoryUse) {
   cols.push_back(10);
   rowReaderOpts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
-  rowReader = reader->getRowReader(rowReaderOpts);
+  rowReader = reader->createRowReader(rowReaderOpts);
   EXPECT_EQ(1229122, reader->getMemoryUseByFieldId(cols));
   batch = rowReader->createRowBatch(1);
   EXPECT_EQ(45, batch->getMemoryUsage());
@@ -1173,7 +1169,7 @@ TEST(Reader, memoryUse) {
   cols.push_back(11);
   rowReaderOpts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
-  rowReader = reader->getRowReader(rowReaderOpts);
+  rowReader = reader->createRowReader(rowReaderOpts);
   EXPECT_EQ(1491266, reader->getMemoryUseByFieldId(cols));
   batch = rowReader->createRowBatch(1);
   EXPECT_EQ(62, batch->getMemoryUsage());
@@ -1186,7 +1182,7 @@ TEST(Reader, memoryUse) {
   }
   rowReaderOpts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
-  rowReader = reader->getRowReader(rowReaderOpts);
+  rowReader = reader->createRowReader(rowReaderOpts);
   EXPECT_EQ(4112706, reader->getMemoryUseByFieldId(cols));
   batch = rowReader->createRowBatch(1);
   EXPECT_EQ(248, batch->getMemoryUsage());