You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2017/01/10 00:31:21 UTC
[1/2] orc git commit: ORC-128. Add getStatistics to Writer API to
allow user to get statistics as the file is written.
Repository: orc
Updated Branches:
refs/heads/master 0e92c5c2e -> 4984cb2a2
ORC-128. Add getStatistics to Writer API to allow user to get statistics as the
file is written.
Fixes #78
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/1e8b5986
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/1e8b5986
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/1e8b5986
Branch: refs/heads/master
Commit: 1e8b5986a16eb014f5cb9ab074ec1059f93f99a6
Parents: 0e92c5c
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jan 6 10:22:11 2017 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Jan 9 16:28:40 2017 -0800
----------------------------------------------------------------------
java/core/src/java/org/apache/orc/Writer.java | 12 +
.../apache/orc/impl/ColumnStatisticsImpl.java | 316 +++++++++++++++++++
.../java/org/apache/orc/impl/ReaderImpl.java | 6 +-
.../java/org/apache/orc/impl/WriterImpl.java | 12 +
.../org/apache/orc/TestOrcNullOptimization.java | 18 +-
.../test/org/apache/orc/TestVectorOrcFile.java | 22 +-
6 files changed, 379 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/java/org/apache/orc/Writer.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/Writer.java b/java/core/src/java/org/apache/orc/Writer.java
index 4492062..596e14e 100644
--- a/java/core/src/java/org/apache/orc/Writer.java
+++ b/java/core/src/java/org/apache/orc/Writer.java
@@ -111,4 +111,16 @@ public interface Writer {
* @param userMetadata - user metadata
*/
public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata);
+
+ /**
+ * Get the statistics about the columns in the file. The output of this is
+ * based on the time at which it is called. It shall use all of the currently
+ * written data to provide the statistics.
+ *
+ * Please note there are costs involved with invoking this method and should
+ * be used judiciously.
+ *
+ * @return the information about the column
+ */
+ ColumnStatistics[] getStatistics() throws IOException;
}
http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index 745ed9a..7e1826a 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -39,6 +39,34 @@ import org.apache.orc.TypeDescription;
public class ColumnStatisticsImpl implements ColumnStatistics {
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof ColumnStatisticsImpl)) {
+ return false;
+ }
+
+ ColumnStatisticsImpl that = (ColumnStatisticsImpl) o;
+
+ if (count != that.count) {
+ return false;
+ }
+ if (hasNull != that.hasNull) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = (int) (count ^ (count >>> 32));
+ result = 31 * result + (hasNull ? 1 : 0);
+ return result;
+ }
+
private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
implements BooleanColumnStatistics {
private long trueCount = 0;
@@ -102,6 +130,34 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
public String toString() {
return super.toString() + " true: " + trueCount;
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof BooleanStatisticsImpl)) {
+ return false;
+ }
+ if (!super.equals(o)) {
+ return false;
+ }
+
+ BooleanStatisticsImpl that = (BooleanStatisticsImpl) o;
+
+ if (trueCount != that.trueCount) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + (int) (trueCount ^ (trueCount >>> 32));
+ return result;
+ }
}
private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
@@ -247,6 +303,50 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
}
return buf.toString();
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof IntegerStatisticsImpl)) {
+ return false;
+ }
+ if (!super.equals(o)) {
+ return false;
+ }
+
+ IntegerStatisticsImpl that = (IntegerStatisticsImpl) o;
+
+ if (minimum != that.minimum) {
+ return false;
+ }
+ if (maximum != that.maximum) {
+ return false;
+ }
+ if (sum != that.sum) {
+ return false;
+ }
+ if (hasMinimum != that.hasMinimum) {
+ return false;
+ }
+ if (overflow != that.overflow) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + (int) (minimum ^ (minimum >>> 32));
+ result = 31 * result + (int) (maximum ^ (maximum >>> 32));
+ result = 31 * result + (int) (sum ^ (sum >>> 32));
+ result = 31 * result + (hasMinimum ? 1 : 0);
+ result = 31 * result + (overflow ? 1 : 0);
+ return result;
+ }
}
private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
@@ -364,6 +464,50 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
buf.append(sum);
return buf.toString();
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof DoubleStatisticsImpl)) {
+ return false;
+ }
+ if (!super.equals(o)) {
+ return false;
+ }
+
+ DoubleStatisticsImpl that = (DoubleStatisticsImpl) o;
+
+ if (hasMinimum != that.hasMinimum) {
+ return false;
+ }
+ if (Double.compare(that.minimum, minimum) != 0) {
+ return false;
+ }
+ if (Double.compare(that.maximum, maximum) != 0) {
+ return false;
+ }
+ if (Double.compare(that.sum, sum) != 0) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ long temp;
+ result = 31 * result + (hasMinimum ? 1 : 0);
+ temp = Double.doubleToLongBits(minimum);
+ result = 31 * result + (int) (temp ^ (temp >>> 32));
+ temp = Double.doubleToLongBits(maximum);
+ result = 31 * result + (int) (temp ^ (temp >>> 32));
+ temp = Double.doubleToLongBits(sum);
+ result = 31 * result + (int) (temp ^ (temp >>> 32));
+ return result;
+ }
}
protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
@@ -498,6 +642,42 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
}
return buf.toString();
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof StringStatisticsImpl)) {
+ return false;
+ }
+ if (!super.equals(o)) {
+ return false;
+ }
+
+ StringStatisticsImpl that = (StringStatisticsImpl) o;
+
+ if (sum != that.sum) {
+ return false;
+ }
+ if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
+ return false;
+ }
+ if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
+ result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
+ result = 31 * result + (int) (sum ^ (sum >>> 32));
+ return result;
+ }
}
protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
@@ -569,6 +749,34 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
}
return buf.toString();
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof BinaryStatisticsImpl)) {
+ return false;
+ }
+ if (!super.equals(o)) {
+ return false;
+ }
+
+ BinaryStatisticsImpl that = (BinaryStatisticsImpl) o;
+
+ if (sum != that.sum) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + (int) (sum ^ (sum >>> 32));
+ return result;
+ }
}
private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
@@ -694,6 +902,42 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
}
return buf.toString();
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof DecimalStatisticsImpl)) {
+ return false;
+ }
+ if (!super.equals(o)) {
+ return false;
+ }
+
+ DecimalStatisticsImpl that = (DecimalStatisticsImpl) o;
+
+ if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
+ return false;
+ }
+ if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
+ return false;
+ }
+ if (sum != null ? !sum.equals(that.sum) : that.sum != null) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
+ result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
+ result = 31 * result + (sum != null ? sum.hashCode() : 0);
+ return result;
+ }
}
private static final class DateStatisticsImpl extends ColumnStatisticsImpl
@@ -815,6 +1059,46 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
}
return buf.toString();
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof DateStatisticsImpl)) {
+ return false;
+ }
+ if (!super.equals(o)) {
+ return false;
+ }
+
+ DateStatisticsImpl that = (DateStatisticsImpl) o;
+
+ if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
+ return false;
+ }
+ if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
+ return false;
+ }
+ if (minDate != null ? !minDate.equals(that.minDate) : that.minDate != null) {
+ return false;
+ }
+ if (maxDate != null ? !maxDate.equals(that.maxDate) : that.maxDate != null) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
+ result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
+ result = 31 * result + (minDate != null ? minDate.hashCode() : 0);
+ result = 31 * result + (maxDate != null ? maxDate.hashCode() : 0);
+ return result;
+ }
}
private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
@@ -925,6 +1209,38 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
}
return buf.toString();
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof TimestampStatisticsImpl)) {
+ return false;
+ }
+ if (!super.equals(o)) {
+ return false;
+ }
+
+ TimestampStatisticsImpl that = (TimestampStatisticsImpl) o;
+
+ if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) {
+ return false;
+ }
+ if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = super.hashCode();
+ result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
+ result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
+ return result;
+ }
}
private long count = 0;
http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index ad3f8ba..c24920d 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -216,7 +216,11 @@ public class ReaderImpl implements Reader {
@Override
public ColumnStatistics[] getStatistics() {
- ColumnStatistics[] result = new ColumnStatistics[types.size()];
+ return deserializeStats(fileStats);
+ }
+
+ static ColumnStatistics[] deserializeStats(List<OrcProto.ColumnStatistics> fileStats){
+ ColumnStatistics[] result = new ColumnStatistics[fileStats.size()];
for(int i=0; i < result.length; ++i) {
result[i] = ColumnStatisticsImpl.deserialize(fileStats.get(i));
}
http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/java/org/apache/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index 940ef59..d3ab8d0 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -37,6 +37,7 @@ import io.airlift.compress.lzo.LzoCompressor;
import io.airlift.compress.lzo.LzoDecompressor;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.orc.BinaryColumnStatistics;
+import org.apache.orc.ColumnStatistics;
import org.apache.orc.util.BloomFilter;
import org.apache.orc.util.BloomFilterIO;
import org.apache.orc.CompressionCodec;
@@ -3059,4 +3060,15 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
}
}
+
+ @Override
+ public ColumnStatistics[] getStatistics()
+ throws IOException {
+ // Generate the stats
+ OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
+
+ // add the column statistics
+ writeFileStatistics(builder, treeWriter);
+ return ReaderImpl.deserializeStats(builder.getStatisticsList());
+ }
}
http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java b/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
index 0b605c9..45b69b2 100644
--- a/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
+++ b/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
@@ -18,13 +18,15 @@
package org.apache.orc;
import static junit.framework.Assert.assertEquals;
+import static org.apache.orc.TestVectorOrcFile.assertEmptyStats;
+import static org.junit.Assert.assertArrayEquals;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Random;
-import junit.framework.Assert;
+import org.junit.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -233,9 +235,19 @@ public class TestOrcNullOptimization {
.compress(CompressionKind.NONE)
.bufferSize(10000));
Random rand = new Random(100);
- VectorizedRowBatch batch = schema.createRowBatch();
+ int batchSize = 5000;
+ VectorizedRowBatch batch = schema.createRowBatch(batchSize);
+ ColumnStatistics[] writerStats = writer.getStatistics();
+ assertEmptyStats(writerStats);
+ int count = 0;
for (int i = 1; i < 20000; i++) {
addRow(writer, batch, rand.nextInt(1), "a", true, 100);
+ count++;
+ if (count % batchSize == 1) {
+ writerStats = writer.getStatistics();
+ } else {
+ assertArrayEquals(writerStats, writer.getStatistics());
+ }
}
addRow(writer, batch, 0, "b", true, 100);
writer.addRowBatch(batch);
@@ -245,6 +257,7 @@ public class TestOrcNullOptimization {
OrcFile.readerOptions(conf).filesystem(fs));
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
+ assertArrayEquals(stats, writer.getStatistics());
assertEquals(20000, reader.getNumberOfRows());
assertEquals(20000, stats[0].getNumberOfValues());
@@ -338,6 +351,7 @@ public class TestOrcNullOptimization {
OrcFile.readerOptions(conf).filesystem(fs));
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
+ assertArrayEquals(stats, writer.getStatistics());
assertEquals(8, reader.getNumberOfRows());
assertEquals(8, stats[0].getNumberOfValues());
http://git-wip-us.apache.org/repos/asf/orc/blob/1e8b5986/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index af31b4d..2448cb7 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -20,7 +20,7 @@ package org.apache.orc;
import com.google.common.collect.Lists;
-import junit.framework.Assert;
+import org.junit.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -69,6 +69,7 @@ import java.util.Random;
import static junit.framework.TestCase.assertNotNull;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
@@ -438,6 +439,7 @@ public class TestVectorOrcFile {
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
+ assertArrayEquals(stats, writer.getStatistics());
assertEquals(4, stats[0].getNumberOfValues());
assertEquals("count: 4 hasNull: false", stats[0].toString());
@@ -914,7 +916,7 @@ public class TestVectorOrcFile {
createInnerSchema()));
}
- static void assertArrayEquals(boolean[] expected, boolean[] actual) {
+ static void assertArrayBooleanEquals(boolean[] expected, boolean[] actual) {
assertEquals(expected.length, actual.length);
boolean diff = false;
for(int i=0; i < expected.length; ++i) {
@@ -935,6 +937,7 @@ public class TestVectorOrcFile {
.setSchema(schema)
.stripeSize(100000)
.bufferSize(10000));
+ assertEmptyStats(writer.getStatistics());
VectorizedRowBatch batch = schema.createRowBatch();
batch.size = 2;
setBigRow(batch, 0, false, (byte) 1, (short) 1024, 65536,
@@ -948,7 +951,9 @@ public class TestVectorOrcFile {
list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
map(inner(5, "chani"), inner(1, "mauddib")));
writer.addRowBatch(batch);
+ assertEmptyStats(writer.getStatistics());
writer.close();
+ ColumnStatistics[] closeStatistics = writer.getStatistics();
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
@@ -969,7 +974,7 @@ public class TestVectorOrcFile {
true, true, true, true};
included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
- assertArrayEquals(expected, included);
+ assertArrayBooleanEquals(expected, included);
expected = new boolean[] {false, true, false, false, false,
false, false, false, false, true,
@@ -977,7 +982,7 @@ public class TestVectorOrcFile {
false, false, false, false, true,
true, true, true, true};
included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
- assertArrayEquals(expected, included);
+ assertArrayBooleanEquals(expected, included);
expected = new boolean[] {false, true, true, true, true,
true, true, true, true, true,
@@ -991,6 +996,7 @@ public class TestVectorOrcFile {
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
+ assertArrayEquals(stats, closeStatistics);
assertEquals(2, stats[1].getNumberOfValues());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
@@ -1128,6 +1134,13 @@ public class TestVectorOrcFile {
rows.close();
}
+ static void assertEmptyStats(ColumnStatistics[] writerStatistics) {
+ for (ColumnStatistics columnStatistics : writerStatistics){
+ assertEquals(0, columnStatistics.getNumberOfValues());
+ assertFalse(columnStatistics.hasNull());
+ }
+ }
+
@Test
public void testColumnProjection() throws Exception {
TypeDescription schema = createInnerSchema();
@@ -2366,6 +2379,7 @@ public class TestVectorOrcFile {
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
+ assertArrayEquals(stats, writer.getStatistics());
assertEquals(4096, stats[0].getNumberOfValues());
assertEquals(false, stats[0].hasNull());
for(TypeDescription colType: schema.getChildren()) {
[2/2] orc git commit: ORC-129. Remove duplicate options between
ReaderOptions and RowReaderOptions. (omalley reviewed by Deepak Majeti)
Posted by om...@apache.org.
ORC-129. Remove duplicate options between ReaderOptions and RowReaderOptions.
(omalley reviewed by Deepak Majeti)
Fixes #79
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/4984cb2a
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/4984cb2a
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/4984cb2a
Branch: refs/heads/master
Commit: 4984cb2a28e3d17fe993accf3bbfbe494d77362b
Parents: 1e8b598
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jan 6 14:12:05 2017 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Jan 9 16:30:10 2017 -0800
----------------------------------------------------------------------
c++/include/orc/Reader.hh | 30 ++++-----------
c++/src/ColumnReader.cc | 7 ++--
c++/src/ColumnReader.hh | 24 +++++++++---
c++/src/Options.hh | 23 ------------
c++/src/Reader.cc | 54 +++++++++++++++-----------
c++/src/Reader.hh | 30 ++++++---------
c++/src/StripeStream.cc | 27 ++++++++-----
c++/src/StripeStream.hh | 10 +++--
c++/test/TestColumnReader.cc | 79 +++++++++++++++++++--------------------
tools/src/FileContents.cc | 4 +-
tools/src/FileMemory.cc | 3 +-
tools/src/FileScan.cc | 3 +-
tools/test/TestMatch.cc | 52 ++++++++++++--------------
13 files changed, 165 insertions(+), 181 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/include/orc/Reader.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index 234c889..e946648 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -315,21 +315,6 @@ namespace orc {
RowReaderOptions& forcedScaleOnHive11Decimal(int32_t forcedScale);
/**
- * Set the memory allocator.
- */
- RowReaderOptions& setMemoryPool(MemoryPool& pool);
-
- /**
- * Set the stream to use for printing warning or error messages.
- */
- RowReaderOptions& setErrorStream(std::ostream& stream);
-
- /**
- * Get the stream to write warnings or errors to.
- */
- std::ostream* getErrorStream() const;
-
- /**
* Were the field ids set?
*/
bool getIndexesSet() const;
@@ -378,11 +363,6 @@ namespace orc {
* What scale should all Hive 0.11 decimals be normalized to?
*/
int32_t getForcedScaleOnHive11Decimal() const;
-
- /**
- * Get the memory allocator.
- */
- MemoryPool* getMemoryPool() const;
};
@@ -546,11 +526,17 @@ namespace orc {
virtual const Type& getType() const = 0;
/**
+ * Create a RowReader based on this reader with the default options.
+ * @return a RowReader to read the rows
+ */
+ virtual ORC_UNIQUE_PTR<RowReader> createRowReader() const = 0;
+
+ /**
+ * Create a RowReader based on this reader.
* @param options RowReader Options
* @return a RowReader to read the rows
*/
- virtual ORC_UNIQUE_PTR<RowReader>
- getRowReader(const RowReaderOptions& options) const = 0;
+ virtual ORC_UNIQUE_PTR<RowReader> createRowReader(const RowReaderOptions& options) const = 0;
/**
* Get the name of the input stream.
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/ColumnReader.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index c3fd17b..467f8bb 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -1416,10 +1416,9 @@ namespace orc {
(const Type& type,
StripeStreams& stripe
): Decimal64ColumnReader(type, stripe) {
- const RowReaderOptions options = stripe.getRowReaderOptions();
- scale = options.getForcedScaleOnHive11Decimal();
- throwOnOverflow = options.getThrowOnHive11DecimalOverflow();
- errorStream = options.getErrorStream();
+ scale = stripe.getForcedScaleOnHive11Decimal();
+ throwOnOverflow = stripe.getThrowOnHive11DecimalOverflow();
+ errorStream = stripe.getErrorStream();
}
DecimalHive11ColumnReader::~DecimalHive11ColumnReader() {
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/ColumnReader.hh
----------------------------------------------------------------------
diff --git a/c++/src/ColumnReader.hh b/c++/src/ColumnReader.hh
index 19a3ea6..52704fc 100644
--- a/c++/src/ColumnReader.hh
+++ b/c++/src/ColumnReader.hh
@@ -32,11 +32,6 @@ namespace orc {
virtual ~StripeStreams();
/**
- * Get the reader options.
- */
- virtual const RowReaderOptions& getRowReaderOptions() const = 0;
-
- /**
* Get the array of booleans for which columns are selected.
* @return the address of an array which contains true at the index of
* each columnId is selected.
@@ -69,6 +64,25 @@ namespace orc {
* Get the writer's timezone, so that we can convert their dates correctly.
*/
virtual const Timezone& getWriterTimezone() const = 0;
+
+ /**
+ * Get the error stream.
+ * @return a pointer to the stream that should get error messages
+ */
+ virtual std::ostream* getErrorStream() const = 0;
+
+ /**
+ * Should the reader throw when the scale overflows when reading Hive 0.11
+ * decimals.
+ * @return true if it should throw
+ */
+ virtual bool getThrowOnHive11DecimalOverflow() const = 0;
+
+ /**
+ * What is the scale forced on the Hive 0.11 decimals?
+ * @return the number of scale digits
+ */
+ virtual int32_t getForcedScaleOnHive11Decimal() const = 0;
};
/**
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/Options.hh
----------------------------------------------------------------------
diff --git a/c++/src/Options.hh b/c++/src/Options.hh
index 72beb3f..0c644af 100644
--- a/c++/src/Options.hh
+++ b/c++/src/Options.hh
@@ -129,8 +129,6 @@ namespace orc {
uint64_t dataLength;
bool throwOnHive11DecimalOverflow;
int32_t forcedScaleOnHive11Decimal;
- std::ostream* errorStream;
- MemoryPool* memoryPool;
RowReaderOptionsPrivate() {
selection = ColumnSelection_NONE;
@@ -138,8 +136,6 @@ namespace orc {
dataLength = std::numeric_limits<uint64_t>::max();
throwOnHive11DecimalOverflow = true;
forcedScaleOnHive11Decimal = 6;
- errorStream = &std::cerr;
- memoryPool = getDefaultPool();
}
};
@@ -200,15 +196,6 @@ namespace orc {
return *this;
}
- RowReaderOptions& RowReaderOptions::setMemoryPool(MemoryPool& pool) {
- privateBits->memoryPool = &pool;
- return *this;
- }
-
- MemoryPool* RowReaderOptions::getMemoryPool() const{
- return privateBits->memoryPool;
- }
-
bool RowReaderOptions::getIndexesSet() const {
return privateBits->selection == ColumnSelection_FIELD_IDS;
}
@@ -255,16 +242,6 @@ namespace orc {
int32_t RowReaderOptions::getForcedScaleOnHive11Decimal() const {
return privateBits->forcedScaleOnHive11Decimal;
}
-
- RowReaderOptions& RowReaderOptions::setErrorStream(std::ostream& stream) {
- privateBits->errorStream = &stream;
- return *this;
- }
-
- std::ostream* RowReaderOptions::getErrorStream() const {
- return privateBits->errorStream;
- }
-
}
#endif
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 501336b..1ddaebd 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -213,13 +213,13 @@ namespace orc {
}
RowReaderImpl::RowReaderImpl(std::shared_ptr<FileContents> _contents,
- const RowReaderOptions& opts
+ const RowReaderOptions& opts
): localTimezone(getLocalTimezone()),
contents(_contents),
- options(opts),
- memoryPool(*opts.getMemoryPool()),
+ throwOnHive11DecimalOverflow(opts.getThrowOnHive11DecimalOverflow()),
+ forcedScaleOnHive11Decimal(opts.getForcedScaleOnHive11Decimal()),
footer(contents->footer.get()),
- firstRowOfStripe(memoryPool, 0) {
+ firstRowOfStripe(*contents->pool, 0) {
uint64_t numberOfStripes;
numberOfStripes = static_cast<uint64_t>(footer->stripes_size());
currentStripe = numberOfStripes;
@@ -256,11 +256,7 @@ namespace orc {
}
ColumnSelector column_selector(contents.get());
- column_selector.updateSelected(selectedColumns, options);
- }
-
- const RowReaderOptions& RowReaderImpl::getRowReaderOptions() const {
- return options;
+ column_selector.updateSelected(selectedColumns, opts);
}
CompressionKind RowReaderImpl::getCompression() const {
@@ -329,6 +325,18 @@ namespace orc {
reader->skip(currentRowInStripe);
}
+ const FileContents& RowReaderImpl::getFileContents() const {
+ return *contents;
+ }
+
+ bool RowReaderImpl::getThrowOnHive11DecimalOverflow() const {
+ return throwOnHive11DecimalOverflow;
+ }
+
+ int32_t RowReaderImpl::getForcedScaleOnHive11Decimal() const {
+ return forcedScaleOnHive11Decimal;
+ }
+
proto::StripeFooter RowReaderImpl::getStripeFooter
(const proto::StripeInformation& info) {
uint64_t stripeFooterStart = info.offset() + info.indexlength() +
@@ -340,9 +348,9 @@ namespace orc {
(new SeekableFileInputStream(contents->stream.get(),
stripeFooterStart,
stripeFooterLength,
- memoryPool)),
+ *contents->pool)),
contents->blockSize,
- memoryPool);
+ *contents->pool);
proto::StripeFooter result;
if (!result.ParseFromZeroCopyStream(pbStream.get())) {
throw ParseError(std::string("bad StripeFooter from ") +
@@ -359,7 +367,6 @@ namespace orc {
options(opts),
fileLength(_fileLength),
postscriptLength(_postscriptLength),
- memoryPool(*opts.getMemoryPool()),
footer(contents->footer.get()) {
isMetadataLoaded = false;
checkOrcVersion();
@@ -424,7 +431,7 @@ namespace orc {
stripeInfo.footerlength(),
stripeInfo.numberofrows(),
contents->stream.get(),
- memoryPool,
+ *contents->pool,
contents->compression,
contents->blockSize));
}
@@ -551,9 +558,9 @@ namespace orc {
(new SeekableFileInputStream(contents->stream.get(),
metadataStart,
metadataSize,
- memoryPool)),
+ *contents->pool)),
contents->blockSize,
- memoryPool);
+ *contents->pool);
metadata.reset(new proto::Metadata());
if (!metadata->ParseFromZeroCopyStream(pbStream.get())) {
throw ParseError("Failed to parse the metadata");
@@ -576,7 +583,12 @@ namespace orc {
}
}
- std::unique_ptr<RowReader> ReaderImpl::getRowReader(
+ std::unique_ptr<RowReader> ReaderImpl::createRowReader() const {
+ RowReaderOptions defaultOpts;
+ return createRowReader(defaultOpts);
+ }
+
+ std::unique_ptr<RowReader> ReaderImpl::createRowReader(
const RowReaderOptions& opts) const {
return std::unique_ptr<RowReader>(new RowReaderImpl(contents, opts));
}
@@ -758,7 +770,6 @@ namespace orc {
StripeStreamsImpl stripeStreams(*this, currentStripeFooter,
currentStripeInfo.offset(),
*(contents->stream.get()),
- memoryPool,
writerTimezone);
reader = buildReader(*contents->schema.get(), stripeStreams);
}
@@ -794,7 +805,7 @@ namespace orc {
std::unique_ptr<ColumnVectorBatch> RowReaderImpl::createRowBatch
(uint64_t capacity) const {
- return getSelectedType().createRowBatch(capacity, memoryPool);
+ return getSelectedType().createRowBatch(capacity, *contents->pool);
}
void ensureOrcFooter(InputStream* stream,
@@ -883,8 +894,9 @@ namespace orc {
std::unique_ptr<Reader> createReader(std::unique_ptr<InputStream> stream,
const ReaderOptions& options) {
- MemoryPool *memoryPool = options.getMemoryPool();
std::shared_ptr<FileContents> contents = std::shared_ptr<FileContents>(new FileContents());
+ contents->pool = options.getMemoryPool();
+ contents->errorStream = options.getErrorStream();
std::string serializedFooter = options.getSerializedFileTail();
uint64_t fileLength;
uint64_t postscriptLength;
@@ -908,7 +920,7 @@ namespace orc {
if (readSize < 4) {
throw ParseError("File size too small");
}
- DataBuffer<char> *buffer = new DataBuffer<char>(*memoryPool, readSize);
+ DataBuffer<char> *buffer = new DataBuffer<char>(*contents->pool, readSize);
stream->read(buffer->data(), readSize, fileLength - readSize);
postscriptLength = buffer->data()[readSize - 1] & 0xff;
@@ -927,7 +939,7 @@ namespace orc {
}
contents->footer = REDUNDANT_MOVE(readFooter(stream.get(), buffer,
- footerOffset, *contents->postscript, *memoryPool));
+ footerOffset, *contents->postscript, *contents->pool));
delete buffer;
}
contents->stream = std::move(stream);
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/Reader.hh
----------------------------------------------------------------------
diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index 3b2eac1..040d60a 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -30,10 +30,6 @@
namespace orc {
- class ReaderOptions;
- class RowReaderOptions;
- class StripeInformation;
-
static const uint64_t DIRECTORY_SIZE_GUESS = 16 * 1024;
/**
@@ -46,6 +42,8 @@ namespace orc {
std::unique_ptr<Type> schema;
uint64_t blockSize;
CompressionKind compression;
+ MemoryPool *pool;
+ std::ostream *errorStream;
};
class ReaderImpl;
@@ -95,13 +93,11 @@ namespace orc {
// contents
std::shared_ptr<FileContents> contents;
+ const bool throwOnHive11DecimalOverflow;
+ const int32_t forcedScaleOnHive11Decimal;
// inputs
std::vector<bool> selectedColumns;
- const RowReaderOptions& options;
-
- // custom memory pool
- MemoryPool& memoryPool;
// footer
proto::Footer* footer;
@@ -130,7 +126,7 @@ namespace orc {
* @param options options for reading
*/
RowReaderImpl(std::shared_ptr<FileContents> contents,
- const RowReaderOptions& options);
+ const RowReaderOptions& options);
// Select the columns from the options object
void updateSelected();
@@ -143,8 +139,6 @@ namespace orc {
bool next(ColumnVectorBatch& data) override;
- const RowReaderOptions& getRowReaderOptions() const;
-
CompressionKind getCompression() const;
uint64_t getCompressionSize() const;
@@ -153,8 +147,9 @@ namespace orc {
void seekToRow(uint64_t rowNumber) override;
- MemoryPool* getMemoryPool() const ;
-
+ const FileContents& getFileContents() const;
+ bool getThrowOnHive11DecimalOverflow() const;
+ int32_t getForcedScaleOnHive11Decimal() const;
};
class ReaderImpl : public Reader {
@@ -167,9 +162,6 @@ namespace orc {
const uint64_t fileLength;
const uint64_t postscriptLength;
- // custom memory pool
- MemoryPool& memoryPool;
-
// footer
proto::Footer* footer;
uint64_t numberOfStripes;
@@ -227,8 +219,10 @@ namespace orc {
std::unique_ptr<Statistics>
getStripeStatistics(uint64_t stripeIndex) const override;
- std::unique_ptr<RowReader> getRowReader(const RowReaderOptions& options
- ) const override;
+ std::unique_ptr<RowReader> createRowReader() const override;
+
+ std::unique_ptr<RowReader> createRowReader(const RowReaderOptions& options
+ ) const override;
uint64_t getContentLength() const override;
uint64_t getStripeStatisticsLength() const override;
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/StripeStream.cc
----------------------------------------------------------------------
diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc
index 1be58f1..07ac995 100644
--- a/c++/src/StripeStream.cc
+++ b/c++/src/StripeStream.cc
@@ -29,13 +29,11 @@ namespace orc {
const proto::StripeFooter& _footer,
uint64_t _stripeStart,
InputStream& _input,
- MemoryPool& _memoryPool,
const Timezone& _writerTimezone
): reader(_reader),
footer(_footer),
stripeStart(_stripeStart),
input(_input),
- memoryPool(_memoryPool),
writerTimezone(_writerTimezone) {
// PASS
}
@@ -57,10 +55,6 @@ namespace orc {
// PASS
}
- const RowReaderOptions& StripeStreamsImpl::getRowReaderOptions() const {
- return reader.getRowReaderOptions();
- }
-
const std::vector<bool> StripeStreamsImpl::getSelectedColumns() const {
return reader.getSelectedColumns();
}
@@ -74,11 +68,16 @@ namespace orc {
return writerTimezone;
}
- std::unique_ptr<SeekableInputStream>
+ std::ostream* StripeStreamsImpl::getErrorStream() const {
+ return reader.getFileContents().errorStream;
+ }
+
+ std::unique_ptr<SeekableInputStream>
StripeStreamsImpl::getStream(uint64_t columnId,
proto::Stream_Kind kind,
bool shouldStream) const {
uint64_t offset = stripeStart;
+ MemoryPool *pool = reader.getFileContents().pool;
for(int i = 0; i < footer.streams_size(); ++i) {
const proto::Stream& stream = footer.streams(i);
if (stream.has_kind() &&
@@ -92,10 +91,10 @@ namespace orc {
(&input,
offset,
stream.length(),
- memoryPool,
+ *pool,
myBlock)),
reader.getCompressionSize(),
- memoryPool);
+ *pool);
}
offset += stream.length();
}
@@ -103,7 +102,15 @@ namespace orc {
}
MemoryPool& StripeStreamsImpl::getMemoryPool() const {
- return memoryPool;
+ return *reader.getFileContents().pool;
+ }
+
+ bool StripeStreamsImpl::getThrowOnHive11DecimalOverflow() const {
+ return reader.getThrowOnHive11DecimalOverflow();
+ }
+
+ int32_t StripeStreamsImpl::getForcedScaleOnHive11Decimal() const {
+ return reader.getForcedScaleOnHive11Decimal();
}
void StripeInformationImpl::ensureStripeFooterLoaded() const {
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/src/StripeStream.hh
----------------------------------------------------------------------
diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh
index b3eb454..8f31397 100644
--- a/c++/src/StripeStream.hh
+++ b/c++/src/StripeStream.hh
@@ -40,7 +40,6 @@ namespace orc {
const proto::StripeFooter& footer;
const uint64_t stripeStart;
InputStream& input;
- MemoryPool& memoryPool;
const Timezone& writerTimezone;
public:
@@ -48,13 +47,10 @@ namespace orc {
const proto::StripeFooter& footer,
uint64_t stripeStart,
InputStream& input,
- MemoryPool& memoryPool,
const Timezone& writerTimezone);
virtual ~StripeStreamsImpl();
- virtual const RowReaderOptions& getRowReaderOptions() const override;
-
virtual const std::vector<bool> getSelectedColumns() const override;
virtual proto::ColumnEncoding getEncoding(uint64_t columnId
@@ -68,6 +64,12 @@ namespace orc {
MemoryPool& getMemoryPool() const override;
const Timezone& getWriterTimezone() const override;
+
+ std::ostream* getErrorStream() const override;
+
+ bool getThrowOnHive11DecimalOverflow() const override;
+
+ int32_t getForcedScaleOnHive11Decimal() const override;
};
/**
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/c++/test/TestColumnReader.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc
index 7c41b57..d09ecca 100644
--- a/c++/test/TestColumnReader.cc
+++ b/c++/test/TestColumnReader.cc
@@ -41,11 +41,14 @@ public:
std::unique_ptr<SeekableInputStream> getStream(uint64_t columnId,
proto::Stream_Kind kind,
bool stream) const override;
- MOCK_CONST_METHOD0(getRowReaderOptions, const RowReaderOptions&());
MOCK_CONST_METHOD0(getSelectedColumns, const std::vector<bool>());
MOCK_CONST_METHOD1(getEncoding, proto::ColumnEncoding (uint64_t));
MOCK_CONST_METHOD3(getStreamProxy, SeekableInputStream*
(uint64_t, proto::Stream_Kind, bool));
+ MOCK_CONST_METHOD0(getErrorStream, std::ostream*());
+ MOCK_CONST_METHOD0(getThrowOnHive11DecimalOverflow, bool());
+ MOCK_CONST_METHOD0(getForcedScaleOnHive11Decimal, int32_t());
+
MemoryPool& getMemoryPool() const {
return *getDefaultPool();
}
@@ -3179,15 +3182,14 @@ TEST(DecimalColumnReader, testDecimal128Skip) {
TEST(DecimalColumnReader, testDecimalHive11) {
MockStripeStreams streams;
- // set getRowReaderOptions()
- RowReaderOptions rowReaderOptions;
- EXPECT_CALL(streams, getRowReaderOptions())
- .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
-
// set getSelectedColumns()
std::vector<bool> selectedColumns(2, true);
EXPECT_CALL(streams, getSelectedColumns())
.WillRepeatedly(testing::Return(selectedColumns));
+ EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+ .WillRepeatedly(testing::Return(true));
+ EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+ .WillRepeatedly(testing::Return(6));
// set getEncoding
proto::ColumnEncoding directEncoding;
@@ -3258,12 +3260,10 @@ TEST(DecimalColumnReader, testDecimalHive11) {
TEST(DecimalColumnReader, testDecimalHive11Skip) {
MockStripeStreams streams;
- // set getRowReaderOptions()
- RowReaderOptions rowReaderOptions;
- rowReaderOptions.throwOnHive11DecimalOverflow(false)
- .forcedScaleOnHive11Decimal(3);
- EXPECT_CALL(streams, getRowReaderOptions())
- .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+ EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+ .WillRepeatedly(testing::Return(false));
+ EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+ .WillRepeatedly(testing::Return(3));
// set getSelectedColumns()
std::vector<bool> selectedColumns(2, true);
@@ -3365,11 +3365,10 @@ TEST(DecimalColumnReader, testDecimalHive11Skip) {
TEST(DecimalColumnReader, testDecimalHive11ScaleUp) {
MockStripeStreams streams;
- // set getRowReaderOptions()
- RowReaderOptions rowReaderOptions;
- rowReaderOptions.forcedScaleOnHive11Decimal(20);
- EXPECT_CALL(streams, getRowReaderOptions())
- .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+ EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+ .WillRepeatedly(testing::Return(true));
+ EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+ .WillRepeatedly(testing::Return(20));
// set getSelectedColumns()
std::vector<bool> selectedColumns(2, true);
@@ -3434,11 +3433,10 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleUp) {
TEST(DecimalColumnReader, testDecimalHive11ScaleDown) {
MockStripeStreams streams;
- // set getRowReaderOptions()
- RowReaderOptions rowReaderOptions;
- rowReaderOptions.forcedScaleOnHive11Decimal(0);
- EXPECT_CALL(streams, getRowReaderOptions())
- .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+ EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+ .WillRepeatedly(testing::Return(true));
+ EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+ .WillRepeatedly(testing::Return(0));
// set getSelectedColumns()
std::vector<bool> selectedColumns(2, true);
@@ -3523,10 +3521,10 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleDown) {
TEST(DecimalColumnReader, testDecimalHive11OverflowException) {
MockStripeStreams streams;
- // set getRowReaderOptions()
- RowReaderOptions rowReaderOptions;
- EXPECT_CALL(streams, getRowReaderOptions())
- .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+ EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+ .WillRepeatedly(testing::Return(true));
+ EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+ .WillRepeatedly(testing::Return(6));
// set getSelectedColumns()
std::vector<bool> selectedColumns(2, true);
@@ -3581,10 +3579,10 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowException) {
TEST(DecimalColumnReader, testDecimalHive11OverflowExceptionNull) {
MockStripeStreams streams;
- // set getRowReaderOptions()
- RowReaderOptions rowReaderOptions;
- EXPECT_CALL(streams, getRowReaderOptions())
- .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+ EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+ .WillRepeatedly(testing::Return(true));
+ EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+ .WillRepeatedly(testing::Return(6));
// set getSelectedColumns()
std::vector<bool> selectedColumns(2, true);
@@ -3639,13 +3637,14 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowExceptionNull) {
TEST(DecimalColumnReader, testDecimalHive11OverflowNull) {
MockStripeStreams streams;
- // set getRowReaderOptions()
- RowReaderOptions rowReaderOptions;
std::stringstream errStream;
- rowReaderOptions.throwOnHive11DecimalOverflow(false)
- .setErrorStream(errStream);
- EXPECT_CALL(streams, getRowReaderOptions())
- .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+ EXPECT_CALL(streams, getErrorStream())
+ .WillRepeatedly(testing::Return(&errStream));
+
+ EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+ .WillRepeatedly(testing::Return(false));
+ EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+ .WillRepeatedly(testing::Return(6));
// set getSelectedColumns()
std::vector<bool> selectedColumns(2, true);
@@ -3729,10 +3728,10 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowNull) {
TEST(DecimalColumnReader, testDecimalHive11BigBatches) {
MockStripeStreams streams;
- // set getRowReaderOptions()
- RowReaderOptions rowReaderOptions;
- EXPECT_CALL(streams, getRowReaderOptions())
- .WillRepeatedly(testing::ReturnRef(rowReaderOptions));
+ EXPECT_CALL(streams, getThrowOnHive11DecimalOverflow())
+ .WillRepeatedly(testing::Return(true));
+ EXPECT_CALL(streams, getForcedScaleOnHive11Decimal())
+ .WillRepeatedly(testing::Return(6));
// set getSelectedColumns()
std::vector<bool> selectedColumns(2, true);
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/tools/src/FileContents.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc
index 971cb2a..9ff86a0 100644
--- a/tools/src/FileContents.cc
+++ b/tools/src/FileContents.cc
@@ -26,12 +26,12 @@
#include <iostream>
#include <string>
-void printContents(const char* filename, const orc::RowReaderOptions rowReaderOpts) {
+void printContents(const char* filename, const orc::RowReaderOptions& rowReaderOpts) {
orc::ReaderOptions readerOpts;
std::unique_ptr<orc::Reader> reader;
std::unique_ptr<orc::RowReader> rowReader;
reader = orc::createReader(orc::readLocalFile(std::string(filename)), readerOpts);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
std::unique_ptr<orc::ColumnVectorBatch> batch = rowReader->createRowBatch(1000);
std::string line;
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/tools/src/FileMemory.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMemory.cc b/tools/src/FileMemory.cc
index 6d03025..c133501 100644
--- a/tools/src/FileMemory.cc
+++ b/tools/src/FileMemory.cc
@@ -69,11 +69,10 @@ void processFile(const char* filename,
}
std::unique_ptr<orc::MemoryPool> pool(new TestMemoryPool());
readerOpts.setMemoryPool(*(pool.get()));
- rowReaderOpts.setMemoryPool(*(pool.get()));
std::unique_ptr<orc::Reader> reader =
orc::createReader(orc::readLocalFile(std::string(filename)), readerOpts);
- std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+ std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader(rowReaderOpts);
std::unique_ptr<orc::ColumnVectorBatch> batch =
rowReader->createRowBatch(batchSize);
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/tools/src/FileScan.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileScan.cc b/tools/src/FileScan.cc
index 2f3f296..fd5a32b 100644
--- a/tools/src/FileScan.cc
+++ b/tools/src/FileScan.cc
@@ -28,10 +28,9 @@
void scanFile(std::ostream & out, const char* filename, uint64_t batchSize) {
orc::ReaderOptions readerOpts;
- orc::RowReaderOptions rowReaderOpts;
std::unique_ptr<orc::Reader> reader =
orc::createReader(orc::readLocalFile(filename), readerOpts);
- std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+ std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader();
std::unique_ptr<orc::ColumnVectorBatch> batch =
rowReader->createRowBatch(batchSize);
http://git-wip-us.apache.org/repos/asf/orc/blob/4984cb2a/tools/test/TestMatch.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc
index 41ddb23..7ab6150 100644
--- a/tools/test/TestMatch.cc
+++ b/tools/test/TestMatch.cc
@@ -100,10 +100,9 @@ namespace orc {
TEST_P(FileParam, Metadata) {
orc::ReaderOptions readerOpts;
- orc::RowReaderOptions rowReaderOpts;
std::unique_ptr<Reader> reader =
createReader(readLocalFile(getFilename()), readerOpts);
- std::unique_ptr<RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+ std::unique_ptr<RowReader> rowReader = reader->createRowReader();
EXPECT_EQ(GetParam().compression, reader->getCompression());
EXPECT_EQ(GetParam().compressionSize, reader->getCompressionSize());
@@ -130,9 +129,8 @@ namespace orc {
TEST_P(FileParam, Contents) {
orc::ReaderOptions readerOpts;
- orc::RowReaderOptions rowReaderOpts;
std::unique_ptr<RowReader> rowReader =
- createReader(readLocalFile(getFilename()), readerOpts)->getRowReader(rowReaderOpts);
+ createReader(readLocalFile(getFilename()), readerOpts)->createRowReader();
unsigned long rowCount = 0;
std::unique_ptr<ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
@@ -540,7 +538,7 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
std::string filename = findExample("demo-11-none.orc");
std::unique_ptr<Reader> reader =
createReader(readLocalFile(filename), readerOpts);
- std::unique_ptr<RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+ std::unique_ptr<RowReader> rowReader = reader->createRowReader(rowReaderOpts);
EXPECT_EQ(CompressionKind_NONE, reader->getCompression());
EXPECT_EQ(256 * 1024, reader->getCompressionSize());
@@ -633,10 +631,10 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
offsetOpts.range(80000, 130722);
std::string filename = findExample("demo-11-none.orc");
std::unique_ptr<Reader> reader = createReader(readLocalFile(filename), opts);
- std::unique_ptr<RowReader> fullReader = reader->getRowReader(fullOpts);
- std::unique_ptr<RowReader> lastReader = reader->getRowReader(lastOpts);
- std::unique_ptr<RowReader> oobReader = reader->getRowReader(oobOpts);
- std::unique_ptr<RowReader> offsetReader = reader->getRowReader(offsetOpts);
+ std::unique_ptr<RowReader> fullReader = reader->createRowReader(fullOpts);
+ std::unique_ptr<RowReader> lastReader = reader->createRowReader(lastOpts);
+ std::unique_ptr<RowReader> oobReader = reader->createRowReader(oobOpts);
+ std::unique_ptr<RowReader> offsetReader = reader->createRowReader(offsetOpts);
std::unique_ptr<ColumnVectorBatch> oobBatch =
oobReader->createRowBatch(5000);
@@ -816,11 +814,10 @@ TEST(TestMatch, seekToRow) {
/* Test with a regular file */
{
orc::ReaderOptions readerOpts;
- orc::RowReaderOptions rowReaderOpts;
std::string filename = findExample("demo-11-none.orc");
std::unique_ptr<orc::Reader> reader =
orc::createReader(orc::readLocalFile(filename), readerOpts);
- std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+ std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader();
EXPECT_EQ(1920800, reader->getNumberOfRows());
std::unique_ptr<orc::ColumnVectorBatch> batch =
@@ -857,7 +854,7 @@ TEST(TestMatch, seekToRow) {
std::unique_ptr<orc::Reader> reader =
orc::createReader(orc::readLocalFile(filename), readerOpts);
- std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+ std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader(rowReaderOpts);
EXPECT_EQ(1920800, reader->getNumberOfRows());
std::unique_ptr<orc::ColumnVectorBatch> batch =
@@ -884,11 +881,10 @@ TEST(TestMatch, seekToRow) {
/* Test with an empty file */
{
orc::ReaderOptions readerOpts;
- orc::RowReaderOptions rowReaderOpts;
std::string filename = findExample("TestOrcFile.emptyFile.orc");
std::unique_ptr<orc::Reader> reader =
orc::createReader(orc::readLocalFile(filename), readerOpts);
- std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+ std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader();
EXPECT_EQ(0, reader->getNumberOfRows());
std::unique_ptr<orc::ColumnVectorBatch> batch =
@@ -929,7 +925,7 @@ TEST(TestMatch, selectColumns) {
// All columns
std::unique_ptr<orc::Reader> reader =
orc::createReader(orc::readLocalFile(filename), readerOpts);
- std::unique_ptr<orc::RowReader> rowReader = reader->getRowReader(rowReaderOpts);
+ std::unique_ptr<orc::RowReader> rowReader = reader->createRowReader(rowReaderOpts);
std::vector<bool> c = rowReader->getSelectedColumns();
EXPECT_EQ(24, c.size());
for (unsigned int i=0; i < c.size(); i++) {
@@ -960,7 +956,7 @@ TEST(TestMatch, selectColumns) {
std::list<uint64_t> cols;
cols.push_back(1);
rowReaderOpts.include(cols);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
c = rowReader->getSelectedColumns();
for (unsigned int i=1; i < c.size(); i++) {
if (i==2)
@@ -982,7 +978,7 @@ TEST(TestMatch, selectColumns) {
cols.clear();
cols.push_back(9);
rowReaderOpts.include(cols);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
c = rowReader->getSelectedColumns();
for (unsigned int i=1; i < c.size(); i++) {
if (i>=10 && i<=14)
@@ -1006,7 +1002,7 @@ TEST(TestMatch, selectColumns) {
cols.clear();
cols.push_back(10);
rowReaderOpts.include(cols);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
c = rowReader->getSelectedColumns();
for (unsigned int i=1; i < c.size(); i++) {
if (i>=15 && i<=18)
@@ -1027,7 +1023,7 @@ TEST(TestMatch, selectColumns) {
cols.clear();
cols.push_back(11);
rowReaderOpts.include(cols);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
c = rowReader->getSelectedColumns();
for (unsigned int i=1; i < c.size(); i++) {
if (i>=19 && i<=23)
@@ -1056,7 +1052,7 @@ TEST(TestMatch, selectColumns) {
cols.push_back(22);
cols.push_back(23);
rowReaderOpts.includeTypes(cols);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
c = rowReader->getSelectedColumns();
for (unsigned int i=1; i < c.size(); i++) {
if (i>=19 && i<=23)
@@ -1082,7 +1078,7 @@ TEST(TestMatch, selectColumns) {
colNames.push_back("middle.list.int1");
colNames.push_back("middle.list.string1");
rowReaderOpts.include(colNames);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
c = rowReader->getSelectedColumns();
for (unsigned int i=1; i < c.size(); i++) {
if (i>=10 && i<=14)
@@ -1116,7 +1112,7 @@ TEST(Reader, memoryUse) {
cols.push_back(1);
rowReaderOpts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
EXPECT_EQ(483517, reader->getMemoryUseByFieldId(cols));
batch = rowReader->createRowBatch(1);
EXPECT_EQ(10, batch->getMemoryUsage());
@@ -1129,7 +1125,7 @@ TEST(Reader, memoryUse) {
cols.push_back(7);
rowReaderOpts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
EXPECT_EQ(835906, reader->getMemoryUseByFieldId(cols));
batch = rowReader->createRowBatch(1);
EXPECT_EQ(18, batch->getMemoryUsage());
@@ -1140,7 +1136,7 @@ TEST(Reader, memoryUse) {
cols.push_back(8);
rowReaderOpts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
EXPECT_EQ(901442, reader->getMemoryUseByFieldId(cols));
batch = rowReader->createRowBatch(1);
EXPECT_EQ(18, batch->getMemoryUsage());
@@ -1151,7 +1147,7 @@ TEST(Reader, memoryUse) {
cols.push_back(9);
rowReaderOpts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
EXPECT_EQ(1294658, reader->getMemoryUseByFieldId(cols));
batch = rowReader->createRowBatch(1);
EXPECT_EQ(46, batch->getMemoryUsage());
@@ -1162,7 +1158,7 @@ TEST(Reader, memoryUse) {
cols.push_back(10);
rowReaderOpts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
EXPECT_EQ(1229122, reader->getMemoryUseByFieldId(cols));
batch = rowReader->createRowBatch(1);
EXPECT_EQ(45, batch->getMemoryUsage());
@@ -1173,7 +1169,7 @@ TEST(Reader, memoryUse) {
cols.push_back(11);
rowReaderOpts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
EXPECT_EQ(1491266, reader->getMemoryUseByFieldId(cols));
batch = rowReader->createRowBatch(1);
EXPECT_EQ(62, batch->getMemoryUsage());
@@ -1186,7 +1182,7 @@ TEST(Reader, memoryUse) {
}
rowReaderOpts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename), readerOpts);
- rowReader = reader->getRowReader(rowReaderOpts);
+ rowReader = reader->createRowReader(rowReaderOpts);
EXPECT_EQ(4112706, reader->getMemoryUseByFieldId(cols));
batch = rowReader->createRowBatch(1);
EXPECT_EQ(248, batch->getMemoryUsage());