You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2017/02/07 16:01:38 UTC
[2/3] orc git commit: ORC-135: PPD for timestamp is wrong when reader
and writer timezones are different zones. (prasanthj and omalley)
http://git-wip-us.apache.org/repos/asf/orc/blob/f2b8b799/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 6b4daa8..354cb89 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -33,10 +33,15 @@ import static org.mockito.Mockito.when;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.sql.Date;
import java.sql.Timestamp;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.TimeZone;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -64,6 +69,8 @@ import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.OrcProto;
+import org.apache.orc.util.BloomFilterIO;
+import org.apache.orc.util.BloomFilterUtf8;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.MockSettings;
@@ -343,10 +350,12 @@ public class TestRecordReaderImpl {
return OrcProto.ColumnStatistics.newBuilder().setDateStatistics(dateStats.build()).build();
}
- private static OrcProto.ColumnStatistics createTimestampStats(long min, long max) {
+ private static final TimeZone utcTz = TimeZone.getTimeZone("UTC");
+
+ private static OrcProto.ColumnStatistics createTimestampStats(String min, String max) {
OrcProto.TimestampStatistics.Builder tsStats = OrcProto.TimestampStatistics.newBuilder();
- tsStats.setMinimum(min);
- tsStats.setMaximum(max);
+ tsStats.setMinimumUtc(getUtcTimestamp(min));
+ tsStats.setMaximumUtc(getUtcTimestamp(max));
return OrcProto.ColumnStatistics.newBuilder().setTimestampStatistics(tsStats.build()).build();
}
@@ -387,28 +396,96 @@ public class TestRecordReaderImpl {
.deserialize(createDecimalStats("111.1", "112.1"))));
}
+ static TruthValue evaluateBoolean(OrcProto.ColumnStatistics stats,
+ PredicateLeaf predicate) {
+ OrcProto.ColumnEncoding encoding =
+ OrcProto.ColumnEncoding.newBuilder()
+ .setKind(OrcProto.ColumnEncoding.Kind.DIRECT)
+ .build();
+ return RecordReaderImpl.evaluatePredicateProto(stats, predicate, null,
+ encoding, null,
+ OrcFile.WriterVersion.ORC_135, TypeDescription.Category.BOOLEAN);
+ }
+
+ static TruthValue evaluateInteger(OrcProto.ColumnStatistics stats,
+ PredicateLeaf predicate) {
+ OrcProto.ColumnEncoding encoding =
+ OrcProto.ColumnEncoding.newBuilder()
+ .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2)
+ .build();
+ return RecordReaderImpl.evaluatePredicateProto(stats, predicate, null,
+ encoding, null,
+ OrcFile.WriterVersion.ORC_135, TypeDescription.Category.LONG);
+ }
+
+ static TruthValue evaluateDouble(OrcProto.ColumnStatistics stats,
+ PredicateLeaf predicate) {
+ OrcProto.ColumnEncoding encoding =
+ OrcProto.ColumnEncoding.newBuilder()
+ .setKind(OrcProto.ColumnEncoding.Kind.DIRECT)
+ .build();
+ return RecordReaderImpl.evaluatePredicateProto(stats, predicate, null,
+ encoding, null,
+ OrcFile.WriterVersion.ORC_135, TypeDescription.Category.DOUBLE);
+ }
+
+ static TruthValue evaluateTimestamp(OrcProto.ColumnStatistics stats,
+ PredicateLeaf predicate,
+ boolean include135) {
+ OrcProto.ColumnEncoding encoding =
+ OrcProto.ColumnEncoding.newBuilder()
+ .setKind(OrcProto.ColumnEncoding.Kind.DIRECT)
+ .build();
+ return RecordReaderImpl.evaluatePredicateProto(stats, predicate, null,
+ encoding, null,
+ include135 ? OrcFile.WriterVersion.ORC_135: OrcFile.WriterVersion.ORC_101,
+ TypeDescription.Category.TIMESTAMP);
+ }
+
+ static TruthValue evaluateTimestampBloomfilter(OrcProto.ColumnStatistics stats,
+ PredicateLeaf predicate,
+ BloomFilter bloom,
+ OrcFile.WriterVersion version) {
+ OrcProto.ColumnEncoding.Builder encoding =
+ OrcProto.ColumnEncoding.newBuilder()
+ .setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
+ if (version.includes(OrcFile.WriterVersion.ORC_135)) {
+ encoding.setBloomEncoding(BloomFilterIO.Encoding.UTF8_UTC.getId());
+ }
+ OrcProto.Stream.Kind kind =
+ version.includes(OrcFile.WriterVersion.ORC_101) ?
+ OrcProto.Stream.Kind.BLOOM_FILTER_UTF8 :
+ OrcProto.Stream.Kind.BLOOM_FILTER;
+ OrcProto.BloomFilter.Builder builder =
+ OrcProto.BloomFilter.newBuilder();
+ BloomFilterIO.serialize(builder, bloom);
+ return RecordReaderImpl.evaluatePredicateProto(stats, predicate, kind,
+ encoding.build(), builder.build(), version,
+ TypeDescription.Category.TIMESTAMP);
+ }
+
@Test
public void testPredEvalWithBooleanStats() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
+ evaluateBoolean(createBooleanStats(10, 10), pred));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
+ evaluateBoolean(createBooleanStats(10, 0), pred));
pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
+ evaluateBoolean(createBooleanStats(10, 10), pred));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
+ evaluateBoolean(createBooleanStats(10, 0), pred));
pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", false, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
+ evaluateBoolean(createBooleanStats(10, 10), pred));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
+ evaluateBoolean(createBooleanStats(10, 0), pred));
}
@Test
@@ -416,34 +493,34 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10, 100), pred));
// Stats gets converted to column type. "15" is outside of "10" and "100"
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10, 100), pred));
// Integer stats will not be converted date because of days/seconds/millis ambiguity
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10, 100), pred));
}
@Test
@@ -451,39 +528,39 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateDouble(createDoubleStats(10.0, 100.0), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateDouble(createDoubleStats(10.0, 100.0), pred));
// Stats gets converted to column type. "15.0" is outside of "10.0" and "100.0"
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
+ evaluateDouble(createDoubleStats(10.0, 100.0), pred));
// Double is not converted to date type because of days/seconds/millis ambiguity
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
+ evaluateDouble(createDoubleStats(10.0, 100.0), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
+ evaluateDouble(createDoubleStats(10.0, 100.0), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15*1000L), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
+ evaluateDouble(createDoubleStats(10.0, 100.0), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150*1000L), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
+ evaluateDouble(createDoubleStats(10.0, 100.0), pred));
}
@Test
@@ -491,33 +568,33 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 100L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("10", "1000"), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 100.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("10", "1000"), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "100", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("10", "1000"), pred));
// IllegalArgumentException is thrown when converting String to Date, hence YES_NO
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 1000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 1000), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("100"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("10", "1000"), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(100), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("10", "1000"), pred));
}
@Test
@@ -526,69 +603,69 @@ public class TestRecordReaderImpl {
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
// Date to Integer conversion is not possible.
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
// Date to Float conversion is also not possible.
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "1970-01-11", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15.1", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "__a15__1", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "2000-01-16", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "1970-01-16", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
// Date to Decimal conversion is also not possible.
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15L * 24L * 60L * 60L * 1000L), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDateStats(10, 100), pred));
}
@Test
@@ -596,86 +673,86 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDecimalStats("10.0", "100.0"), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDecimalStats("10.0", "100.0"), pred));
// "15" out of range of "10.0" and "100.0"
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDecimalStats("10.0", "100.0"), pred));
// Decimal to Date not possible.
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDecimalStats("10.0", "100.0"), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDecimalStats("10.0", "100.0"), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15 * 1000L), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDecimalStats("10.0", "100.0"), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150 * 1000L), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createDecimalStats("10.0", "100.0"), pred));
}
@Test
public void testPredEvalWithTimestampStats() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
- PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
+ PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP,
+ "x", Timestamp.valueOf("2017-01-01 00:00:00"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateTimestamp(createTimestampStats("2017-01-01 00:00:00",
+ "2018-01-01 00:00:00"), pred, true));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
- assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
- assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ assertEquals(TruthValue.YES_NO_NULL,
+ evaluateTimestamp(createTimestampStats("2017-01-01 00:00:00", "2018-01-01 00:00:00"),
+ pred, true));
+ assertEquals(TruthValue.YES_NO_NULL,
+ evaluateTimestamp(createTimestampStats("2017-01-01 00:00:00", "2018-01-01 00:00:00"),
+ pred, true));
+ // Writer versions before ORC-135 should always evaluate to YES_NO_NULL.
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.STRING, "x", "15", null);
- assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ PredicateLeaf.Type.TIMESTAMP, "x", Timestamp.valueOf("2017-01-01 00:00:00"), null);
+ assertEquals(TruthValue.YES_NO_NULL,
+ evaluateTimestamp(createTimestampStats("2017-01-01 00:00:00", "2017-01-01 00:00:00"),
+ pred, false));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.STRING, "x", new Timestamp(15).toString(), null);
+ PredicateLeaf.Type.STRING, "x", Timestamp.valueOf("2017-01-01 00:00:00").toString(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateTimestamp(createTimestampStats("2017-01-01 00:00:00", "2018-01-01 00:00:00"),
+ pred, true));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+ PredicateLeaf.Type.DATE, "x", Date.valueOf("2016-01-01"), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateTimestamp(createTimestampStats("2017-01-01 00:00:00", "2017-01-01 00:00:00"),
+ pred, true));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10 * 24L * 60L * 60L * 1000L,
- 100 * 24L * 60L * 60L * 1000L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateTimestamp(createTimestampStats("2015-01-01 00:00:00", "2016-01-01 00:00:00"),
+ pred, true));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
- assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
- assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
-
- pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
- assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
- assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ assertEquals(TruthValue.YES_NO_NULL,
+ evaluateTimestamp(createTimestampStats("2015-01-01 00:00:00", "2016-01-01 00:00:00"),
+ pred, true));
}
@Test
@@ -684,17 +761,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG,
"x", 15L, null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(20L, 30L), pred));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(15L, 30L), pred)) ;
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 30L), pred));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 15L), pred));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(0L, 10L), pred));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(15L, 15L), pred));
}
@Test
@@ -703,17 +780,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(20L, 30L), pred));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(15L, 30L), pred));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 30L), pred));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 15L), pred));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(0L, 10L), pred));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(15L, 15L), pred));
}
@Test
@@ -722,15 +799,15 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.LONG,
"x", 15L, null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(20L, 30L), lessThan));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(15L, 30L), lessThan));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 30L), lessThan));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 15L), lessThan));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(0L, 10L), lessThan));
}
@Test
@@ -739,15 +816,15 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.LONG,
"x", 15L, null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(20L, 30L), pred));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(15L, 30L), pred));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 30L), pred));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 15L), pred));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(0L, 10L), pred));
}
@Test
@@ -759,13 +836,13 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
"x", null, args);
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(20L, 20L), pred));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(30L, 30L), pred));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 30L), pred));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(12L, 18L), pred));
}
@Test
@@ -777,19 +854,19 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.LONG,
"x", null, args);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(0L, 5L), pred));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 40L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(30L, 40L), pred));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(5L, 15L), pred));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 25L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(15L, 25L), pred));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 25L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(5L, 25L), pred));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 20L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(10L, 20L), pred));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(12L, 18L), pred));
}
@Test
@@ -798,7 +875,7 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.LONG,
"x", null, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createIntStats(20L, 30L), pred));
}
@@ -808,17 +885,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before
+ evaluateInteger(createStringStats("d", "e", true), pred)); // before
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
+ evaluateInteger(createStringStats("a", "b", true), pred)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
+ evaluateInteger(createStringStats("b", "c", true), pred)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
+ evaluateInteger(createStringStats("c", "d", true), pred)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
+ evaluateInteger(createStringStats("b", "d", true), pred)); // middle
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same
+ evaluateInteger(createStringStats("c", "c", true), pred)); // same
}
@Test
@@ -827,17 +904,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before
+ evaluateInteger(createStringStats("d", "e", true), pred)); // before
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
+ evaluateInteger(createStringStats("a", "b", true), pred)); // after
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
+ evaluateInteger(createStringStats("b", "c", true), pred)); // max
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
+ evaluateInteger(createStringStats("c", "d", true), pred)); // min
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
+ evaluateInteger(createStringStats("b", "d", true), pred)); // middle
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same
+ evaluateInteger(createStringStats("c", "c", true), pred)); // same
}
@Test
@@ -846,17 +923,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before
+ evaluateInteger(createStringStats("d", "e", true), pred)); // before
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
+ evaluateInteger(createStringStats("a", "b", true), pred)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
+ evaluateInteger(createStringStats("b", "c", true), pred)); // max
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
+ evaluateInteger(createStringStats("c", "d", true), pred)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
+ evaluateInteger(createStringStats("b", "d", true), pred)); // middle
assertEquals(TruthValue.NO_NULL, // min, same stats
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("c", "c", true), pred));
}
@Test
@@ -865,17 +942,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before
+ evaluateInteger(createStringStats("d", "e", true), pred)); // before
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
+ evaluateInteger(createStringStats("a", "b", true), pred)); // after
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
+ evaluateInteger(createStringStats("b", "c", true), pred)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
+ evaluateInteger(createStringStats("c", "d", true), pred)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
+ evaluateInteger(createStringStats("b", "d", true), pred)); // middle
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same
+ evaluateInteger(createStringStats("c", "c", true), pred)); // same
}
@Test
@@ -887,17 +964,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
"x", null, args);
assertEquals(TruthValue.NO_NULL, // before & after
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("d", "e", true), pred));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
+ evaluateInteger(createStringStats("a", "b", true), pred)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
+ evaluateInteger(createStringStats("e", "f", true), pred)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
+ evaluateInteger(createStringStats("c", "d", true), pred)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
+ evaluateInteger(createStringStats("b", "d", true), pred)); // middle
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same
+ evaluateInteger(createStringStats("c", "c", true), pred)); // same
}
@Test
@@ -909,31 +986,57 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING,
"x", null, args);
assertEquals(TruthValue.YES_NULL, // before & after
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("d", "e", true), pred));
assertEquals(TruthValue.YES_NULL, // before & max
- RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("e", "f", true), pred));
assertEquals(TruthValue.NO_NULL, // before & before
- RecordReaderImpl.evaluatePredicateProto(createStringStats("h", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("h", "g", true), pred));
assertEquals(TruthValue.YES_NO_NULL, // before & min
- RecordReaderImpl.evaluatePredicateProto(createStringStats("f", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("f", "g", true), pred));
assertEquals(TruthValue.YES_NO_NULL, // before & middle
- RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("e", "g", true), pred));
assertEquals(TruthValue.YES_NULL, // min & after
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("c", "e", true), pred));
assertEquals(TruthValue.YES_NULL, // min & max
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "f", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("c", "f", true), pred));
assertEquals(TruthValue.YES_NO_NULL, // min & middle
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("c", "g", true), pred));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
+ evaluateInteger(createStringStats("a", "b", true), pred)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
+ evaluateInteger(createStringStats("a", "c", true), pred)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
+ evaluateInteger(createStringStats("b", "d", true), pred)); // middle
assertEquals(TruthValue.YES_NULL, // min & after, same stats
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("c", "c", true), pred));
+ }
+
+ @Test
+ public void testTimestampStatsOldFiles() throws Exception {
+ PredicateLeaf pred = createPredicateLeaf
+ (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.TIMESTAMP,
+ "x", Timestamp.valueOf("2000-01-01 00:00:00"), null);
+ OrcProto.ColumnStatistics cs = createTimestampStats("2000-01-01 00:00:00", "2001-01-01 00:00:00");
+ assertEquals(TruthValue.YES_NO_NULL,
+ evaluateTimestampBloomfilter(cs, pred, new BloomFilterUtf8(10000, 0.01), OrcFile.WriterVersion.ORC_101));
+ BloomFilterUtf8 bf = new BloomFilterUtf8(10, 0.05);
+ bf.addLong(getUtcTimestamp("2000-06-01 00:00:00"));
+ assertEquals(TruthValue.NO_NULL,
+ evaluateTimestampBloomfilter(cs, pred, bf, OrcFile.WriterVersion.ORC_135));
+ assertEquals(TruthValue.YES_NO_NULL,
+ evaluateTimestampBloomfilter(cs, pred, bf, OrcFile.WriterVersion.ORC_101));
+ }
+
+ private static long getUtcTimestamp(String ts) {
+ DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ dateFormat.setTimeZone(utcTz);
+ try {
+ return dateFormat.parse(ts).getTime();
+ } catch (ParseException e) {
+ throw new IllegalArgumentException("Can't parse " + ts, e);
+ }
}
@Test
@@ -942,9 +1045,9 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING,
"x", null, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("c", "d", true), pred));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", false), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
+ evaluateInteger(createStringStats("c", "d", false), pred));
}
@Test
@@ -1528,77 +1631,6 @@ public class TestRecordReaderImpl {
}
@Test
- public void testTimestampNullSafeEqualsBloomFilter() throws Exception {
- PredicateLeaf pred = createPredicateLeaf(
- PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x",
- new Timestamp(15),
- null);
- BloomFilter bf = new BloomFilter(10000);
- for (int i = 20; i < 1000; i++) {
- bf.addLong((new Timestamp(i)).getTime());
- }
- ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createTimestampStats(10, 100));
- assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
- bf.addLong((new Timestamp(15)).getTime());
- assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
- }
-
- @Test
- public void testTimestampEqualsBloomFilter() throws Exception {
- PredicateLeaf pred = createPredicateLeaf(
- PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
- BloomFilter bf = new BloomFilter(10000);
- for (int i = 20; i < 1000; i++) {
- bf.addLong((new Timestamp(i)).getTime());
- }
- ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createTimestampStats(10, 100));
- assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
- bf.addLong((new Timestamp(15)).getTime());
- assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
- }
-
- @Test
- public void testTimestampInBloomFilter() throws Exception {
- List<Object> args = new ArrayList<Object>();
- args.add(new Timestamp(15));
- args.add(new Timestamp(19));
- PredicateLeaf pred = createPredicateLeaf
- (PredicateLeaf.Operator.IN, PredicateLeaf.Type.TIMESTAMP,
- "x", null, args);
- BloomFilter bf = new BloomFilter(10000);
- for (int i = 20; i < 1000; i++) {
- bf.addLong((new Timestamp(i)).getTime());
- }
- ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createTimestampStats(10, 100));
- assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
- bf.addLong((new Timestamp(19)).getTime());
- assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
- bf.addLong((new Timestamp(15)).getTime());
- assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
- }
-
- @Test
- public void testDecimalNullSafeEqualsBloomFilter() throws Exception {
- PredicateLeaf pred = createPredicateLeaf(
- PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x",
- new HiveDecimalWritable("15"),
- null);
- BloomFilter bf = new BloomFilter(10000);
- for (int i = 20; i < 1000; i++) {
- bf.addString(HiveDecimal.create(i).toString());
- }
- ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200"));
- assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
- bf.addString(HiveDecimal.create(15).toString());
- assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
- }
-
- @Test
public void testDecimalEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DECIMAL, "x",
http://git-wip-us.apache.org/repos/asf/orc/blob/f2b8b799/java/core/src/test/org/apache/orc/util/TestBloomFilter.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/util/TestBloomFilter.java b/java/core/src/test/org/apache/orc/util/TestBloomFilter.java
index fcfc8f5..8ce99e2 100644
--- a/java/core/src/test/org/apache/orc/util/TestBloomFilter.java
+++ b/java/core/src/test/org/apache/orc/util/TestBloomFilter.java
@@ -73,10 +73,15 @@ public class TestBloomFilter {
byte[] expected = new byte[]{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40,
(byte) 0x80, ~0x01, ~0x02, ~0x04, ~0x08, ~0x10, ~0x20, ~0x40,
(byte) ~0x80};
+ OrcProto.ColumnEncoding.Builder encoding =
+ OrcProto.ColumnEncoding.newBuilder();
+ encoding.setKind(OrcProto.ColumnEncoding.Kind.DIRECT)
+ .setBloomEncoding(BloomFilterIO.Encoding.UTF8_UTC.getId());
assertArrayEquals(expected, bs.toByteArray());
BloomFilter rebuilt = BloomFilterIO.deserialize(
OrcProto.Stream.Kind.BLOOM_FILTER_UTF8,
- OrcFile.WriterVersion.ORC_101,
+ encoding.build(),
+ OrcFile.WriterVersion.ORC_135,
TypeDescription.Category.INT,
proto);
assertEquals(bloom, rebuilt);
http://git-wip-us.apache.org/repos/asf/orc/blob/f2b8b799/java/tools/src/java/org/apache/orc/tools/FileDump.java
----------------------------------------------------------------------
diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java b/java/tools/src/java/org/apache/orc/tools/FileDump.java
index 7b79ce5..e187d7e 100644
--- a/java/tools/src/java/org/apache/orc/tools/FileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java
@@ -387,7 +387,8 @@ public final class FileDump {
buf.append(rowIdxString);
String bloomFilString = getFormattedBloomFilters(col, indices,
reader.getWriterVersion(),
- reader.getSchema().findSubtype(col).getCategory());
+ reader.getSchema().findSubtype(col).getCategory(),
+ footer.getColumns(col));
buf.append(bloomFilString);
System.out.println(buf);
}
@@ -610,7 +611,8 @@ public final class FileDump {
private static String getFormattedBloomFilters(int col, OrcIndex index,
OrcFile.WriterVersion version,
- TypeDescription.Category type) {
+ TypeDescription.Category type,
+ OrcProto.ColumnEncoding encoding) {
OrcProto.BloomFilterIndex[] bloomFilterIndex = index.getBloomFilterIndex();
StringBuilder buf = new StringBuilder();
BloomFilter stripeLevelBF = null;
@@ -619,7 +621,7 @@ public final class FileDump {
buf.append("\n Bloom filters for column ").append(col).append(":");
for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
BloomFilter toMerge = BloomFilterIO.deserialize(
- index.getBloomFilterKinds()[col], version, type, bf);
+ index.getBloomFilterKinds()[col], encoding, version, type, bf);
buf.append("\n Entry ").append(idx++).append(":").append(getBloomFilterStats(toMerge));
if (stripeLevelBF == null) {
stripeLevelBF = toMerge;
http://git-wip-us.apache.org/repos/asf/orc/blob/f2b8b799/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
----------------------------------------------------------------------
diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index 0de07ad..4ea9463 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -193,7 +193,8 @@ public class JsonFileDump {
writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
writeBloomFilterIndexes(writer, col, indices,
reader.getWriterVersion(),
- reader.getSchema().findSubtype(col).getCategory());
+ reader.getSchema().findSubtype(col).getCategory(),
+ footer.getColumns(col));
writer.endObject();
}
writer.endArray();
@@ -344,7 +345,8 @@ public class JsonFileDump {
private static void writeBloomFilterIndexes(JSONWriter writer, int col,
OrcIndex index,
OrcFile.WriterVersion version,
- TypeDescription.Category type
+ TypeDescription.Category type,
+ OrcProto.ColumnEncoding encoding
) throws JSONException {
BloomFilter stripeLevelBF = null;
@@ -356,7 +358,7 @@ public class JsonFileDump {
writer.object();
writer.key("entryId").value(entryIx++);
BloomFilter toMerge = BloomFilterIO.deserialize(
- index.getBloomFilterKinds()[col], version, type, bf);
+ index.getBloomFilterKinds()[col], encoding, version, type, bf);
writeBloomFilterStats(writer, toMerge);
if (stripeLevelBF == null) {
stripeLevelBF = toMerge;
http://git-wip-us.apache.org/repos/asf/orc/blob/f2b8b799/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
index e23327a..dcf29f7 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_101
+File Version: 0.12 with ORC_135
Rows: 21000
Compression: ZLIB
Compression size: 4096
@@ -39,7 +39,7 @@ File Statistics:
Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
Stripes:
- Stripe: offset: 3 data: 63786 rows: 5000 tail: 86 index: 749
+ Stripe: offset: 3 data: 63786 rows: 5000 tail: 87 index: 749
Stream: column 0 section ROW_INDEX start: 3 length 17
Stream: column 1 section ROW_INDEX start: 20 length 166
Stream: column 2 section ROW_INDEX start: 186 length 169
@@ -67,17 +67,17 @@ Stripes:
Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe: offset: 64624 data: 63775 rows: 5000 tail: 86 index: 742
- Stream: column 0 section ROW_INDEX start: 64624 length 17
- Stream: column 1 section ROW_INDEX start: 64641 length 164
- Stream: column 2 section ROW_INDEX start: 64805 length 168
- Stream: column 3 section ROW_INDEX start: 64973 length 83
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 65056 length 310
- Stream: column 1 section DATA start: 65366 length 20035
- Stream: column 2 section DATA start: 85401 length 40050
- Stream: column 3 section DATA start: 125451 length 3532
- Stream: column 3 section LENGTH start: 128983 length 25
- Stream: column 3 section DICTIONARY_DATA start: 129008 length 133
+ Stripe: offset: 64625 data: 63775 rows: 5000 tail: 87 index: 742
+ Stream: column 0 section ROW_INDEX start: 64625 length 17
+ Stream: column 1 section ROW_INDEX start: 64642 length 164
+ Stream: column 2 section ROW_INDEX start: 64806 length 168
+ Stream: column 3 section ROW_INDEX start: 64974 length 83
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 65057 length 310
+ Stream: column 1 section DATA start: 65367 length 20035
+ Stream: column 2 section DATA start: 85402 length 40050
+ Stream: column 3 section DATA start: 125452 length 3532
+ Stream: column 3 section LENGTH start: 128984 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 129009 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -95,17 +95,17 @@ Stripes:
Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe: offset: 129227 data: 63787 rows: 5000 tail: 86 index: 748
- Stream: column 0 section ROW_INDEX start: 129227 length 17
- Stream: column 1 section ROW_INDEX start: 129244 length 163
- Stream: column 2 section ROW_INDEX start: 129407 length 168
- Stream: column 3 section ROW_INDEX start: 129575 length 90
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 129665 length 310
- Stream: column 1 section DATA start: 129975 length 20035
- Stream: column 2 section DATA start: 150010 length 40050
- Stream: column 3 section DATA start: 190060 length 3544
- Stream: column 3 section LENGTH start: 193604 length 25
- Stream: column 3 section DICTIONARY_DATA start: 193629 length 133
+ Stripe: offset: 129229 data: 63787 rows: 5000 tail: 87 index: 748
+ Stream: column 0 section ROW_INDEX start: 129229 length 17
+ Stream: column 1 section ROW_INDEX start: 129246 length 163
+ Stream: column 2 section ROW_INDEX start: 129409 length 168
+ Stream: column 3 section ROW_INDEX start: 129577 length 90
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 129667 length 310
+ Stream: column 1 section DATA start: 129977 length 20035
+ Stream: column 2 section DATA start: 150012 length 40050
+ Stream: column 3 section DATA start: 190062 length 3544
+ Stream: column 3 section LENGTH start: 193606 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 193631 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -123,17 +123,17 @@ Stripes:
Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe: offset: 193848 data: 63817 rows: 5000 tail: 85 index: 750
- Stream: column 0 section ROW_INDEX start: 193848 length 17
- Stream: column 1 section ROW_INDEX start: 193865 length 165
- Stream: column 2 section ROW_INDEX start: 194030 length 167
- Stream: column 3 section ROW_INDEX start: 194197 length 91
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 194288 length 310
- Stream: column 1 section DATA start: 194598 length 20035
- Stream: column 2 section DATA start: 214633 length 40050
- Stream: column 3 section DATA start: 254683 length 3574
- Stream: column 3 section LENGTH start: 258257 length 25
- Stream: column 3 section DICTIONARY_DATA start: 258282 length 133
+ Stripe: offset: 193851 data: 63817 rows: 5000 tail: 86 index: 750
+ Stream: column 0 section ROW_INDEX start: 193851 length 17
+ Stream: column 1 section ROW_INDEX start: 193868 length 165
+ Stream: column 2 section ROW_INDEX start: 194033 length 167
+ Stream: column 3 section ROW_INDEX start: 194200 length 91
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 194291 length 310
+ Stream: column 1 section DATA start: 194601 length 20035
+ Stream: column 2 section DATA start: 214636 length 40050
+ Stream: column 3 section DATA start: 254686 length 3574
+ Stream: column 3 section LENGTH start: 258260 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 258285 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -151,17 +151,17 @@ Stripes:
Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe: offset: 258500 data: 12943 rows: 1000 tail: 78 index: 375
- Stream: column 0 section ROW_INDEX start: 258500 length 12
- Stream: column 1 section ROW_INDEX start: 258512 length 38
- Stream: column 2 section ROW_INDEX start: 258550 length 41
- Stream: column 3 section ROW_INDEX start: 258591 length 40
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 258631 length 244
- Stream: column 1 section DATA start: 258875 length 4007
- Stream: column 2 section DATA start: 262882 length 8010
- Stream: column 3 section DATA start: 270892 length 768
- Stream: column 3 section LENGTH start: 271660 length 25
- Stream: column 3 section DICTIONARY_DATA start: 271685 length 133
+ Stripe: offset: 258504 data: 12943 rows: 1000 tail: 80 index: 375
+ Stream: column 0 section ROW_INDEX start: 258504 length 12
+ Stream: column 1 section ROW_INDEX start: 258516 length 38
+ Stream: column 2 section ROW_INDEX start: 258554 length 41
+ Stream: column 3 section ROW_INDEX start: 258595 length 40
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 258635 length 244
+ Stream: column 1 section DATA start: 258879 length 4007
+ Stream: column 2 section DATA start: 262886 length 8010
+ Stream: column 3 section DATA start: 270896 length 768
+ Stream: column 3 section LENGTH start: 271664 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 271689 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -172,7 +172,7 @@ Stripes:
Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-File length: 272444 bytes
+File length: 272450 bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
http://git-wip-us.apache.org/repos/asf/orc/blob/f2b8b799/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
index 8296382..4ea04b5 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_101
+File Version: 0.12 with ORC_135
Rows: 21000
Compression: ZLIB
Compression size: 4096
@@ -39,7 +39,7 @@ File Statistics:
Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
Stripes:
- Stripe: offset: 3 data: 63786 rows: 5000 tail: 104 index: 14949
+ Stripe: offset: 3 data: 63786 rows: 5000 tail: 108 index: 14949
Stream: column 0 section ROW_INDEX start: 3 length 17
Stream: column 1 section ROW_INDEX start: 20 length 166
Stream: column 2 section ROW_INDEX start: 186 length 169
@@ -70,20 +70,20 @@ Stripes:
Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772
Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4949 loadFactor: 0.5155 expectedFpp: 0.009676614
Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9347 loadFactor: 0.9736 expectedFpp: 0.829482
- Stripe: offset: 78842 data: 63775 rows: 5000 tail: 103 index: 14940
- Stream: column 0 section ROW_INDEX start: 78842 length 17
- Stream: column 1 section ROW_INDEX start: 78859 length 164
- Stream: column 2 section ROW_INDEX start: 79023 length 168
- Stream: column 2 section BLOOM_FILTER start: 79191 length 6533
- Stream: column 2 section BLOOM_FILTER_UTF8 start: 85724 length 6046
- Stream: column 3 section ROW_INDEX start: 91770 length 83
- Stream: column 3 section BLOOM_FILTER start: 91853 length 1038
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 92891 length 891
- Stream: column 1 section DATA start: 93782 length 20035
- Stream: column 2 section DATA start: 113817 length 40050
- Stream: column 3 section DATA start: 153867 length 3532
- Stream: column 3 section LENGTH start: 157399 length 25
- Stream: column 3 section DICTIONARY_DATA start: 157424 length 133
+ Stripe: offset: 78846 data: 63775 rows: 5000 tail: 107 index: 14940
+ Stream: column 0 section ROW_INDEX start: 78846 length 17
+ Stream: column 1 section ROW_INDEX start: 78863 length 164
+ Stream: column 2 section ROW_INDEX start: 79027 length 168
+ Stream: column 2 section BLOOM_FILTER start: 79195 length 6533
+ Stream: column 2 section BLOOM_FILTER_UTF8 start: 85728 length 6046
+ Stream: column 3 section ROW_INDEX start: 91774 length 83
+ Stream: column 3 section BLOOM_FILTER start: 91857 length 1038
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 92895 length 891
+ Stream: column 1 section DATA start: 93786 length 20035
+ Stream: column 2 section DATA start: 113821 length 40050
+ Stream: column 3 section DATA start: 153871 length 3532
+ Stream: column 3 section LENGTH start: 157403 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 157428 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -101,20 +101,20 @@ Stripes:
Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4962 loadFactor: 0.5169 expectedFpp: 0.009855959
Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4966 loadFactor: 0.5173 expectedFpp: 0.009911705
Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9344 loadFactor: 0.9733 expectedFpp: 0.8276205
- Stripe: offset: 157660 data: 63787 rows: 5000 tail: 104 index: 14946
- Stream: column 0 section ROW_INDEX start: 157660 length 17
- Stream: column 1 section ROW_INDEX start: 157677 length 163
- Stream: column 2 section ROW_INDEX start: 157840 length 168
- Stream: column 2 section BLOOM_FILTER start: 158008 length 6533
- Stream: column 2 section BLOOM_FILTER_UTF8 start: 164541 length 6046
- Stream: column 3 section ROW_INDEX start: 170587 length 90
- Stream: column 3 section BLOOM_FILTER start: 170677 length 1038
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 171715 length 891
- Stream: column 1 section DATA start: 172606 length 20035
- Stream: column 2 section DATA start: 192641 length 40050
- Stream: column 3 section DATA start: 232691 length 3544
- Stream: column 3 section LENGTH start: 236235 length 25
- Stream: column 3 section DICTIONARY_DATA start: 236260 length 133
+ Stripe: offset: 157668 data: 63787 rows: 5000 tail: 108 index: 14946
+ Stream: column 0 section ROW_INDEX start: 157668 length 17
+ Stream: column 1 section ROW_INDEX start: 157685 length 163
+ Stream: column 2 section ROW_INDEX start: 157848 length 168
+ Stream: column 2 section BLOOM_FILTER start: 158016 length 6533
+ Stream: column 2 section BLOOM_FILTER_UTF8 start: 164549 length 6046
+ Stream: column 3 section ROW_INDEX start: 170595 length 90
+ Stream: column 3 section BLOOM_FILTER start: 170685 length 1038
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 171723 length 891
+ Stream: column 1 section DATA start: 172614 length 20035
+ Stream: column 2 section DATA start: 192649 length 40050
+ Stream: column 3 section DATA start: 232699 length 3544
+ Stream: column 3 section LENGTH start: 236243 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 236268 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -132,20 +132,20 @@ Stripes:
Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4943 loadFactor: 0.5149 expectedFpp: 0.009594797
Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4930 loadFactor: 0.5135 expectedFpp: 0.009419539
Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9333 loadFactor: 0.9722 expectedFpp: 0.82082444
- Stripe: offset: 236497 data: 63817 rows: 5000 tail: 103 index: 14939
- Stream: column 0 section ROW_INDEX start: 236497 length 17
- Stream: column 1 section ROW_INDEX start: 236514 length 165
- Stream: column 2 section ROW_INDEX start: 236679 length 167
- Stream: column 2 section BLOOM_FILTER start: 236846 length 6524
- Stream: column 2 section BLOOM_FILTER_UTF8 start: 243370 length 6046
- Stream: column 3 section ROW_INDEX start: 249416 length 91
- Stream: column 3 section BLOOM_FILTER start: 249507 length 1038
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 250545 length 891
- Stream: column 1 section DATA start: 251436 length 20035
- Stream: column 2 section DATA start: 271471 length 40050
- Stream: column 3 section DATA start: 311521 length 3574
- Stream: column 3 section LENGTH start: 315095 length 25
- Stream: column 3 section DICTIONARY_DATA start: 315120 length 133
+ Stripe: offset: 236509 data: 63817 rows: 5000 tail: 107 index: 14939
+ Stream: column 0 section ROW_INDEX start: 236509 length 17
+ Stream: column 1 section ROW_INDEX start: 236526 length 165
+ Stream: column 2 section ROW_INDEX start: 236691 length 167
+ Stream: column 2 section BLOOM_FILTER start: 236858 length 6524
+ Stream: column 2 section BLOOM_FILTER_UTF8 start: 243382 length 6046
+ Stream: column 3 section ROW_INDEX start: 249428 length 91
+ Stream: column 3 section BLOOM_FILTER start: 249519 length 1038
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 250557 length 891
+ Stream: column 1 section DATA start: 251448 length 20035
+ Stream: column 2 section DATA start: 271483 length 40050
+ Stream: column 3 section DATA start: 311533 length 3574
+ Stream: column 3 section LENGTH start: 315107 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 315132 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -163,20 +163,20 @@ Stripes:
Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4941 loadFactor: 0.5147 expectedFpp: 0.009567649
Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4993 loadFactor: 0.5201 expectedFpp: 0.010295142
Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9353 loadFactor: 0.9743 expectedFpp: 0.8332165
- Stripe: offset: 315356 data: 12943 rows: 1000 tail: 96 index: 3546
- Stream: column 0 section ROW_INDEX start: 315356 length 12
- Stream: column 1 section ROW_INDEX start: 315368 length 38
- Stream: column 2 section ROW_INDEX start: 315406 length 41
- Stream: column 2 section BLOOM_FILTER start: 315447 length 1337
- Stream: column 2 section BLOOM_FILTER_UTF8 start: 316784 length 1211
- Stream: column 3 section ROW_INDEX start: 317995 length 40
- Stream: column 3 section BLOOM_FILTER start: 318035 length 472
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 318507 length 395
- Stream: column 1 section DATA start: 318902 length 4007
- Stream: column 2 section DATA start: 322909 length 8010
- Stream: column 3 section DATA start: 330919 length 768
- Stream: column 3 section LENGTH start: 331687 length 25
- Stream: column 3 section DICTIONARY_DATA start: 331712 length 133
+ Stripe: offset: 315372 data: 12943 rows: 1000 tail: 102 index: 3546
+ Stream: column 0 section ROW_INDEX start: 315372 length 12
+ Stream: column 1 section ROW_INDEX start: 315384 length 38
+ Stream: column 2 section ROW_INDEX start: 315422 length 41
+ Stream: column 2 section BLOOM_FILTER start: 315463 length 1337
+ Stream: column 2 section BLOOM_FILTER_UTF8 start: 316800 length 1211
+ Stream: column 3 section ROW_INDEX start: 318011 length 40
+ Stream: column 3 section BLOOM_FILTER start: 318051 length 472
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 318523 length 395
+ Stream: column 1 section DATA start: 318918 length 4007
+ Stream: column 2 section DATA start: 322925 length 8010
+ Stream: column 3 section DATA start: 330935 length 768
+ Stream: column 3 section LENGTH start: 331703 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 331728 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -187,7 +187,7 @@ Stripes:
Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
-File length: 332489 bytes
+File length: 332511 bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
http://git-wip-us.apache.org/repos/asf/orc/blob/f2b8b799/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
index b0315b4..78e0258 100644
--- a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_101
+File Version: 0.12 with ORC_135
Rows: 21000
Compression: ZLIB
Compression size: 4096