You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/09/22 19:21:54 UTC
[2/4] orc git commit: ORC-101 Correct bloom filters for strings and
decimals to use utf8 encoding.
http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 6d1955d..f159eef 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -19,9 +19,11 @@
package org.apache.orc.impl;
import static junit.framework.Assert.assertEquals;
+import static junit.framework.TestCase.fail;
import static org.hamcrest.core.Is.is;
-import static org.junit.Assert.*;
-import static org.mockito.Mockito.any;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.atLeastOnce;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
@@ -33,9 +35,9 @@ import java.io.IOException;
import java.io.InputStream;
import java.sql.Timestamp;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
-import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
@@ -46,7 +48,7 @@ import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.hive.common.io.DiskRangeList;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl;
-import org.apache.orc.BloomFilterIO;
+import org.apache.orc.util.BloomFilter;
import org.apache.orc.DataReader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
@@ -62,6 +64,7 @@ import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.OrcProto;
+import org.junit.Assert;
import org.junit.Test;
import org.mockito.MockSettings;
import org.mockito.Mockito;
@@ -375,23 +378,23 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", false, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.BOOLEAN));
}
@Test
@@ -399,34 +402,34 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
// Stats gets converted to column type. "15" is outside of "10" and "100"
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
// Integer stats will not be converted date because of days/seconds/millis ambiguity
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -434,39 +437,39 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
// Stats gets converted to column type. "15.0" is outside of "10.0" and "100.0"
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
// Double is not converted to date type because of days/seconds/millis ambiguity
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15*1000L), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150*1000L), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.DOUBLE));
}
@Test
@@ -474,33 +477,33 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 100L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 100.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "100", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
// IllegalArgumentException is thrown when converting String to Date, hence YES_NO
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 1000), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 1000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("100"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(100), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -509,69 +512,69 @@ public class TestRecordReaderImpl {
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
// Date to Integer conversion is not possible.
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
// Date to Float conversion is also not possible.
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "1970-01-11", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15.1", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "__a15__1", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "2000-01-16", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "1970-01-16", null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
// Date to Decimal conversion is also not possible.
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15L * 24L * 60L * 60L * 1000L), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -579,39 +582,39 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
// "15" out of range of "10.0" and "100.0"
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
// Decimal to Date not possible.
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15 * 1000L), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150 * 1000L), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -619,46 +622,46 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.FLOAT, "x", 15.0, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", "15", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.STRING, "x", new Timestamp(15).toString(), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO,
RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10 * 24L * 60L * 60L * 1000L,
- 100 * 24L * 60L * 60L * 1000L), pred, null));
+ 100 * 24L * 60L * 60L * 1000L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -667,17 +670,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG,
"x", 15L, null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -686,17 +689,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG,
"x", 15L, null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -705,15 +708,15 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.LONG,
"x", 15L, null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), lessThan, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), lessThan, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), lessThan, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), lessThan, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), lessThan, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -722,15 +725,15 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.LONG,
"x", 15L, null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -742,13 +745,13 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
"x", null, args);
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -760,19 +763,19 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.LONG,
"x", null, args);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 40L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 40L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 15L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 15L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 25L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 25L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 25L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 25L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 20L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 20L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -781,7 +784,7 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.LONG,
"x", null, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@@ -791,17 +794,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same
}
@Test
@@ -810,17 +813,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same
}
@Test
@@ -829,17 +832,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
assertEquals(TruthValue.NO_NULL, // min, same stats
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -848,17 +851,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING,
"x", "c", null);
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // before
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same
}
@Test
@@ -870,17 +873,17 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
"x", null, args);
assertEquals(TruthValue.NO_NULL, // before & after
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // min
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
assertEquals(TruthValue.YES_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // same
}
@Test
@@ -892,31 +895,31 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING,
"x", null, args);
assertEquals(TruthValue.YES_NULL, // before & after
- RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NULL, // before & max
- RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO_NULL, // before & before
- RecordReaderImpl.evaluatePredicateProto(createStringStats("h", "g", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("h", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL, // before & min
- RecordReaderImpl.evaluatePredicateProto(createStringStats("f", "g", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("f", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL, // before & middle
- RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "g", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NULL, // min & after
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "e", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "e", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NULL, // min & max
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "f", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "f", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.YES_NO_NULL, // min & middle
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "g", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "g", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // after
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "c", true), pred, null)); // max
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // max
assertEquals(TruthValue.YES_NO_NULL,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG)); // middle
assertEquals(TruthValue.YES_NULL, // min & after, same stats
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -925,9 +928,9 @@ public class TestRecordReaderImpl {
(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING,
"x", null, null);
assertEquals(TruthValue.YES_NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
assertEquals(TruthValue.NO,
- RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", false), pred, null));
+ RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", false), pred, null, null, OrcFile.WriterVersion.ORC_101, TypeDescription.Category.LONG));
}
@Test
@@ -1304,7 +1307,7 @@ public class TestRecordReaderImpl {
public void testIntNullSafeEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addLong(i);
}
@@ -1319,7 +1322,7 @@ public class TestRecordReaderImpl {
public void testIntEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addLong(i);
}
@@ -1338,7 +1341,7 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
"x", null, args);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addLong(i);
}
@@ -1356,7 +1359,7 @@ public class TestRecordReaderImpl {
public void testDoubleNullSafeEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addDouble(i);
}
@@ -1371,7 +1374,7 @@ public class TestRecordReaderImpl {
public void testDoubleEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addDouble(i);
}
@@ -1390,7 +1393,7 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.FLOAT,
"x", null, args);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addDouble(i);
}
@@ -1408,7 +1411,7 @@ public class TestRecordReaderImpl {
public void testStringNullSafeEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "str_15", null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addString("str_" + i);
}
@@ -1423,7 +1426,7 @@ public class TestRecordReaderImpl {
public void testStringEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", "str_15", null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addString("str_" + i);
}
@@ -1442,7 +1445,7 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
"x", null, args);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addString("str_" + i);
}
@@ -1461,7 +1464,7 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x",
new DateWritable(15).get(), null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addLong((new DateWritable(i)).getDays());
}
@@ -1477,7 +1480,7 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DATE, "x",
new DateWritable(15).get(), null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addLong((new DateWritable(i)).getDays());
}
@@ -1496,7 +1499,7 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.DATE,
"x", null, args);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addLong((new DateWritable(i)).getDays());
}
@@ -1516,7 +1519,7 @@ public class TestRecordReaderImpl {
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x",
new Timestamp(15),
null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addLong((new Timestamp(i)).getTime());
}
@@ -1531,7 +1534,7 @@ public class TestRecordReaderImpl {
public void testTimestampEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addLong((new Timestamp(i)).getTime());
}
@@ -1550,7 +1553,7 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.TIMESTAMP,
"x", null, args);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addLong((new Timestamp(i)).getTime());
}
@@ -1570,7 +1573,7 @@ public class TestRecordReaderImpl {
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x",
new HiveDecimalWritable("15"),
null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addString(HiveDecimal.create(i).toString());
}
@@ -1587,7 +1590,7 @@ public class TestRecordReaderImpl {
PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DECIMAL, "x",
new HiveDecimalWritable("15"),
null);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addString(HiveDecimal.create(i).toString());
}
@@ -1606,7 +1609,7 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL,
"x", null, args);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addString(HiveDecimal.create(i).toString());
}
@@ -1629,7 +1632,7 @@ public class TestRecordReaderImpl {
PredicateLeaf pred = createPredicateLeaf
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL,
"x", null, args);
- BloomFilterIO bf = new BloomFilterIO(10000);
+ BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
bf.addString(HiveDecimal.create(i).toString());
}
@@ -1692,4 +1695,171 @@ public class TestRecordReaderImpl {
recordReader.close();
}
+
+ @Test
+ public void TestOldBloomFilters() throws Exception { // index-read planning for a stripe that only has the original BLOOM_FILTER streams
+ OrcProto.StripeFooter footer =
+ OrcProto.StripeFooter.newBuilder()
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(1).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(1).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(2).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(2).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(3).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(3).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build())
+ .build(); // six 1000-byte streams: ROW_INDEX + BLOOM_FILTER for each of columns 1-3
+ TypeDescription schema = TypeDescription.fromString("struct<x:int,y:decimal(10,2),z:string>"); // column ids used below: 1 = x, 2 = y (decimal), 3 = z; presumably 0 is the root struct
+ OrcProto.Stream.Kind[] bloomFilterKinds = new OrcProto.Stream.Kind[4]; // written by planIndexReading, indexed by column id
+
+ // normal read: third argument false (presumably the ignore-non-utf8 flag), so x and z both use their BLOOM_FILTER stream
+ DiskRangeList ranges = RecordReaderUtils.planIndexReading(schema, footer,
+ false, new boolean[]{true, true, false, true},
+ new boolean[]{false, true, false, true},
+ OrcFile.WriterVersion.HIVE_4243,
+ bloomFilterKinds);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[3]);
+ assertEquals("range start: 0 end: 2000", ranges.toString()); // column 1 ROW_INDEX + BLOOM_FILTER
+ assertEquals("range start: 4000 end: 6000", ranges.next.toString()); // column 3 ROW_INDEX + BLOOM_FILTER; column 2 (2000-4000) is not planned, presumably because the first boolean[] excludes it
+ assertEquals(null, ranges.next.next);
+
+ // ignore non-utf8 bloom filter: same request with the third argument true; the string column's BLOOM_FILTER is no longer planned
+ Arrays.fill(bloomFilterKinds, null); // reset the output array between scenarios
+ ranges = RecordReaderUtils.planIndexReading(schema, footer,
+ true, new boolean[]{true, true, false, true},
+ new boolean[]{false, true, false, true},
+ OrcFile.WriterVersion.HIVE_4243,
+ bloomFilterKinds);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]);
+ assertEquals(null, bloomFilterKinds[3]);
+ assertEquals("range start: 0 end: 2000", ranges.toString()); // column 1 ROW_INDEX + BLOOM_FILTER
+ assertEquals("range start: 4000 end: 5000", ranges.next.toString()); // only column 3's ROW_INDEX
+ assertEquals(null, ranges.next.next);
+
+ // check that we are handling the post hive-12055 strings correctly: the string filter is kept, the decimal filter is not
+ Arrays.fill(bloomFilterKinds, null);
+ ranges = RecordReaderUtils.planIndexReading(schema, footer,
+ true, null, new boolean[]{false, true, true, true},
+ OrcFile.WriterVersion.HIVE_12055, bloomFilterKinds);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]);
+ assertEquals(null, bloomFilterKinds[2]);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[3]);
+ assertEquals("range start: 0 end: 3000", ranges.toString()); // column 1 ROW_INDEX + BLOOM_FILTER, then column 2 ROW_INDEX
+ assertEquals("range start: 4000 end: 6000", ranges.next.toString()); // column 3 ROW_INDEX + BLOOM_FILTER; column 2's filter (3000-4000) is skipped
+ assertEquals(null, ranges.next.next);
+
+ // ignore non-utf8 bloom filter on decimal: only the three ROW_INDEX streams are planned
+ Arrays.fill(bloomFilterKinds, null);
+ ranges = RecordReaderUtils.planIndexReading(schema, footer,
+ true, null,
+ new boolean[]{false, false, true, false},
+ OrcFile.WriterVersion.HIVE_4243,
+ bloomFilterKinds);
+ assertEquals(null, bloomFilterKinds[2]);
+ assertEquals("range start: 0 end: 1000", ranges.toString()); // column 1 ROW_INDEX
+ assertEquals("range start: 2000 end: 3000", ranges.next.toString()); // column 2 ROW_INDEX
+ assertEquals("range start: 4000 end: 5000", ranges.next.next.toString()); // column 3 ROW_INDEX
+ assertEquals(null, ranges.next.next.next);
+ }
+
+ @Test
+ public void TestCompatibleBloomFilters() throws Exception { // planning when the decimal and string columns carry both BLOOM_FILTER and BLOOM_FILTER_UTF8 streams
+ OrcProto.StripeFooter footer =
+ OrcProto.StripeFooter.newBuilder()
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(1).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(1).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(2).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(2).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(2).setKind(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(3).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(3).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(3).setKind(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8).setLength(1000).build())
+ .build(); // eight 1000-byte streams; columns 2 and 3 have both filter kinds
+ TypeDescription schema = TypeDescription.fromString("struct<x:int,y:decimal(10,2),z:string>"); // column ids used below: 1 = x, 2 = y (decimal), 3 = z (string); presumably 0 is the root struct
+ OrcProto.Stream.Kind[] bloomFilterKinds = new OrcProto.Stream.Kind[4]; // written by planIndexReading, indexed by column id
+
+ // normal read: when both kinds exist, the string column uses BLOOM_FILTER_UTF8
+ DiskRangeList ranges = RecordReaderUtils.planIndexReading(schema, footer,
+ false, new boolean[]{true, true, false, true},
+ new boolean[]{false, true, false, true},
+ OrcFile.WriterVersion.HIVE_4243,
+ bloomFilterKinds);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8, bloomFilterKinds[3]);
+ assertEquals("range start: 0 end: 2000", ranges.toString()); // column 1 ROW_INDEX + BLOOM_FILTER
+ assertEquals("range start: 5000 end: 6000", ranges.next.toString()); // column 3 ROW_INDEX
+ assertEquals("range start: 7000 end: 8000", ranges.next.next.toString()); // column 3 BLOOM_FILTER_UTF8; its old BLOOM_FILTER (6000-7000) is skipped
+ assertEquals(null, ranges.next.next.next);
+
+ // search argument on x and y with the third argument true: the decimal column uses BLOOM_FILTER_UTF8 rather than its old BLOOM_FILTER
+ Arrays.fill(bloomFilterKinds, null); // reset the output array between scenarios
+ ranges = RecordReaderUtils.planIndexReading(schema, footer,
+ true, null,
+ new boolean[]{false, true, true, false},
+ OrcFile.WriterVersion.HIVE_4243,
+ bloomFilterKinds);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8, bloomFilterKinds[2]);
+ assertEquals("range start: 0 end: 3000", ranges.toString()); // column 1 ROW_INDEX + BLOOM_FILTER, then column 2 ROW_INDEX
+ assertEquals("range start: 4000 end: 6000", ranges.next.toString()); // column 2 BLOOM_FILTER_UTF8 + column 3 ROW_INDEX
+ assertEquals(null, ranges.next.next);
+ }
+
+ @Test
+ public void TestNewBloomFilters() throws Exception { // planning when the decimal and string columns only have BLOOM_FILTER_UTF8 streams
+ OrcProto.StripeFooter footer =
+ OrcProto.StripeFooter.newBuilder()
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(1).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(1).setKind(OrcProto.Stream.Kind.BLOOM_FILTER).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(2).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(2).setKind(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(3).setKind(OrcProto.Stream.Kind.ROW_INDEX).setLength(1000).build())
+ .addStreams(OrcProto.Stream.newBuilder()
+ .setColumn(3).setKind(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8).setLength(1000).build())
+ .build(); // six 1000-byte streams: column 1 has BLOOM_FILTER, columns 2 and 3 have BLOOM_FILTER_UTF8
+ TypeDescription schema = TypeDescription.fromString("struct<x:int,y:decimal(10,2),z:string>"); // column ids used below: 1 = x, 2 = y (decimal), 3 = z (string); presumably 0 is the root struct
+ OrcProto.Stream.Kind[] bloomFilterKinds = new OrcProto.Stream.Kind[4]; // written by planIndexReading, indexed by column id
+
+ // normal read: x uses BLOOM_FILTER, z uses BLOOM_FILTER_UTF8
+ DiskRangeList ranges = RecordReaderUtils.planIndexReading(schema, footer,
+ false, new boolean[]{true, true, false, true},
+ new boolean[]{false, true, false, true},
+ OrcFile.WriterVersion.HIVE_4243,
+ bloomFilterKinds);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8, bloomFilterKinds[3]);
+ assertEquals("range start: 0 end: 2000", ranges.toString()); // column 1 ROW_INDEX + BLOOM_FILTER
+ assertEquals("range start: 4000 end: 6000", ranges.next.toString()); // column 3 ROW_INDEX + BLOOM_FILTER_UTF8
+ assertEquals(null, ranges.next.next);
+
+ // search argument on x and y with the third argument true: the five planned streams come back as one range
+ Arrays.fill(bloomFilterKinds, null); // reset the output array between scenarios
+ ranges = RecordReaderUtils.planIndexReading(schema, footer,
+ true, null,
+ new boolean[]{false, true, true, false},
+ OrcFile.WriterVersion.HIVE_4243,
+ bloomFilterKinds);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER, bloomFilterKinds[1]);
+ assertEquals(OrcProto.Stream.Kind.BLOOM_FILTER_UTF8, bloomFilterKinds[2]);
+ assertEquals("range start: 0 end: 5000", ranges.toString()); // column 1 ROW_INDEX + BLOOM_FILTER, column 2 ROW_INDEX + BLOOM_FILTER_UTF8, column 3 ROW_INDEX
+ assertEquals(null, ranges.next);
+ }
}
http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/core/src/test/org/apache/orc/util/TestMurmur3.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/util/TestMurmur3.java b/java/core/src/test/org/apache/orc/util/TestMurmur3.java
new file mode 100644
index 0000000..575e250
--- /dev/null
+++ b/java/core/src/test/org/apache/orc/util/TestMurmur3.java
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.util;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+import org.apache.orc.util.Murmur3;
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * Tests that the ORC {@link Murmur3} implementation agrees with Guava's
+ * Murmur3 hashes, and that hashing a sub-range of a larger buffer yields
+ * the same result as hashing a standalone array.
+ */
+public class TestMurmur3 {
+
+  /** Seed shared by the hash functions and the random data generators. */
+  private static final int SEED = 123;
+
+  /** Number of random values checked by each randomized test. */
+  private static final int ITERATIONS = 1000;
+
+  /**
+   * Encodes a test key as UTF-8. An explicit charset keeps the hashed bytes,
+   * and therefore the expected hash values, independent of the platform
+   * default encoding.
+   */
+  private static byte[] utf8(String key) {
+    return key.getBytes(StandardCharsets.UTF_8);
+  }
+
+  /** Asserts that Murmur3.hash32 matches Guava's murmur3_32 for the data. */
+  private static void assertHash32MatchesGuava(byte[] data) {
+    HashFunction guava = Hashing.murmur3_32(SEED);
+    int expected = guava.hashBytes(data).asInt();
+    assertEquals(expected, Murmur3.hash32(data, data.length, SEED));
+  }
+
+  /**
+   * Computes Guava's 128-bit Murmur3 hash of the data as two longs.
+   * Guava stores the hashcodes in little endian order.
+   */
+  private static long[] guavaHash128(byte[] data) {
+    HashFunction guava = Hashing.murmur3_128(SEED);
+    ByteBuffer buf = ByteBuffer.wrap(guava.hashBytes(data).asBytes())
+        .order(ByteOrder.LITTLE_ENDIAN);
+    return new long[]{buf.getLong(0), buf.getLong(8)};
+  }
+
+  /**
+   * Asserts that Murmur3.hash128 over the given range of the buffer
+   * produces the expected pair of longs.
+   */
+  private static void assertHash128(long[] expected, byte[] buffer,
+                                    int offset, int length) {
+    long[] actual = Murmur3.hash128(buffer, offset, length, SEED);
+    assertEquals(expected[0], actual[0]);
+    assertEquals(expected[1], actual[1]);
+  }
+
+  /**
+   * Copies the data into a larger array starting at the given offset, with
+   * every other byte set to the filler value.
+   */
+  private static byte[] embed(byte[] data, int offset, byte filler) {
+    byte[] padded = new byte[data.length + offset];
+    Arrays.fill(padded, filler);
+    System.arraycopy(data, 0, padded, offset, data.length);
+    return padded;
+  }
+
+  @Test
+  public void testHashCodesM3_32_string() {
+    assertHash32MatchesGuava(utf8("test"));
+    assertHash32MatchesGuava(utf8("testkey"));
+  }
+
+  @Test
+  public void testHashCodesM3_32_ints() {
+    Random rand = new Random(SEED);
+    for (int i = 0; i < ITERATIONS; i++) {
+      assertHash32MatchesGuava(
+          ByteBuffer.allocate(4).putInt(rand.nextInt()).array());
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_32_longs() {
+    Random rand = new Random(SEED);
+    for (int i = 0; i < ITERATIONS; i++) {
+      assertHash32MatchesGuava(
+          ByteBuffer.allocate(8).putLong(rand.nextLong()).array());
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_32_double() {
+    Random rand = new Random(SEED);
+    for (int i = 0; i < ITERATIONS; i++) {
+      assertHash32MatchesGuava(
+          ByteBuffer.allocate(8).putDouble(rand.nextDouble()).array());
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_128_string() {
+    byte[] shortKey = utf8("test");
+    assertHash128(guavaHash128(shortKey), shortKey, 0, shortKey.length);
+
+    byte[] keyBytes = utf8("testkey128_testkey128");
+    long[] expected = guavaHash128(keyBytes);
+    assertHash128(expected, keyBytes, 0, keyBytes.length);
+
+    // the same key at a non-zero offset must hash identically
+    byte[] offsetKeyBytes = embed(keyBytes, 35, (byte) -1);
+    assertHash128(expected, offsetKeyBytes, 35, keyBytes.length);
+  }
+
+  @Test
+  public void testHashCodeM3_64() {
+    byte[] origin = utf8("It was the best of times, it was the worst of times," +
+        " it was the age of wisdom, it was the age of foolishness," +
+        " it was the epoch of belief, it was the epoch of incredulity," +
+        " it was the season of Light, it was the season of Darkness," +
+        " it was the spring of hope, it was the winter of despair," +
+        " we had everything before us, we had nothing before us," +
+        " we were all going direct to Heaven," +
+        " we were all going direct the other way.");
+    long expected = 305830725663368540L;
+    assertEquals(expected, Murmur3.hash64(origin, 0, origin.length));
+
+    // the same text at a non-zero offset must hash identically
+    byte[] originOffset = embed(origin, 150, (byte) 123);
+    assertEquals(expected, Murmur3.hash64(originOffset, 150, origin.length));
+  }
+
+  @Test
+  public void testHashCodesM3_128_ints() {
+    Random rand = new Random(SEED);
+    for (int i = 0; i < ITERATIONS; i++) {
+      byte[] data = ByteBuffer.allocate(4).putInt(rand.nextInt()).array();
+      long[] expected = guavaHash128(data);
+      assertHash128(expected, data, 0, data.length);
+
+      // the same value at a non-zero offset must hash identically
+      byte[] offsetData = embed(data, 50, (byte) 0);
+      assertHash128(expected, offsetData, 50, data.length);
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_128_longs() {
+    Random rand = new Random(SEED);
+    for (int i = 0; i < ITERATIONS; i++) {
+      byte[] data = ByteBuffer.allocate(8).putLong(rand.nextLong()).array();
+      assertHash128(guavaHash128(data), data, 0, data.length);
+    }
+  }
+
+  @Test
+  public void testHashCodesM3_128_double() {
+    Random rand = new Random(SEED);
+    for (int i = 0; i < ITERATIONS; i++) {
+      byte[] data = ByteBuffer.allocate(8).putDouble(rand.nextDouble()).array();
+      assertHash128(guavaHash128(data), data, 0, data.length);
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/core/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/java/core/src/test/resources/log4j.properties b/java/core/src/test/resources/log4j.properties
index d2c063d..fae44b6 100644
--- a/java/core/src/test/resources/log4j.properties
+++ b/java/core/src/test/resources/log4j.properties
@@ -15,3 +15,6 @@ log4j.rootLogger=WARN,stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n
+
+# Suppress the warnings about native io not being available
+log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/mapreduce/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/java/mapreduce/src/test/resources/log4j.properties b/java/mapreduce/src/test/resources/log4j.properties
index d2c063d..fae44b6 100644
--- a/java/mapreduce/src/test/resources/log4j.properties
+++ b/java/mapreduce/src/test/resources/log4j.properties
@@ -15,3 +15,6 @@ log4j.rootLogger=WARN,stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n
+
+# Suppress the warnings about native io not being available
+log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/orc/blob/9d39cb80/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java b/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
deleted file mode 100644
index e60690d..0000000
--- a/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hive.common.util;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * BloomFilter is a probabilistic data structure for set membership check. BloomFilters are
- * highly space efficient when compared to using a HashSet. Because of the probabilistic nature of
- * bloom filter false positive (element not present in bloom filter but test() says true) are
- * possible but false negatives are not possible (if element is present then test() will never
- * say false). The false positive probability is configurable (default: 5%) depending on which
- * storage requirement may increase or decrease. Lower the false positive probability greater
- * is the space requirement.
- * Bloom filters are sensitive to number of elements that will be inserted in the bloom filter.
- * During the creation of bloom filter expected number of entries must be specified. If the number
- * of insertions exceed the specified initial number of entries then false positive probability will
- * increase accordingly.
- *
- * Internally, this implementation of bloom filter uses Murmur3 fast non-cryptographic hash
- * algorithm. Although Murmur2 is slightly faster than Murmur3 in Java, it suffers from hash
- * collisions for specific sequence of repeating bytes. Check the following link for more info
- * https://code.google.com/p/smhasher/wiki/MurmurHash2Flaw
- */
-public class BloomFilter {
- public static final double DEFAULT_FPP = 0.05;
- protected BitSet bitSet;
- protected int numBits;
- protected int numHashFunctions;
-
- public BloomFilter() {
- }
-
- public BloomFilter(long expectedEntries) {
- this(expectedEntries, DEFAULT_FPP);
- }
-
- static void checkArgument(boolean expression, String message) {
- if (!expression) {
- throw new IllegalArgumentException(message);
- }
- }
-
- public BloomFilter(long expectedEntries, double fpp) {
- checkArgument(expectedEntries > 0, "expectedEntries should be > 0");
- checkArgument(fpp > 0.0 && fpp < 1.0, "False positive probability should be > 0.0 & < 1.0");
- int nb = optimalNumOfBits(expectedEntries, fpp);
- // make 'm' multiple of 64
- this.numBits = nb + (Long.SIZE - (nb % Long.SIZE));
- this.numHashFunctions = optimalNumOfHashFunctions(expectedEntries, numBits);
- this.bitSet = new BitSet(numBits);
- }
-
- /**
- * A constructor to support rebuilding the BloomFilter from a serialized representation.
- * @param bits
- * @param numBits
- * @param numFuncs
- */
- public BloomFilter(List<Long> bits, int numBits, int numFuncs) {
- super();
- long[] copied = new long[bits.size()];
- for (int i = 0; i < bits.size(); i++) copied[i] = bits.get(i);
- bitSet = new BitSet(copied);
- this.numBits = numBits;
- numHashFunctions = numFuncs;
- }
-
- static int optimalNumOfHashFunctions(long n, long m) {
- return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
- }
-
- static int optimalNumOfBits(long n, double p) {
- return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
- }
-
- public void add(byte[] val) {
- if (val == null) {
- addBytes(val, -1, -1);
- } else {
- addBytes(val, 0, val.length);
- }
- }
-
- public void addBytes(byte[] val, int offset, int length) {
- // We use the trick mentioned in "Less Hashing, Same Performance: Building a Better Bloom Filter"
- // by Kirsch et.al. From abstract 'only two hash functions are necessary to effectively
- // implement a Bloom filter without any loss in the asymptotic false positive probability'
-
- // Lets split up 64-bit hashcode into two 32-bit hash codes and employ the technique mentioned
- // in the above paper
- long hash64 = val == null ? Murmur3.NULL_HASHCODE :
- Murmur3.hash64(val, offset, length);
- addHash(hash64);
- }
-
- private void addHash(long hash64) {
- int hash1 = (int) hash64;
- int hash2 = (int) (hash64 >>> 32);
-
- for (int i = 1; i <= numHashFunctions; i++) {
- int combinedHash = hash1 + (i * hash2);
- // hashcode should be positive, flip all the bits if it's negative
- if (combinedHash < 0) {
- combinedHash = ~combinedHash;
- }
- int pos = combinedHash % numBits;
- bitSet.set(pos);
- }
- }
-
- public void addString(String val) {
- if (val == null) {
- add(null);
- } else {
- add(val.getBytes());
- }
- }
-
- public void addLong(long val) {
- addHash(getLongHash(val));
- }
-
- public void addDouble(double val) {
- addLong(Double.doubleToLongBits(val));
- }
-
- public boolean test(byte[] val) {
- if (val == null) {
- return testBytes(val, -1, -1);
- }
- return testBytes(val, 0, val.length);
- }
-
- public boolean testBytes(byte[] val, int offset, int length) {
- long hash64 = val == null ? Murmur3.NULL_HASHCODE :
- Murmur3.hash64(val, offset, length);
- return testHash(hash64);
- }
-
- private boolean testHash(long hash64) {
- int hash1 = (int) hash64;
- int hash2 = (int) (hash64 >>> 32);
-
- for (int i = 1; i <= numHashFunctions; i++) {
- int combinedHash = hash1 + (i * hash2);
- // hashcode should be positive, flip all the bits if it's negative
- if (combinedHash < 0) {
- combinedHash = ~combinedHash;
- }
- int pos = combinedHash % numBits;
- if (!bitSet.get(pos)) {
- return false;
- }
- }
- return true;
- }
-
- public boolean testString(String val) {
- if (val == null) {
- return test(null);
- } else {
- return test(val.getBytes());
- }
- }
-
- public boolean testLong(long val) {
- return testHash(getLongHash(val));
- }
-
- // Thomas Wang's integer hash function
- // http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
- private long getLongHash(long key) {
- key = (~key) + (key << 21); // key = (key << 21) - key - 1;
- key = key ^ (key >> 24);
- key = (key + (key << 3)) + (key << 8); // key * 265
- key = key ^ (key >> 14);
- key = (key + (key << 2)) + (key << 4); // key * 21
- key = key ^ (key >> 28);
- key = key + (key << 31);
- return key;
- }
-
- public boolean testDouble(double val) {
- return testLong(Double.doubleToLongBits(val));
- }
-
- public long sizeInBytes() {
- return getBitSize() / 8;
- }
-
- public int getBitSize() {
- return bitSet.getData().length * Long.SIZE;
- }
-
- public int getNumHashFunctions() {
- return numHashFunctions;
- }
-
- public long[] getBitSet() {
- return bitSet.getData();
- }
-
- @Override
- public String toString() {
- return "m: " + numBits + " k: " + numHashFunctions;
- }
-
- /**
- * Merge the specified bloom filter with current bloom filter.
- *
- * @param that - bloom filter to merge
- */
- public void merge(BloomFilter that) {
- if (this != that && this.numBits == that.numBits && this.numHashFunctions == that.numHashFunctions) {
- this.bitSet.putAll(that.bitSet);
- } else {
- throw new IllegalArgumentException("BloomFilters are not compatible for merging." +
- " this - " + this.toString() + " that - " + that.toString());
- }
- }
-
- public void reset() {
- this.bitSet.clear();
- }
-
- /**
- * Bare metal bit set implementation. For performance reasons, this implementation does not check
- * for index bounds nor expand the bit set size if the specified index is greater than the size.
- */
- public class BitSet {
- private final long[] data;
-
- public BitSet(long bits) {
- this(new long[(int) Math.ceil((double) bits / (double) Long.SIZE)]);
- }
-
- /**
- * Deserialize long array as bit set.
- *
- * @param data - bit array
- */
- public BitSet(long[] data) {
- assert data.length > 0 : "data length is zero!";
- this.data = data;
- }
-
- /**
- * Sets the bit at specified index.
- *
- * @param index - position
- */
- public void set(int index) {
- data[index >>> 6] |= (1L << index);
- }
-
- /**
- * Returns true if the bit is set in the specified index.
- *
- * @param index - position
- * @return - value at the bit position
- */
- public boolean get(int index) {
- return (data[index >>> 6] & (1L << index)) != 0;
- }
-
- /**
- * Number of bits
- */
- public long bitSize() {
- return (long) data.length * Long.SIZE;
- }
-
- public long[] getData() {
- return data;
- }
-
- /**
- * Combines the two BitArrays using bitwise OR.
- */
- public void putAll(BitSet array) {
- assert data.length == array.data.length :
- "BitArrays must be of equal length (" + data.length + "!= " + array.data.length + ")";
- for (int i = 0; i < data.length; i++) {
- data[i] |= array.data[i];
- }
- }
-
- /**
- * Clear the bit set.
- */
- public void clear() {
- Arrays.fill(data, 0);
- }
- }
-}