You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2023/01/09 02:58:30 UTC
[orc] branch branch-1.8 updated: ORC-1338: Set bloom filter fpp to 1%
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.8
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.8 by this push:
new 966c0c1e9 ORC-1338: Set bloom filter fpp to 1%
966c0c1e9 is described below
commit 966c0c1e9de361babdfe1b7ac231c5afc1e669c7
Author: William Hyun <wi...@apache.org>
AuthorDate: Fri Dec 30 00:07:53 2022 -0800
ORC-1338: Set bloom filter fpp to 1%
### What changes were proposed in this pull request?
This PR changes the default bloom filter false positive probability (fpp, `orc.bloom.filter.fpp`) from 5% to 1%.
### Why are the changes needed?
Parquet uses a 1% fpp for its bloom filters; this change aligns ORC's default with it:
- https://github.com/apache/parquet-mr/blob/433de8df33fcf31927f7b51456be9f53e64d48b9/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java#L61
### How was this patch tested?
Passes CI with the updated expected test outputs (`orc-file-dump-bloomfilter.out` and `orc-file-dump.json`).
Closes #1349 from williamhyun/bloomfilter.
Authored-by: William Hyun <wi...@apache.org>
Signed-off-by: William Hyun <wi...@apache.org>
(cherry picked from commit f9e096eb8091cbfa7e363ce0a4c9f809d3048df1)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
java/core/src/java/org/apache/orc/OrcConf.java | 2 +-
.../test/resources/orc-file-dump-bloomfilter.out | 158 ++++----
java/tools/src/test/resources/orc-file-dump.json | 396 ++++++++++-----------
3 files changed, 278 insertions(+), 278 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index d2d22fc8f..04a570eab 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -83,7 +83,7 @@ public enum OrcConf {
"3.2Mb, a new smaller stripe will be inserted to fit within that\n" +
"space. This will make sure that no stripe written will block\n" +
" boundaries and cause remote reads within a node local task."),
- BLOOM_FILTER_FPP("orc.bloom.filter.fpp", "orc.default.bloom.fpp", 0.05,
+ BLOOM_FILTER_FPP("orc.bloom.filter.fpp", "orc.default.bloom.fpp", 0.01,
"Define the default false positive probability for bloom filters."),
USE_ZEROCOPY("orc.use.zerocopy", "hive.exec.orc.zerocopy", false,
"Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)"),
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
index 87b665c2d..a2f3fb05c 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_14
+File Version: 0.12 with ORC_14 by ORC Java 1.9.0-SNAPSHOT
Rows: 21000
Compression: ZLIB
Compression size: 4096
@@ -45,17 +45,17 @@ File Statistics:
Column 3: count: 21000 hasNull: false bytesOnDisk: 15751 min: Darkness, max: worst sum: 81761
Stripes:
- Stripe: offset: 3 data: 63786 rows: 5000 tail: 87 index: 749
+ Stripe: offset: 3 data: 63786 rows: 5000 tail: 87 index: 1330
Stream: column 0 section ROW_INDEX start: 3 length 17
Stream: column 1 section ROW_INDEX start: 20 length 166
Stream: column 2 section ROW_INDEX start: 186 length 169
Stream: column 3 section ROW_INDEX start: 355 length 87
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 442 length 310
- Stream: column 1 section DATA start: 752 length 20035
- Stream: column 2 section DATA start: 20787 length 40050
- Stream: column 3 section DATA start: 60837 length 3543
- Stream: column 3 section LENGTH start: 64380 length 25
- Stream: column 3 section DICTIONARY_DATA start: 64405 length 133
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 442 length 891
+ Stream: column 1 section DATA start: 1333 length 20035
+ Stream: column 2 section DATA start: 21368 length 40050
+ Stream: column 3 section DATA start: 61418 length 3543
+ Stream: column 3 section LENGTH start: 64961 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 64986 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -67,23 +67,23 @@ Stripes:
Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32
Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45
Bloom filters for column 3:
- Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe: offset: 64625 data: 63775 rows: 5000 tail: 87 index: 742
- Stream: column 0 section ROW_INDEX start: 64625 length 17
- Stream: column 1 section ROW_INDEX start: 64642 length 164
- Stream: column 2 section ROW_INDEX start: 64806 length 168
- Stream: column 3 section ROW_INDEX start: 64974 length 83
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 65057 length 310
- Stream: column 1 section DATA start: 65367 length 20035
- Stream: column 2 section DATA start: 85402 length 40050
- Stream: column 3 section DATA start: 125452 length 3532
- Stream: column 3 section LENGTH start: 128984 length 25
- Stream: column 3 section DICTIONARY_DATA start: 129009 length 133
+ Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Stripe: offset: 65206 data: 63775 rows: 5000 tail: 87 index: 1323
+ Stream: column 0 section ROW_INDEX start: 65206 length 17
+ Stream: column 1 section ROW_INDEX start: 65223 length 164
+ Stream: column 2 section ROW_INDEX start: 65387 length 168
+ Stream: column 3 section ROW_INDEX start: 65555 length 83
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 65638 length 891
+ Stream: column 1 section DATA start: 66529 length 20035
+ Stream: column 2 section DATA start: 86564 length 40050
+ Stream: column 3 section DATA start: 126614 length 3532
+ Stream: column 3 section LENGTH start: 130146 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 130171 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -95,23 +95,23 @@ Stripes:
Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43
Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88
Bloom filters for column 3:
- Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe: offset: 129229 data: 63787 rows: 5000 tail: 87 index: 748
- Stream: column 0 section ROW_INDEX start: 129229 length 17
- Stream: column 1 section ROW_INDEX start: 129246 length 163
- Stream: column 2 section ROW_INDEX start: 129409 length 168
- Stream: column 3 section ROW_INDEX start: 129577 length 90
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 129667 length 310
- Stream: column 1 section DATA start: 129977 length 20035
- Stream: column 2 section DATA start: 150012 length 40050
- Stream: column 3 section DATA start: 190062 length 3544
- Stream: column 3 section LENGTH start: 193606 length 25
- Stream: column 3 section DICTIONARY_DATA start: 193631 length 133
+ Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Stripe: offset: 130391 data: 63787 rows: 5000 tail: 87 index: 1329
+ Stream: column 0 section ROW_INDEX start: 130391 length 17
+ Stream: column 1 section ROW_INDEX start: 130408 length 163
+ Stream: column 2 section ROW_INDEX start: 130571 length 168
+ Stream: column 3 section ROW_INDEX start: 130739 length 90
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 130829 length 891
+ Stream: column 1 section DATA start: 131720 length 20035
+ Stream: column 2 section DATA start: 151755 length 40050
+ Stream: column 3 section DATA start: 191805 length 3544
+ Stream: column 3 section LENGTH start: 195349 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 195374 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -123,23 +123,23 @@ Stripes:
Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194
Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43
Bloom filters for column 3:
- Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe: offset: 193851 data: 63817 rows: 5000 tail: 86 index: 750
- Stream: column 0 section ROW_INDEX start: 193851 length 17
- Stream: column 1 section ROW_INDEX start: 193868 length 165
- Stream: column 2 section ROW_INDEX start: 194033 length 167
- Stream: column 3 section ROW_INDEX start: 194200 length 91
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 194291 length 310
- Stream: column 1 section DATA start: 194601 length 20035
- Stream: column 2 section DATA start: 214636 length 40050
- Stream: column 3 section DATA start: 254686 length 3574
- Stream: column 3 section LENGTH start: 258260 length 25
- Stream: column 3 section DICTIONARY_DATA start: 258285 length 133
+ Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Stripe: offset: 195594 data: 63817 rows: 5000 tail: 86 index: 1331
+ Stream: column 0 section ROW_INDEX start: 195594 length 17
+ Stream: column 1 section ROW_INDEX start: 195611 length 165
+ Stream: column 2 section ROW_INDEX start: 195776 length 167
+ Stream: column 3 section ROW_INDEX start: 195943 length 91
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 196034 length 891
+ Stream: column 1 section DATA start: 196925 length 20035
+ Stream: column 2 section DATA start: 216960 length 40050
+ Stream: column 3 section DATA start: 257010 length 3574
+ Stream: column 3 section LENGTH start: 260584 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 260609 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -151,23 +151,23 @@ Stripes:
Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104
Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131
Bloom filters for column 3:
- Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe: offset: 258504 data: 12943 rows: 1000 tail: 80 index: 375
- Stream: column 0 section ROW_INDEX start: 258504 length 12
- Stream: column 1 section ROW_INDEX start: 258516 length 38
- Stream: column 2 section ROW_INDEX start: 258554 length 41
- Stream: column 3 section ROW_INDEX start: 258595 length 40
- Stream: column 3 section BLOOM_FILTER_UTF8 start: 258635 length 244
- Stream: column 1 section DATA start: 258879 length 4007
- Stream: column 2 section DATA start: 262886 length 8010
- Stream: column 3 section DATA start: 270896 length 768
- Stream: column 3 section LENGTH start: 271664 length 25
- Stream: column 3 section DICTIONARY_DATA start: 271689 length 133
+ Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Stripe: offset: 260828 data: 12943 rows: 1000 tail: 80 index: 526
+ Stream: column 0 section ROW_INDEX start: 260828 length 12
+ Stream: column 1 section ROW_INDEX start: 260840 length 38
+ Stream: column 2 section ROW_INDEX start: 260878 length 41
+ Stream: column 3 section ROW_INDEX start: 260919 length 40
+ Stream: column 3 section BLOOM_FILTER_UTF8 start: 260959 length 395
+ Stream: column 1 section DATA start: 261354 length 4007
+ Stream: column 2 section DATA start: 265361 length 8010
+ Stream: column 3 section DATA start: 273371 length 768
+ Stream: column 3 section LENGTH start: 274139 length 25
+ Stream: column 3 section DICTIONARY_DATA start: 274164 length 133
Encoding column 0: DIRECT
Encoding column 1: DIRECT_V2
Encoding column 2: DIRECT_V2
@@ -175,10 +175,10 @@ Stripes:
Row group indices for column 3:
Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
Bloom filters for column 3:
- Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
- Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
+ Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+ Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
-File length: 272535 bytes
+File length: 275025 bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index 1540f7c67..15a9c2495 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -2,7 +2,7 @@
"fileName": "TestFileDump.testDump.orc",
"fileVersion": "0.12",
"writerVersion": "ORC_14",
- "softwareVersion": "ORC Java 1.8.0-SNAPSHOT",
+ "softwareVersion": "ORC Java 1.9.0-SNAPSHOT",
"numberOfRows": 21000,
"compression": "ZLIB",
"compressionBufferSize": 4096,
@@ -268,7 +268,7 @@
"stripeNumber": 1,
"stripeInformation": {
"offset": 3,
- "indexLength": 768,
+ "indexLength": 1349,
"dataLength": 63770,
"footerLength": 89,
"rowCount": 5000
@@ -302,42 +302,42 @@
"columnId": 3,
"section": "BLOOM_FILTER_UTF8",
"startOffset": 461,
- "length": 310
+ "length": 891
},
{
"columnId": 1,
"section": "DATA",
- "startOffset": 771,
+ "startOffset": 1352,
"length": 20035
},
{
"columnId": 2,
"section": "DATA",
- "startOffset": 20806,
+ "startOffset": 21387,
"length": 40050
},
{
"columnId": 3,
"section": "PRESENT",
- "startOffset": 60856,
+ "startOffset": 61437,
"length": 17
},
{
"columnId": 3,
"section": "DATA",
- "startOffset": 60873,
+ "startOffset": 61454,
"length": 3510
},
{
"columnId": 3,
"section": "LENGTH",
- "startOffset": 64383,
+ "startOffset": 64964,
"length": 25
},
{
"columnId": 3,
"section": "DICTIONARY_DATA",
- "startOffset": 64408,
+ "startOffset": 64989,
"length": 133
}
],
@@ -458,51 +458,51 @@
"bloomFilterIndexes": [
{
"entryId": 0,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 1,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 2,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 3,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 4,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
],
"stripeLevelBloomFilter": {
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
}
]
@@ -510,8 +510,8 @@
{
"stripeNumber": 2,
"stripeInformation": {
- "offset": 64630,
- "indexLength": 759,
+ "offset": 65211,
+ "indexLength": 1340,
"dataLength": 63763,
"footerLength": 88,
"rowCount": 5000
@@ -520,67 +520,67 @@
{
"columnId": 0,
"section": "ROW_INDEX",
- "startOffset": 64630,
+ "startOffset": 65211,
"length": 17
},
{
"columnId": 1,
"section": "ROW_INDEX",
- "startOffset": 64647,
+ "startOffset": 65228,
"length": 166
},
{
"columnId": 2,
"section": "ROW_INDEX",
- "startOffset": 64813,
+ "startOffset": 65394,
"length": 166
},
{
"columnId": 3,
"section": "ROW_INDEX",
- "startOffset": 64979,
+ "startOffset": 65560,
"length": 100
},
{
"columnId": 3,
"section": "BLOOM_FILTER_UTF8",
- "startOffset": 65079,
- "length": 310
+ "startOffset": 65660,
+ "length": 891
},
{
"columnId": 1,
"section": "DATA",
- "startOffset": 65389,
+ "startOffset": 66551,
"length": 20035
},
{
"columnId": 2,
"section": "DATA",
- "startOffset": 85424,
+ "startOffset": 86586,
"length": 40050
},
{
"columnId": 3,
"section": "PRESENT",
- "startOffset": 125474,
+ "startOffset": 126636,
"length": 17
},
{
"columnId": 3,
"section": "DATA",
- "startOffset": 125491,
+ "startOffset": 126653,
"length": 3503
},
{
"columnId": 3,
"section": "LENGTH",
- "startOffset": 128994,
+ "startOffset": 130156,
"length": 25
},
{
"columnId": 3,
"section": "DICTIONARY_DATA",
- "startOffset": 129019,
+ "startOffset": 130181,
"length": 133
}
],
@@ -701,51 +701,51 @@
"bloomFilterIndexes": [
{
"entryId": 0,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 1,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 2,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 3,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 4,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
],
"stripeLevelBloomFilter": {
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
}
]
@@ -753,77 +753,77 @@
{
"stripeNumber": 3,
"stripeInformation": {
- "offset": 129240,
- "indexLength": 760,
+ "offset": 130402,
+ "indexLength": 1341,
"dataLength": 63770,
- "footerLength": 89,
+ "footerLength": 90,
"rowCount": 5000
},
"streams": [
{
"columnId": 0,
"section": "ROW_INDEX",
- "startOffset": 129240,
+ "startOffset": 130402,
"length": 17
},
{
"columnId": 1,
"section": "ROW_INDEX",
- "startOffset": 129257,
+ "startOffset": 130419,
"length": 164
},
{
"columnId": 2,
"section": "ROW_INDEX",
- "startOffset": 129421,
+ "startOffset": 130583,
"length": 167
},
{
"columnId": 3,
"section": "ROW_INDEX",
- "startOffset": 129588,
+ "startOffset": 130750,
"length": 102
},
{
"columnId": 3,
"section": "BLOOM_FILTER_UTF8",
- "startOffset": 129690,
- "length": 310
+ "startOffset": 130852,
+ "length": 891
},
{
"columnId": 1,
"section": "DATA",
- "startOffset": 130000,
+ "startOffset": 131743,
"length": 20035
},
{
"columnId": 2,
"section": "DATA",
- "startOffset": 150035,
+ "startOffset": 151778,
"length": 40050
},
{
"columnId": 3,
"section": "PRESENT",
- "startOffset": 190085,
+ "startOffset": 191828,
"length": 17
},
{
"columnId": 3,
"section": "DATA",
- "startOffset": 190102,
+ "startOffset": 191845,
"length": 3510
},
{
"columnId": 3,
"section": "LENGTH",
- "startOffset": 193612,
+ "startOffset": 195355,
"length": 25
},
{
"columnId": 3,
"section": "DICTIONARY_DATA",
- "startOffset": 193637,
+ "startOffset": 195380,
"length": 133
}
],
@@ -944,51 +944,51 @@
"bloomFilterIndexes": [
{
"entryId": 0,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 1,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 2,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 3,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 4,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
],
"stripeLevelBloomFilter": {
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
}
]
@@ -996,8 +996,8 @@
{
"stripeNumber": 4,
"stripeInformation": {
- "offset": 193859,
- "indexLength": 771,
+ "offset": 195603,
+ "indexLength": 1352,
"dataLength": 63756,
"footerLength": 90,
"rowCount": 5000
@@ -1006,67 +1006,67 @@
{
"columnId": 0,
"section": "ROW_INDEX",
- "startOffset": 193859,
+ "startOffset": 195603,
"length": 17
},
{
"columnId": 1,
"section": "ROW_INDEX",
- "startOffset": 193876,
+ "startOffset": 195620,
"length": 166
},
{
"columnId": 2,
"section": "ROW_INDEX",
- "startOffset": 194042,
+ "startOffset": 195786,
"length": 171
},
{
"columnId": 3,
"section": "ROW_INDEX",
- "startOffset": 194213,
+ "startOffset": 195957,
"length": 107
},
{
"columnId": 3,
"section": "BLOOM_FILTER_UTF8",
- "startOffset": 194320,
- "length": 310
+ "startOffset": 196064,
+ "length": 891
},
{
"columnId": 1,
"section": "DATA",
- "startOffset": 194630,
+ "startOffset": 196955,
"length": 20035
},
{
"columnId": 2,
"section": "DATA",
- "startOffset": 214665,
+ "startOffset": 216990,
"length": 40050
},
{
"columnId": 3,
"section": "PRESENT",
- "startOffset": 254715,
+ "startOffset": 257040,
"length": 17
},
{
"columnId": 3,
"section": "DATA",
- "startOffset": 254732,
+ "startOffset": 257057,
"length": 3496
},
{
"columnId": 3,
"section": "LENGTH",
- "startOffset": 258228,
+ "startOffset": 260553,
"length": 25
},
{
"columnId": 3,
"section": "DICTIONARY_DATA",
- "startOffset": 258253,
+ "startOffset": 260578,
"length": 133
}
],
@@ -1187,51 +1187,51 @@
"bloomFilterIndexes": [
{
"entryId": 0,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 1,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 2,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 3,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
},
{
"entryId": 4,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
],
"stripeLevelBloomFilter": {
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
}
]
@@ -1239,77 +1239,77 @@
{
"stripeNumber": 5,
"stripeInformation": {
- "offset": 258476,
- "indexLength": 376,
+ "offset": 260801,
+ "indexLength": 527,
"dataLength": 12943,
- "footerLength": 85,
+ "footerLength": 84,
"rowCount": 1000
},
"streams": [
{
"columnId": 0,
"section": "ROW_INDEX",
- "startOffset": 258476,
+ "startOffset": 260801,
"length": 12
},
{
"columnId": 1,
"section": "ROW_INDEX",
- "startOffset": 258488,
+ "startOffset": 260813,
"length": 38
},
{
"columnId": 2,
"section": "ROW_INDEX",
- "startOffset": 258526,
+ "startOffset": 260851,
"length": 41
},
{
"columnId": 3,
"section": "ROW_INDEX",
- "startOffset": 258567,
+ "startOffset": 260892,
"length": 41
},
{
"columnId": 3,
"section": "BLOOM_FILTER_UTF8",
- "startOffset": 258608,
- "length": 244
+ "startOffset": 260933,
+ "length": 395
},
{
"columnId": 1,
"section": "DATA",
- "startOffset": 258852,
+ "startOffset": 261328,
"length": 4007
},
{
"columnId": 2,
"section": "DATA",
- "startOffset": 262859,
+ "startOffset": 265335,
"length": 8010
},
{
"columnId": 3,
"section": "PRESENT",
- "startOffset": 270869,
+ "startOffset": 273345,
"length": 16
},
{
"columnId": 3,
"section": "DATA",
- "startOffset": 270885,
+ "startOffset": 273361,
"length": 752
},
{
"columnId": 3,
"section": "LENGTH",
- "startOffset": 271637,
+ "startOffset": 274113,
"length": 25
},
{
"columnId": 3,
"section": "DICTIONARY_DATA",
- "startOffset": 271662,
+ "startOffset": 274138,
"length": 133
}
],
@@ -1358,25 +1358,25 @@
"bloomFilterIndexes": [
{
"entryId": 0,
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
],
"stripeLevelBloomFilter": {
- "numHashFunctions": 4,
- "bitCount": 6272,
- "popCount": 138,
- "loadFactor": 0.022002551704645157,
- "expectedFpp": 2.3436470542037569E-7
+ "numHashFunctions": 7,
+ "bitCount": 9600,
+ "popCount": 238,
+ "loadFactor": 0.024791667237877846,
+ "expectedFpp": 5.756256582500896E-12
}
}
]
}
],
- "fileLength": 272529,
+ "fileLength": 275003,
"paddingLength": 0,
"paddingRatio": 0.0,
"status": "OK"