You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2023/01/09 02:58:30 UTC

[orc] branch branch-1.8 updated: ORC-1338: Set bloom filter fpp to 1%

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.8
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.8 by this push:
     new 966c0c1e9 ORC-1338: Set bloom filter fpp to 1%
966c0c1e9 is described below

commit 966c0c1e9de361babdfe1b7ac231c5afc1e669c7
Author: William Hyun <wi...@apache.org>
AuthorDate: Fri Dec 30 00:07:53 2022 -0800

    ORC-1338: Set bloom filter fpp to 1%
    
    ### What changes were proposed in this pull request?
    This PR aims to change the default bloom filter false positive probability (fpp) from 5% to 1%.
    
    ### Why are the changes needed?
    Parquet uses a 1% fpp:
    - https://github.com/apache/parquet-mr/blob/433de8df33fcf31927f7b51456be9f53e64d48b9/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java#L61
    
    ### How was this patch tested?
    Pass the CIs with updated test cases.
    
    Closes #1349 from williamhyun/bloomfilter.
    
    Authored-by: William Hyun <wi...@apache.org>
    Signed-off-by: William Hyun <wi...@apache.org>
    (cherry picked from commit f9e096eb8091cbfa7e363ce0a4c9f809d3048df1)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 java/core/src/java/org/apache/orc/OrcConf.java     |   2 +-
 .../test/resources/orc-file-dump-bloomfilter.out   | 158 ++++----
 java/tools/src/test/resources/orc-file-dump.json   | 396 ++++++++++-----------
 3 files changed, 278 insertions(+), 278 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index d2d22fc8f..04a570eab 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -83,7 +83,7 @@ public enum OrcConf {
           "3.2Mb, a new smaller stripe will be inserted to fit within that\n" +
           "space. This will make sure that no stripe written will block\n" +
           " boundaries and cause remote reads within a node local task."),
-  BLOOM_FILTER_FPP("orc.bloom.filter.fpp", "orc.default.bloom.fpp", 0.05,
+  BLOOM_FILTER_FPP("orc.bloom.filter.fpp", "orc.default.bloom.fpp", 0.01,
       "Define the default false positive probability for bloom filters."),
   USE_ZEROCOPY("orc.use.zerocopy", "hive.exec.orc.zerocopy", false,
       "Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)"),
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
index 87b665c2d..a2f3fb05c 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
@@ -1,5 +1,5 @@
 Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_14
+File Version: 0.12 with ORC_14 by ORC Java 1.9.0-SNAPSHOT
 Rows: 21000
 Compression: ZLIB
 Compression size: 4096
@@ -45,17 +45,17 @@ File Statistics:
   Column 3: count: 21000 hasNull: false bytesOnDisk: 15751 min: Darkness, max: worst sum: 81761
 
 Stripes:
-  Stripe: offset: 3 data: 63786 rows: 5000 tail: 87 index: 749
+  Stripe: offset: 3 data: 63786 rows: 5000 tail: 87 index: 1330
     Stream: column 0 section ROW_INDEX start: 3 length 17
     Stream: column 1 section ROW_INDEX start: 20 length 166
     Stream: column 2 section ROW_INDEX start: 186 length 169
     Stream: column 3 section ROW_INDEX start: 355 length 87
-    Stream: column 3 section BLOOM_FILTER_UTF8 start: 442 length 310
-    Stream: column 1 section DATA start: 752 length 20035
-    Stream: column 2 section DATA start: 20787 length 40050
-    Stream: column 3 section DATA start: 60837 length 3543
-    Stream: column 3 section LENGTH start: 64380 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 64405 length 133
+    Stream: column 3 section BLOOM_FILTER_UTF8 start: 442 length 891
+    Stream: column 1 section DATA start: 1333 length 20035
+    Stream: column 2 section DATA start: 21368 length 40050
+    Stream: column 3 section DATA start: 61418 length 3543
+    Stream: column 3 section LENGTH start: 64961 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 64986 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -67,23 +67,23 @@ Stripes:
       Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32
       Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45
     Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 64625 data: 63775 rows: 5000 tail: 87 index: 742
-    Stream: column 0 section ROW_INDEX start: 64625 length 17
-    Stream: column 1 section ROW_INDEX start: 64642 length 164
-    Stream: column 2 section ROW_INDEX start: 64806 length 168
-    Stream: column 3 section ROW_INDEX start: 64974 length 83
-    Stream: column 3 section BLOOM_FILTER_UTF8 start: 65057 length 310
-    Stream: column 1 section DATA start: 65367 length 20035
-    Stream: column 2 section DATA start: 85402 length 40050
-    Stream: column 3 section DATA start: 125452 length 3532
-    Stream: column 3 section LENGTH start: 128984 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 129009 length 133
+      Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+  Stripe: offset: 65206 data: 63775 rows: 5000 tail: 87 index: 1323
+    Stream: column 0 section ROW_INDEX start: 65206 length 17
+    Stream: column 1 section ROW_INDEX start: 65223 length 164
+    Stream: column 2 section ROW_INDEX start: 65387 length 168
+    Stream: column 3 section ROW_INDEX start: 65555 length 83
+    Stream: column 3 section BLOOM_FILTER_UTF8 start: 65638 length 891
+    Stream: column 1 section DATA start: 66529 length 20035
+    Stream: column 2 section DATA start: 86564 length 40050
+    Stream: column 3 section DATA start: 126614 length 3532
+    Stream: column 3 section LENGTH start: 130146 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 130171 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -95,23 +95,23 @@ Stripes:
       Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43
       Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88
     Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 129229 data: 63787 rows: 5000 tail: 87 index: 748
-    Stream: column 0 section ROW_INDEX start: 129229 length 17
-    Stream: column 1 section ROW_INDEX start: 129246 length 163
-    Stream: column 2 section ROW_INDEX start: 129409 length 168
-    Stream: column 3 section ROW_INDEX start: 129577 length 90
-    Stream: column 3 section BLOOM_FILTER_UTF8 start: 129667 length 310
-    Stream: column 1 section DATA start: 129977 length 20035
-    Stream: column 2 section DATA start: 150012 length 40050
-    Stream: column 3 section DATA start: 190062 length 3544
-    Stream: column 3 section LENGTH start: 193606 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 193631 length 133
+      Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+  Stripe: offset: 130391 data: 63787 rows: 5000 tail: 87 index: 1329
+    Stream: column 0 section ROW_INDEX start: 130391 length 17
+    Stream: column 1 section ROW_INDEX start: 130408 length 163
+    Stream: column 2 section ROW_INDEX start: 130571 length 168
+    Stream: column 3 section ROW_INDEX start: 130739 length 90
+    Stream: column 3 section BLOOM_FILTER_UTF8 start: 130829 length 891
+    Stream: column 1 section DATA start: 131720 length 20035
+    Stream: column 2 section DATA start: 151755 length 40050
+    Stream: column 3 section DATA start: 191805 length 3544
+    Stream: column 3 section LENGTH start: 195349 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 195374 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -123,23 +123,23 @@ Stripes:
       Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194
       Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43
     Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 193851 data: 63817 rows: 5000 tail: 86 index: 750
-    Stream: column 0 section ROW_INDEX start: 193851 length 17
-    Stream: column 1 section ROW_INDEX start: 193868 length 165
-    Stream: column 2 section ROW_INDEX start: 194033 length 167
-    Stream: column 3 section ROW_INDEX start: 194200 length 91
-    Stream: column 3 section BLOOM_FILTER_UTF8 start: 194291 length 310
-    Stream: column 1 section DATA start: 194601 length 20035
-    Stream: column 2 section DATA start: 214636 length 40050
-    Stream: column 3 section DATA start: 254686 length 3574
-    Stream: column 3 section LENGTH start: 258260 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 258285 length 133
+      Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+  Stripe: offset: 195594 data: 63817 rows: 5000 tail: 86 index: 1331
+    Stream: column 0 section ROW_INDEX start: 195594 length 17
+    Stream: column 1 section ROW_INDEX start: 195611 length 165
+    Stream: column 2 section ROW_INDEX start: 195776 length 167
+    Stream: column 3 section ROW_INDEX start: 195943 length 91
+    Stream: column 3 section BLOOM_FILTER_UTF8 start: 196034 length 891
+    Stream: column 1 section DATA start: 196925 length 20035
+    Stream: column 2 section DATA start: 216960 length 40050
+    Stream: column 3 section DATA start: 257010 length 3574
+    Stream: column 3 section LENGTH start: 260584 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 260609 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -151,23 +151,23 @@ Stripes:
       Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104
       Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131
     Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 258504 data: 12943 rows: 1000 tail: 80 index: 375
-    Stream: column 0 section ROW_INDEX start: 258504 length 12
-    Stream: column 1 section ROW_INDEX start: 258516 length 38
-    Stream: column 2 section ROW_INDEX start: 258554 length 41
-    Stream: column 3 section ROW_INDEX start: 258595 length 40
-    Stream: column 3 section BLOOM_FILTER_UTF8 start: 258635 length 244
-    Stream: column 1 section DATA start: 258879 length 4007
-    Stream: column 2 section DATA start: 262886 length 8010
-    Stream: column 3 section DATA start: 270896 length 768
-    Stream: column 3 section LENGTH start: 271664 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 271689 length 133
+      Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+  Stripe: offset: 260828 data: 12943 rows: 1000 tail: 80 index: 526
+    Stream: column 0 section ROW_INDEX start: 260828 length 12
+    Stream: column 1 section ROW_INDEX start: 260840 length 38
+    Stream: column 2 section ROW_INDEX start: 260878 length 41
+    Stream: column 3 section ROW_INDEX start: 260919 length 40
+    Stream: column 3 section BLOOM_FILTER_UTF8 start: 260959 length 395
+    Stream: column 1 section DATA start: 261354 length 4007
+    Stream: column 2 section DATA start: 265361 length 8010
+    Stream: column 3 section DATA start: 273371 length 768
+    Stream: column 3 section LENGTH start: 274139 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 274164 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -175,10 +175,10 @@ Stripes:
     Row group indices for column 3:
       Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
     Bloom filters for column 3:
-      Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-      Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
+      Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
+      Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 238 loadFactor: 0.0248 expectedFpp: 5.7562566E-12
 
-File length: 272535 bytes
+File length: 275025 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 ________________________________________________________________________________________________________________________
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index 1540f7c67..15a9c2495 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -2,7 +2,7 @@
   "fileName": "TestFileDump.testDump.orc",
   "fileVersion": "0.12",
   "writerVersion": "ORC_14",
-  "softwareVersion": "ORC Java 1.8.0-SNAPSHOT",
+  "softwareVersion": "ORC Java 1.9.0-SNAPSHOT",
   "numberOfRows": 21000,
   "compression": "ZLIB",
   "compressionBufferSize": 4096,
@@ -268,7 +268,7 @@
       "stripeNumber": 1,
       "stripeInformation": {
         "offset": 3,
-        "indexLength": 768,
+        "indexLength": 1349,
         "dataLength": 63770,
         "footerLength": 89,
         "rowCount": 5000
@@ -302,42 +302,42 @@
           "columnId": 3,
           "section": "BLOOM_FILTER_UTF8",
           "startOffset": 461,
-          "length": 310
+          "length": 891
         },
         {
           "columnId": 1,
           "section": "DATA",
-          "startOffset": 771,
+          "startOffset": 1352,
           "length": 20035
         },
         {
           "columnId": 2,
           "section": "DATA",
-          "startOffset": 20806,
+          "startOffset": 21387,
           "length": 40050
         },
         {
           "columnId": 3,
           "section": "PRESENT",
-          "startOffset": 60856,
+          "startOffset": 61437,
           "length": 17
         },
         {
           "columnId": 3,
           "section": "DATA",
-          "startOffset": 60873,
+          "startOffset": 61454,
           "length": 3510
         },
         {
           "columnId": 3,
           "section": "LENGTH",
-          "startOffset": 64383,
+          "startOffset": 64964,
           "length": 25
         },
         {
           "columnId": 3,
           "section": "DICTIONARY_DATA",
-          "startOffset": 64408,
+          "startOffset": 64989,
           "length": 133
         }
       ],
@@ -458,51 +458,51 @@
           "bloomFilterIndexes": [
             {
               "entryId": 0,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 1,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 2,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 3,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 4,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             }
           ],
           "stripeLevelBloomFilter": {
-            "numHashFunctions": 4,
-            "bitCount": 6272,
-            "popCount": 138,
-            "loadFactor": 0.022002551704645157,
-            "expectedFpp": 2.3436470542037569E-7
+            "numHashFunctions": 7,
+            "bitCount": 9600,
+            "popCount": 238,
+            "loadFactor": 0.024791667237877846,
+            "expectedFpp": 5.756256582500896E-12
           }
         }
       ]
@@ -510,8 +510,8 @@
     {
       "stripeNumber": 2,
       "stripeInformation": {
-        "offset": 64630,
-        "indexLength": 759,
+        "offset": 65211,
+        "indexLength": 1340,
         "dataLength": 63763,
         "footerLength": 88,
         "rowCount": 5000
@@ -520,67 +520,67 @@
         {
           "columnId": 0,
           "section": "ROW_INDEX",
-          "startOffset": 64630,
+          "startOffset": 65211,
           "length": 17
         },
         {
           "columnId": 1,
           "section": "ROW_INDEX",
-          "startOffset": 64647,
+          "startOffset": 65228,
           "length": 166
         },
         {
           "columnId": 2,
           "section": "ROW_INDEX",
-          "startOffset": 64813,
+          "startOffset": 65394,
           "length": 166
         },
         {
           "columnId": 3,
           "section": "ROW_INDEX",
-          "startOffset": 64979,
+          "startOffset": 65560,
           "length": 100
         },
         {
           "columnId": 3,
           "section": "BLOOM_FILTER_UTF8",
-          "startOffset": 65079,
-          "length": 310
+          "startOffset": 65660,
+          "length": 891
         },
         {
           "columnId": 1,
           "section": "DATA",
-          "startOffset": 65389,
+          "startOffset": 66551,
           "length": 20035
         },
         {
           "columnId": 2,
           "section": "DATA",
-          "startOffset": 85424,
+          "startOffset": 86586,
           "length": 40050
         },
         {
           "columnId": 3,
           "section": "PRESENT",
-          "startOffset": 125474,
+          "startOffset": 126636,
           "length": 17
         },
         {
           "columnId": 3,
           "section": "DATA",
-          "startOffset": 125491,
+          "startOffset": 126653,
           "length": 3503
         },
         {
           "columnId": 3,
           "section": "LENGTH",
-          "startOffset": 128994,
+          "startOffset": 130156,
           "length": 25
         },
         {
           "columnId": 3,
           "section": "DICTIONARY_DATA",
-          "startOffset": 129019,
+          "startOffset": 130181,
           "length": 133
         }
       ],
@@ -701,51 +701,51 @@
           "bloomFilterIndexes": [
             {
               "entryId": 0,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 1,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 2,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 3,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 4,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             }
           ],
           "stripeLevelBloomFilter": {
-            "numHashFunctions": 4,
-            "bitCount": 6272,
-            "popCount": 138,
-            "loadFactor": 0.022002551704645157,
-            "expectedFpp": 2.3436470542037569E-7
+            "numHashFunctions": 7,
+            "bitCount": 9600,
+            "popCount": 238,
+            "loadFactor": 0.024791667237877846,
+            "expectedFpp": 5.756256582500896E-12
           }
         }
       ]
@@ -753,77 +753,77 @@
     {
       "stripeNumber": 3,
       "stripeInformation": {
-        "offset": 129240,
-        "indexLength": 760,
+        "offset": 130402,
+        "indexLength": 1341,
         "dataLength": 63770,
-        "footerLength": 89,
+        "footerLength": 90,
         "rowCount": 5000
       },
       "streams": [
         {
           "columnId": 0,
           "section": "ROW_INDEX",
-          "startOffset": 129240,
+          "startOffset": 130402,
           "length": 17
         },
         {
           "columnId": 1,
           "section": "ROW_INDEX",
-          "startOffset": 129257,
+          "startOffset": 130419,
           "length": 164
         },
         {
           "columnId": 2,
           "section": "ROW_INDEX",
-          "startOffset": 129421,
+          "startOffset": 130583,
           "length": 167
         },
         {
           "columnId": 3,
           "section": "ROW_INDEX",
-          "startOffset": 129588,
+          "startOffset": 130750,
           "length": 102
         },
         {
           "columnId": 3,
           "section": "BLOOM_FILTER_UTF8",
-          "startOffset": 129690,
-          "length": 310
+          "startOffset": 130852,
+          "length": 891
         },
         {
           "columnId": 1,
           "section": "DATA",
-          "startOffset": 130000,
+          "startOffset": 131743,
           "length": 20035
         },
         {
           "columnId": 2,
           "section": "DATA",
-          "startOffset": 150035,
+          "startOffset": 151778,
           "length": 40050
         },
         {
           "columnId": 3,
           "section": "PRESENT",
-          "startOffset": 190085,
+          "startOffset": 191828,
           "length": 17
         },
         {
           "columnId": 3,
           "section": "DATA",
-          "startOffset": 190102,
+          "startOffset": 191845,
           "length": 3510
         },
         {
           "columnId": 3,
           "section": "LENGTH",
-          "startOffset": 193612,
+          "startOffset": 195355,
           "length": 25
         },
         {
           "columnId": 3,
           "section": "DICTIONARY_DATA",
-          "startOffset": 193637,
+          "startOffset": 195380,
           "length": 133
         }
       ],
@@ -944,51 +944,51 @@
           "bloomFilterIndexes": [
             {
               "entryId": 0,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 1,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 2,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 3,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 4,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             }
           ],
           "stripeLevelBloomFilter": {
-            "numHashFunctions": 4,
-            "bitCount": 6272,
-            "popCount": 138,
-            "loadFactor": 0.022002551704645157,
-            "expectedFpp": 2.3436470542037569E-7
+            "numHashFunctions": 7,
+            "bitCount": 9600,
+            "popCount": 238,
+            "loadFactor": 0.024791667237877846,
+            "expectedFpp": 5.756256582500896E-12
           }
         }
       ]
@@ -996,8 +996,8 @@
     {
       "stripeNumber": 4,
       "stripeInformation": {
-        "offset": 193859,
-        "indexLength": 771,
+        "offset": 195603,
+        "indexLength": 1352,
         "dataLength": 63756,
         "footerLength": 90,
         "rowCount": 5000
@@ -1006,67 +1006,67 @@
         {
           "columnId": 0,
           "section": "ROW_INDEX",
-          "startOffset": 193859,
+          "startOffset": 195603,
           "length": 17
         },
         {
           "columnId": 1,
           "section": "ROW_INDEX",
-          "startOffset": 193876,
+          "startOffset": 195620,
           "length": 166
         },
         {
           "columnId": 2,
           "section": "ROW_INDEX",
-          "startOffset": 194042,
+          "startOffset": 195786,
           "length": 171
         },
         {
           "columnId": 3,
           "section": "ROW_INDEX",
-          "startOffset": 194213,
+          "startOffset": 195957,
           "length": 107
         },
         {
           "columnId": 3,
           "section": "BLOOM_FILTER_UTF8",
-          "startOffset": 194320,
-          "length": 310
+          "startOffset": 196064,
+          "length": 891
         },
         {
           "columnId": 1,
           "section": "DATA",
-          "startOffset": 194630,
+          "startOffset": 196955,
           "length": 20035
         },
         {
           "columnId": 2,
           "section": "DATA",
-          "startOffset": 214665,
+          "startOffset": 216990,
           "length": 40050
         },
         {
           "columnId": 3,
           "section": "PRESENT",
-          "startOffset": 254715,
+          "startOffset": 257040,
           "length": 17
         },
         {
           "columnId": 3,
           "section": "DATA",
-          "startOffset": 254732,
+          "startOffset": 257057,
           "length": 3496
         },
         {
           "columnId": 3,
           "section": "LENGTH",
-          "startOffset": 258228,
+          "startOffset": 260553,
           "length": 25
         },
         {
           "columnId": 3,
           "section": "DICTIONARY_DATA",
-          "startOffset": 258253,
+          "startOffset": 260578,
           "length": 133
         }
       ],
@@ -1187,51 +1187,51 @@
           "bloomFilterIndexes": [
             {
               "entryId": 0,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 1,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 2,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 3,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             },
             {
               "entryId": 4,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             }
           ],
           "stripeLevelBloomFilter": {
-            "numHashFunctions": 4,
-            "bitCount": 6272,
-            "popCount": 138,
-            "loadFactor": 0.022002551704645157,
-            "expectedFpp": 2.3436470542037569E-7
+            "numHashFunctions": 7,
+            "bitCount": 9600,
+            "popCount": 238,
+            "loadFactor": 0.024791667237877846,
+            "expectedFpp": 5.756256582500896E-12
           }
         }
       ]
@@ -1239,77 +1239,77 @@
     {
       "stripeNumber": 5,
       "stripeInformation": {
-        "offset": 258476,
-        "indexLength": 376,
+        "offset": 260801,
+        "indexLength": 527,
         "dataLength": 12943,
-        "footerLength": 85,
+        "footerLength": 84,
         "rowCount": 1000
       },
       "streams": [
         {
           "columnId": 0,
           "section": "ROW_INDEX",
-          "startOffset": 258476,
+          "startOffset": 260801,
           "length": 12
         },
         {
           "columnId": 1,
           "section": "ROW_INDEX",
-          "startOffset": 258488,
+          "startOffset": 260813,
           "length": 38
         },
         {
           "columnId": 2,
           "section": "ROW_INDEX",
-          "startOffset": 258526,
+          "startOffset": 260851,
           "length": 41
         },
         {
           "columnId": 3,
           "section": "ROW_INDEX",
-          "startOffset": 258567,
+          "startOffset": 260892,
           "length": 41
         },
         {
           "columnId": 3,
           "section": "BLOOM_FILTER_UTF8",
-          "startOffset": 258608,
-          "length": 244
+          "startOffset": 260933,
+          "length": 395
         },
         {
           "columnId": 1,
           "section": "DATA",
-          "startOffset": 258852,
+          "startOffset": 261328,
           "length": 4007
         },
         {
           "columnId": 2,
           "section": "DATA",
-          "startOffset": 262859,
+          "startOffset": 265335,
           "length": 8010
         },
         {
           "columnId": 3,
           "section": "PRESENT",
-          "startOffset": 270869,
+          "startOffset": 273345,
           "length": 16
         },
         {
           "columnId": 3,
           "section": "DATA",
-          "startOffset": 270885,
+          "startOffset": 273361,
           "length": 752
         },
         {
           "columnId": 3,
           "section": "LENGTH",
-          "startOffset": 271637,
+          "startOffset": 274113,
           "length": 25
         },
         {
           "columnId": 3,
           "section": "DICTIONARY_DATA",
-          "startOffset": 271662,
+          "startOffset": 274138,
           "length": 133
         }
       ],
@@ -1358,25 +1358,25 @@
           "bloomFilterIndexes": [
             {
               "entryId": 0,
-              "numHashFunctions": 4,
-              "bitCount": 6272,
-              "popCount": 138,
-              "loadFactor": 0.022002551704645157,
-              "expectedFpp": 2.3436470542037569E-7
+              "numHashFunctions": 7,
+              "bitCount": 9600,
+              "popCount": 238,
+              "loadFactor": 0.024791667237877846,
+              "expectedFpp": 5.756256582500896E-12
             }
           ],
           "stripeLevelBloomFilter": {
-            "numHashFunctions": 4,
-            "bitCount": 6272,
-            "popCount": 138,
-            "loadFactor": 0.022002551704645157,
-            "expectedFpp": 2.3436470542037569E-7
+            "numHashFunctions": 7,
+            "bitCount": 9600,
+            "popCount": 238,
+            "loadFactor": 0.024791667237877846,
+            "expectedFpp": 5.756256582500896E-12
           }
         }
       ]
     }
   ],
-  "fileLength": 272529,
+  "fileLength": 275003,
   "paddingLength": 0,
   "paddingRatio": 0.0,
   "status": "OK"