You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/10/23 01:30:41 UTC
[2/2] hive git commit: HIVE-11807: Set ORC buffer size in relation to set stripe size (Owen O'Malley reviewed by Gopal V)

HIVE-11807: Set ORC buffer size in relation to set stripe size (Owen O'Malley reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/51a0c03f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/51a0c03f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/51a0c03f

Branch: refs/heads/branch-1
Commit: 51a0c03f4fe1c90ca6ba9dbb7dad37a4a139c891
Parents: 30ca4fb
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Thu Oct 22 16:30:22 2015 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Thu Oct 22 16:30:22 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/WriterImpl.java       |  99 ++++----
 .../hadoop/hive/ql/io/orc/TestOrcWideTable.java | 232 ++-----------------
 .../resources/orc-file-dump-bloomfilter.out     | 112 ++++-----
 .../resources/orc-file-dump-bloomfilter2.out    | 144 ++++++------
 .../orc-file-dump-dictionary-threshold.out      | 180 +++++++-------
 ql/src/test/resources/orc-file-dump.json        | 184 +++++++--------
 ql/src/test/resources/orc-file-dump.out         | 158 ++++++-------
 ql/src/test/resources/orc-file-has-null.out     |  80 +++----
 8 files changed, 493 insertions(+), 696 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/51a0c03f/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 58c7577..b5e6ad1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -22,7 +22,6 @@ import static com.google.common.base.Preconditions.checkArgument;
 
 import java.io.IOException;
 import java.io.OutputStream;
-import java.lang.management.ManagementFactory;
 import java.nio.ByteBuffer;
 import java.sql.Timestamp;
 import java.util.ArrayList;
@@ -209,7 +208,8 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     if (allColumns == null) {
       allColumns = getColumnNamesFromInspector(inspector);
     }
-    this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize);
+    this.bufferSize = getEstimatedBufferSize(defaultStripeSize,
+        countColumns(inspector), bufferSize);
     if (version == OrcFile.Version.V_0_11) {
       /* do not write bloom filters for ORC v11 */
       this.bloomFilterColumns =
@@ -242,49 +242,58 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     return joiner.join(fieldNames);
   }
 
-  @VisibleForTesting
-  int getEstimatedBufferSize(int bs) {
-      return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs);
-  }
-
-  int getEstimatedBufferSize(String colNames, int bs) {
-    long availableMem = getMemoryAvailableForORC();
-    if (colNames != null) {
-      final int numCols = colNames.split(",").length;
-      if (numCols > COLUMN_COUNT_THRESHOLD) {
-        // In BufferedStream, there are 3 outstream buffers (compressed,
-        // uncompressed and overflow) and list of previously compressed buffers.
-        // Since overflow buffer is rarely used, lets consider only 2 allocation.
-        // Also, initially, the list of compression buffers will be empty.
-        final int outStreamBuffers = codec == null ? 1 : 2;
-
-        // max possible streams per column is 5. For string columns, there is
-        // ROW_INDEX, PRESENT, DATA, LENGTH, DICTIONARY_DATA streams.
-        final int maxStreams = 5;
-
-        // Lets assume 10% memory for holding dictionary in memory and other
-        // object allocations
-        final long miscAllocation = (long) (0.1f * availableMem);
-
-        // compute the available memory
-        final long remainingMem = availableMem - miscAllocation;
-
-        int estBufferSize = (int) (remainingMem /
-            (maxStreams * outStreamBuffers * numCols));
-        estBufferSize = getClosestBufferSize(estBufferSize, bs);
-        if (estBufferSize > bs) {
-          estBufferSize = bs;
+  static int countColumns(ObjectInspector oi) {
+    switch (oi.getCategory()) {
+      case PRIMITIVE:
+        return 1;
+      case STRUCT: {
+        int result = 1;
+        for(StructField field:
+            ((StructObjectInspector) oi).getAllStructFieldRefs()) {
+          result += countColumns(field.getFieldObjectInspector());
         }
-
-        LOG.info("WIDE TABLE - Number of columns: " + numCols +
-            " Chosen compression buffer size: " + estBufferSize);
-        return estBufferSize;
+        return result;
+      }
+      case UNION: {
+        int result = 1;
+        for(ObjectInspector child:
+            ((UnionObjectInspector) oi).getObjectInspectors()) {
+          result += countColumns(child);
+        }
+        return result;
+      }
+      case LIST:
+        return 1 + countColumns(
+            ((ListObjectInspector)oi).getListElementObjectInspector());
+      case MAP: {
+        MapObjectInspector moi = (MapObjectInspector) oi;
+        return 1 + countColumns(moi.getMapKeyObjectInspector()) +
+            countColumns(moi.getMapValueObjectInspector());
       }
+      default:
+        throw new IllegalArgumentException("Unknown category " +
+            oi.getCategory());
     }
-    return bs;
   }
 
-  private int getClosestBufferSize(int estBufferSize, int bs) {
+  @VisibleForTesting
+  static int getEstimatedBufferSize(long stripeSize, int numColumns, int bs) {
+    // The worst case is that there are 2 big streams per a column and
+    // we want to guarantee that each stream gets ~10 buffers.
+    // This keeps buffers small enough that we don't get really small stripe
+    // sizes.
+    int estBufferSize = (int) (stripeSize / (20 * numColumns));
+    estBufferSize = getClosestBufferSize(estBufferSize);
+    if (estBufferSize > bs) {
+      estBufferSize = bs;
+    } else {
+      LOG.info("WIDE TABLE - Number of columns: " + numColumns +
+          " Chosen compression buffer size: " + estBufferSize);
+    }
+    return estBufferSize;
+  }
+
+  private static int getClosestBufferSize(int estBufferSize) {
     final int kb4 = 4 * 1024;
     final int kb8 = 8 * 1024;
     final int kb16 = 16 * 1024;
@@ -309,16 +318,6 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     }
   }
 
-  // the assumption is only one ORC writer open at a time, which holds true for
-  // most of the cases. HIVE-6455 forces single writer case.
-  private long getMemoryAvailableForORC() {
-    HiveConf.ConfVars poolVar = HiveConf.ConfVars.HIVE_ORC_FILE_MEMORY_POOL;
-    double maxLoad = conf.getFloat(poolVar.varname, poolVar.defaultFloatVal);
-    long totalMemoryPool = Math.round(ManagementFactory.getMemoryMXBean().
-        getHeapMemoryUsage().getMax() * maxLoad);
-    return totalMemoryPool;
-  }
-
   public static CompressionCodec createCodec(CompressionKind kind) {
     switch (kind) {
       case NONE:

http://git-wip-us.apache.org/repos/asf/hive/blob/51a0c03f/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
index a3d3ec5..6b6cb2c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
@@ -38,241 +38,39 @@ import org.junit.rules.TestName;
 
 public class TestOrcWideTable {
 
-  private static final int MEMORY_FOR_ORC = 512 * 1024 * 1024;
-  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
-      + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-  float memoryPercent;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-    // make sure constant memory is available for ORC always
-    memoryPercent = (float) MEMORY_FOR_ORC / (float) ManagementFactory.getMemoryMXBean().
-        getHeapMemoryUsage().getMax();
-    conf.setFloat(HiveConf.ConfVars.HIVE_ORC_FILE_MEMORY_POOL.varname, memoryPercent);
-  }
-
   @Test
   public void testBufferSizeFor1Col() throws IOException {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-    int bufferSize = 128 * 1024;
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(bufferSize));
-    final int newBufferSize;
-    if (writer instanceof WriterImpl) {
-      WriterImpl orcWriter = (WriterImpl) writer;
-      newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
-      assertEquals(bufferSize, newBufferSize);
-    }
+    assertEquals(128 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+        1, 128*1024));
   }
 
   @Test
-  public void testBufferSizeFor1000Col() throws IOException {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-    int bufferSize = 128 * 1024;
-    String columns = getRandomColumnNames(1000);
-    // just for testing. manually write the column names
-    conf.set(IOConstants.COLUMNS, columns);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(bufferSize));
-    final int newBufferSize;
-    if (writer instanceof WriterImpl) {
-      WriterImpl orcWriter = (WriterImpl) writer;
-      newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
-      assertEquals(bufferSize, newBufferSize);
-    }
+  public void testBufferSizeFor50Col() throws IOException {
+    assertEquals(256 * 1024, WriterImpl.getEstimatedBufferSize(256 * 1024 * 1024,
+        50, 256*1024));
   }
 
   @Test
-  public void testBufferSizeFor2000Col() throws IOException {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-    int bufferSize = 256 * 1024;
-    String columns = getRandomColumnNames(2000);
-    // just for testing. manually write the column names
-    conf.set(IOConstants.COLUMNS, columns);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.ZLIB).bufferSize(bufferSize));
-    final int newBufferSize;
-    if (writer instanceof WriterImpl) {
-      WriterImpl orcWriter = (WriterImpl) writer;
-      newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
-      assertEquals(32 * 1024, newBufferSize);
-    }
+  public void testBufferSizeFor1000Col() throws IOException {
+    assertEquals(32 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+        1000, 128*1024));
   }
 
   @Test
-  public void testBufferSizeFor2000ColNoCompression() throws IOException {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-    int bufferSize = 256 * 1024;
-    String columns = getRandomColumnNames(2000);
-    // just for testing. manually write the column names
-    conf.set(IOConstants.COLUMNS, columns);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(bufferSize));
-    final int newBufferSize;
-    if (writer instanceof WriterImpl) {
-      WriterImpl orcWriter = (WriterImpl) writer;
-      newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
-      assertEquals(64 * 1024, newBufferSize);
-    }
+  public void testBufferSizeFor2000Col() throws IOException {
+    assertEquals(16 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+        2000, 256*1024));
   }
 
   @Test
   public void testBufferSizeFor4000Col() throws IOException {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-    int bufferSize = 256 * 1024;
-    String columns = getRandomColumnNames(4000);
-    // just for testing. manually write the column names
-    conf.set(IOConstants.COLUMNS, columns);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.ZLIB).bufferSize(bufferSize));
-    final int newBufferSize;
-    if (writer instanceof WriterImpl) {
-      WriterImpl orcWriter = (WriterImpl) writer;
-      newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
-      assertEquals(16 * 1024, newBufferSize);
-    }
-  }
-
-  @Test
-  public void testBufferSizeFor4000ColNoCompression() throws IOException {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-    int bufferSize = 256 * 1024;
-    String columns = getRandomColumnNames(4000);
-    // just for testing. manually write the column names
-    conf.set(IOConstants.COLUMNS, columns);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(bufferSize));
-    final int newBufferSize;
-    if (writer instanceof WriterImpl) {
-      WriterImpl orcWriter = (WriterImpl) writer;
-      newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
-      assertEquals(32 * 1024, newBufferSize);
-    }
+    assertEquals(8 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+        4000, 256*1024));
   }
 
   @Test
   public void testBufferSizeFor25000Col() throws IOException {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-    int bufferSize = 256 * 1024;
-    String columns = getRandomColumnNames(25000);
-    // just for testing. manually write the column names
-    conf.set(IOConstants.COLUMNS, columns);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(bufferSize));
-    final int newBufferSize;
-    if (writer instanceof WriterImpl) {
-      WriterImpl orcWriter = (WriterImpl) writer;
-      newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
-      // 4K is the minimum buffer size
-      assertEquals(4 * 1024, newBufferSize);
-    }
-  }
-
-  @Test
-  public void testBufferSizeManualOverride1() throws IOException {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-    int bufferSize = 1024;
-    String columns = getRandomColumnNames(2000);
-    // just for testing. manually write the column names
-    conf.set(IOConstants.COLUMNS, columns);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(bufferSize));
-    final int newBufferSize;
-    if (writer instanceof WriterImpl) {
-      WriterImpl orcWriter = (WriterImpl) writer;
-      newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
-      assertEquals(bufferSize, newBufferSize);
-    }
-  }
-
-  @Test
-  public void testBufferSizeManualOverride2() throws IOException {
-    ObjectInspector inspector;
-    synchronized (TestOrcFile.class) {
-      inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
-          ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
-    }
-    int bufferSize = 2 * 1024;
-    String columns = getRandomColumnNames(4000);
-    // just for testing. manually write the column names
-    conf.set(IOConstants.COLUMNS, columns);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(bufferSize));
-    final int newBufferSize;
-    if (writer instanceof WriterImpl) {
-      WriterImpl orcWriter = (WriterImpl) writer;
-      newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
-      assertEquals(bufferSize, newBufferSize);
-    }
-  }
-
-  private String getRandomColumnNames(int n) {
-    StringBuilder sb = new StringBuilder();
-    for (int i = 0; i < n - 1; i++) {
-      sb.append("col").append(i).append(",");
-    }
-    sb.append("col").append(n - 1);
-    return sb.toString();
+    assertEquals(4 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+        25000, 256*1024));
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/51a0c03f/ql/src/test/resources/orc-file-dump-bloomfilter.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter.out b/ql/src/test/resources/orc-file-dump-bloomfilter.out
index add163c..3420135 100644
--- a/ql/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/ql/src/test/resources/orc-file-dump-bloomfilter.out
@@ -2,7 +2,7 @@ Structure for TestFileDump.testDump.orc
 File Version: 0.12 with HIVE_8732
 Rows: 21000
 Compression: ZLIB
-Compression size: 10000
+Compression size: 4096
 Type: struct<i:int,l:bigint,s:string>
 
 Stripe Statistics:
@@ -39,17 +39,17 @@ File Statistics:
   Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
 
 Stripes:
-  Stripe: offset: 3 data: 63765 rows: 5000 tail: 86 index: 845
+  Stripe: offset: 3 data: 63786 rows: 5000 tail: 86 index: 951
     Stream: column 0 section ROW_INDEX start: 3 length 17
-    Stream: column 1 section ROW_INDEX start: 20 length 164
-    Stream: column 2 section ROW_INDEX start: 184 length 173
-    Stream: column 3 section ROW_INDEX start: 357 length 87
-    Stream: column 3 section BLOOM_FILTER start: 444 length 404
-    Stream: column 1 section DATA start: 848 length 20029
-    Stream: column 2 section DATA start: 20877 length 40035
-    Stream: column 3 section DATA start: 60912 length 3543
-    Stream: column 3 section LENGTH start: 64455 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 64480 length 133
+    Stream: column 1 section ROW_INDEX start: 20 length 166
+    Stream: column 2 section ROW_INDEX start: 186 length 169
+    Stream: column 3 section ROW_INDEX start: 355 length 87
+    Stream: column 3 section BLOOM_FILTER start: 442 length 512
+    Stream: column 1 section DATA start: 954 length 20035
+    Stream: column 2 section DATA start: 20989 length 40050
+    Stream: column 3 section DATA start: 61039 length 3543
+    Stream: column 3 section LENGTH start: 64582 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 64607 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -67,17 +67,17 @@ Stripes:
       Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 64699 data: 63754 rows: 5000 tail: 86 index: 837
-    Stream: column 0 section ROW_INDEX start: 64699 length 17
-    Stream: column 1 section ROW_INDEX start: 64716 length 162
-    Stream: column 2 section ROW_INDEX start: 64878 length 171
-    Stream: column 3 section ROW_INDEX start: 65049 length 83
-    Stream: column 3 section BLOOM_FILTER start: 65132 length 404
-    Stream: column 1 section DATA start: 65536 length 20029
-    Stream: column 2 section DATA start: 85565 length 40035
-    Stream: column 3 section DATA start: 125600 length 3532
-    Stream: column 3 section LENGTH start: 129132 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 129157 length 133
+  Stripe: offset: 64826 data: 63775 rows: 5000 tail: 86 index: 944
+    Stream: column 0 section ROW_INDEX start: 64826 length 17
+    Stream: column 1 section ROW_INDEX start: 64843 length 164
+    Stream: column 2 section ROW_INDEX start: 65007 length 168
+    Stream: column 3 section ROW_INDEX start: 65175 length 83
+    Stream: column 3 section BLOOM_FILTER start: 65258 length 512
+    Stream: column 1 section DATA start: 65770 length 20035
+    Stream: column 2 section DATA start: 85805 length 40050
+    Stream: column 3 section DATA start: 125855 length 3532
+    Stream: column 3 section LENGTH start: 129387 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 129412 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -95,17 +95,17 @@ Stripes:
       Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 129376 data: 63766 rows: 5000 tail: 86 index: 841
-    Stream: column 0 section ROW_INDEX start: 129376 length 17
-    Stream: column 1 section ROW_INDEX start: 129393 length 159
-    Stream: column 2 section ROW_INDEX start: 129552 length 171
-    Stream: column 3 section ROW_INDEX start: 129723 length 90
-    Stream: column 3 section BLOOM_FILTER start: 129813 length 404
-    Stream: column 1 section DATA start: 130217 length 20029
-    Stream: column 2 section DATA start: 150246 length 40035
-    Stream: column 3 section DATA start: 190281 length 3544
-    Stream: column 3 section LENGTH start: 193825 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 193850 length 133
+  Stripe: offset: 129631 data: 63787 rows: 5000 tail: 86 index: 950
+    Stream: column 0 section ROW_INDEX start: 129631 length 17
+    Stream: column 1 section ROW_INDEX start: 129648 length 163
+    Stream: column 2 section ROW_INDEX start: 129811 length 168
+    Stream: column 3 section ROW_INDEX start: 129979 length 90
+    Stream: column 3 section BLOOM_FILTER start: 130069 length 512
+    Stream: column 1 section DATA start: 130581 length 20035
+    Stream: column 2 section DATA start: 150616 length 40050
+    Stream: column 3 section DATA start: 190666 length 3544
+    Stream: column 3 section LENGTH start: 194210 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 194235 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -123,17 +123,17 @@ Stripes:
       Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 194069 data: 63796 rows: 5000 tail: 86 index: 844
-    Stream: column 0 section ROW_INDEX start: 194069 length 17
-    Stream: column 1 section ROW_INDEX start: 194086 length 162
-    Stream: column 2 section ROW_INDEX start: 194248 length 170
-    Stream: column 3 section ROW_INDEX start: 194418 length 91
-    Stream: column 3 section BLOOM_FILTER start: 194509 length 404
-    Stream: column 1 section DATA start: 194913 length 20029
-    Stream: column 2 section DATA start: 214942 length 40035
-    Stream: column 3 section DATA start: 254977 length 3574
-    Stream: column 3 section LENGTH start: 258551 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 258576 length 133
+  Stripe: offset: 194454 data: 63817 rows: 5000 tail: 86 index: 952
+    Stream: column 0 section ROW_INDEX start: 194454 length 17
+    Stream: column 1 section ROW_INDEX start: 194471 length 165
+    Stream: column 2 section ROW_INDEX start: 194636 length 167
+    Stream: column 3 section ROW_INDEX start: 194803 length 91
+    Stream: column 3 section BLOOM_FILTER start: 194894 length 512
+    Stream: column 1 section DATA start: 195406 length 20035
+    Stream: column 2 section DATA start: 215441 length 40050
+    Stream: column 3 section DATA start: 255491 length 3574
+    Stream: column 3 section LENGTH start: 259065 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 259090 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -151,17 +151,17 @@ Stripes:
       Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-  Stripe: offset: 258795 data: 12940 rows: 1000 tail: 78 index: 432
-    Stream: column 0 section ROW_INDEX start: 258795 length 12
-    Stream: column 1 section ROW_INDEX start: 258807 length 38
-    Stream: column 2 section ROW_INDEX start: 258845 length 41
-    Stream: column 3 section ROW_INDEX start: 258886 length 40
-    Stream: column 3 section BLOOM_FILTER start: 258926 length 301
-    Stream: column 1 section DATA start: 259227 length 4007
-    Stream: column 2 section DATA start: 263234 length 8007
-    Stream: column 3 section DATA start: 271241 length 768
-    Stream: column 3 section LENGTH start: 272009 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 272034 length 133
+  Stripe: offset: 259309 data: 12943 rows: 1000 tail: 78 index: 432
+    Stream: column 0 section ROW_INDEX start: 259309 length 12
+    Stream: column 1 section ROW_INDEX start: 259321 length 38
+    Stream: column 2 section ROW_INDEX start: 259359 length 41
+    Stream: column 3 section ROW_INDEX start: 259400 length 40
+    Stream: column 3 section BLOOM_FILTER start: 259440 length 301
+    Stream: column 1 section DATA start: 259741 length 4007
+    Stream: column 2 section DATA start: 263748 length 8010
+    Stream: column 3 section DATA start: 271758 length 768
+    Stream: column 3 section LENGTH start: 272526 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 272551 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -172,6 +172,6 @@ Stripes:
       Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
 
-File length: 272790 bytes
+File length: 273307 bytes
 Padding length: 0 bytes
 Padding ratio: 0%

http://git-wip-us.apache.org/repos/asf/hive/blob/51a0c03f/ql/src/test/resources/orc-file-dump-bloomfilter2.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter2.out b/ql/src/test/resources/orc-file-dump-bloomfilter2.out
index 06b65ce..462d41f 100644
--- a/ql/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/ql/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -2,7 +2,7 @@ Structure for TestFileDump.testDump.orc
 File Version: 0.12 with HIVE_8732
 Rows: 21000
 Compression: ZLIB
-Compression size: 10000
+Compression size: 4096
 Type: struct<i:int,l:bigint,s:string>
 
 Stripe Statistics:
@@ -39,27 +39,27 @@ File Statistics:
   Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
 
 Stripes:
-  Stripe: offset: 3 data: 63765 rows: 5000 tail: 85 index: 6935
+  Stripe: offset: 3 data: 63786 rows: 5000 tail: 85 index: 6974
     Stream: column 0 section ROW_INDEX start: 3 length 17
-    Stream: column 1 section ROW_INDEX start: 20 length 164
-    Stream: column 2 section ROW_INDEX start: 184 length 173
-    Stream: column 2 section BLOOM_FILTER start: 357 length 6494
-    Stream: column 3 section ROW_INDEX start: 6851 length 87
-    Stream: column 1 section DATA start: 6938 length 20029
-    Stream: column 2 section DATA start: 26967 length 40035
-    Stream: column 3 section DATA start: 67002 length 3543
-    Stream: column 3 section LENGTH start: 70545 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 70570 length 133
+    Stream: column 1 section ROW_INDEX start: 20 length 166
+    Stream: column 2 section ROW_INDEX start: 186 length 169
+    Stream: column 2 section BLOOM_FILTER start: 355 length 6535
+    Stream: column 3 section ROW_INDEX start: 6890 length 87
+    Stream: column 1 section DATA start: 6977 length 20035
+    Stream: column 2 section DATA start: 27012 length 40050
+    Stream: column 3 section DATA start: 67062 length 3543
+    Stream: column 3 section LENGTH start: 70605 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 70630 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
     Encoding column 3: DICTIONARY_V2[35]
     Row group indices for column 2:
       Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0
-      Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 0,4098,488
-      Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 10003,2294,464
-      Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20006,490,440
-      Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 20006,8686,416
+      Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 4099,2,488
+      Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 12297,6,464
+      Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20495,10,440
+      Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 28693,14,416
     Bloom filters for column 2:
       Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4931 loadFactor: 0.5136 expectedFpp: 0.009432924
       Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4956 loadFactor: 0.5163 expectedFpp: 0.009772834
@@ -67,27 +67,27 @@ Stripes:
       Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772
       Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4949 loadFactor: 0.5155 expectedFpp: 0.009676614
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9347 loadFactor: 0.9736 expectedFpp: 0.829482
-  Stripe: offset: 70788 data: 63754 rows: 5000 tail: 85 index: 6917
-    Stream: column 0 section ROW_INDEX start: 70788 length 17
-    Stream: column 1 section ROW_INDEX start: 70805 length 162
-    Stream: column 2 section ROW_INDEX start: 70967 length 171
-    Stream: column 2 section BLOOM_FILTER start: 71138 length 6484
-    Stream: column 3 section ROW_INDEX start: 77622 length 83
-    Stream: column 1 section DATA start: 77705 length 20029
-    Stream: column 2 section DATA start: 97734 length 40035
-    Stream: column 3 section DATA start: 137769 length 3532
-    Stream: column 3 section LENGTH start: 141301 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 141326 length 133
+  Stripe: offset: 70848 data: 63775 rows: 5000 tail: 85 index: 6965
+    Stream: column 0 section ROW_INDEX start: 70848 length 17
+    Stream: column 1 section ROW_INDEX start: 70865 length 164
+    Stream: column 2 section ROW_INDEX start: 71029 length 168
+    Stream: column 2 section BLOOM_FILTER start: 71197 length 6533
+    Stream: column 3 section ROW_INDEX start: 77730 length 83
+    Stream: column 1 section DATA start: 77813 length 20035
+    Stream: column 2 section DATA start: 97848 length 40050
+    Stream: column 3 section DATA start: 137898 length 3532
+    Stream: column 3 section LENGTH start: 141430 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 141455 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
     Encoding column 3: DICTIONARY_V2[35]
     Row group indices for column 2:
       Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0
-      Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 0,4098,488
-      Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 10003,2294,464
-      Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20006,490,440
-      Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 20006,8686,416
+      Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 4099,2,488
+      Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 12297,6,464
+      Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20495,10,440
+      Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 28693,14,416
     Bloom filters for column 2:
       Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772
       Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4988 loadFactor: 0.5196 expectedFpp: 0.010223193
@@ -95,27 +95,27 @@ Stripes:
       Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4962 loadFactor: 0.5169 expectedFpp: 0.009855959
       Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4966 loadFactor: 0.5173 expectedFpp: 0.009911705
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9344 loadFactor: 0.9733 expectedFpp: 0.8276205
-  Stripe: offset: 141544 data: 63766 rows: 5000 tail: 85 index: 6924
-    Stream: column 0 section ROW_INDEX start: 141544 length 17
-    Stream: column 1 section ROW_INDEX start: 141561 length 159
-    Stream: column 2 section ROW_INDEX start: 141720 length 171
-    Stream: column 2 section BLOOM_FILTER start: 141891 length 6487
-    Stream: column 3 section ROW_INDEX start: 148378 length 90
-    Stream: column 1 section DATA start: 148468 length 20029
-    Stream: column 2 section DATA start: 168497 length 40035
-    Stream: column 3 section DATA start: 208532 length 3544
-    Stream: column 3 section LENGTH start: 212076 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 212101 length 133
+  Stripe: offset: 141673 data: 63787 rows: 5000 tail: 85 index: 6971
+    Stream: column 0 section ROW_INDEX start: 141673 length 17
+    Stream: column 1 section ROW_INDEX start: 141690 length 163
+    Stream: column 2 section ROW_INDEX start: 141853 length 168
+    Stream: column 2 section BLOOM_FILTER start: 142021 length 6533
+    Stream: column 3 section ROW_INDEX start: 148554 length 90
+    Stream: column 1 section DATA start: 148644 length 20035
+    Stream: column 2 section DATA start: 168679 length 40050
+    Stream: column 3 section DATA start: 208729 length 3544
+    Stream: column 3 section LENGTH start: 212273 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 212298 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
     Encoding column 3: DICTIONARY_V2[35]
     Row group indices for column 2:
       Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0
-      Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 0,4098,488
-      Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 10003,2294,464
-      Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20006,490,440
-      Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 20006,8686,416
+      Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 4099,2,488
+      Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 12297,6,464
+      Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20495,10,440
+      Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 28693,14,416
     Bloom filters for column 2:
       Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4967 loadFactor: 0.5174 expectedFpp: 0.009925688
       Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 5002 loadFactor: 0.521 expectedFpp: 0.01042575
@@ -123,27 +123,27 @@ Stripes:
       Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4943 loadFactor: 0.5149 expectedFpp: 0.009594797
       Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4930 loadFactor: 0.5135 expectedFpp: 0.009419539
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9333 loadFactor: 0.9722 expectedFpp: 0.82082444
-  Stripe: offset: 212319 data: 63796 rows: 5000 tail: 85 index: 6925
-    Stream: column 0 section ROW_INDEX start: 212319 length 17
-    Stream: column 1 section ROW_INDEX start: 212336 length 162
-    Stream: column 2 section ROW_INDEX start: 212498 length 170
-    Stream: column 2 section BLOOM_FILTER start: 212668 length 6485
-    Stream: column 3 section ROW_INDEX start: 219153 length 91
-    Stream: column 1 section DATA start: 219244 length 20029
-    Stream: column 2 section DATA start: 239273 length 40035
-    Stream: column 3 section DATA start: 279308 length 3574
-    Stream: column 3 section LENGTH start: 282882 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 282907 length 133
+  Stripe: offset: 212516 data: 63817 rows: 5000 tail: 85 index: 6964
+    Stream: column 0 section ROW_INDEX start: 212516 length 17
+    Stream: column 1 section ROW_INDEX start: 212533 length 165
+    Stream: column 2 section ROW_INDEX start: 212698 length 167
+    Stream: column 2 section BLOOM_FILTER start: 212865 length 6524
+    Stream: column 3 section ROW_INDEX start: 219389 length 91
+    Stream: column 1 section DATA start: 219480 length 20035
+    Stream: column 2 section DATA start: 239515 length 40050
+    Stream: column 3 section DATA start: 279565 length 3574
+    Stream: column 3 section LENGTH start: 283139 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 283164 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
     Encoding column 3: DICTIONARY_V2[35]
     Row group indices for column 2:
       Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0
-      Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 0,4098,488
-      Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 10003,2294,464
-      Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20006,490,440
-      Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 20006,8686,416
+      Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 4099,2,488
+      Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 12297,6,464
+      Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20495,10,440
+      Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 28693,14,416
     Bloom filters for column 2:
       Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4951 loadFactor: 0.5157 expectedFpp: 0.009704026
       Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4969 loadFactor: 0.5176 expectedFpp: 0.009953696
@@ -151,17 +151,17 @@ Stripes:
       Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4941 loadFactor: 0.5147 expectedFpp: 0.009567649
       Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4993 loadFactor: 0.5201 expectedFpp: 0.010295142
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9353 loadFactor: 0.9743 expectedFpp: 0.8332165
-  Stripe: offset: 283125 data: 12940 rows: 1000 tail: 78 index: 1468
-    Stream: column 0 section ROW_INDEX start: 283125 length 12
-    Stream: column 1 section ROW_INDEX start: 283137 length 38
-    Stream: column 2 section ROW_INDEX start: 283175 length 41
-    Stream: column 2 section BLOOM_FILTER start: 283216 length 1337
-    Stream: column 3 section ROW_INDEX start: 284553 length 40
-    Stream: column 1 section DATA start: 284593 length 4007
-    Stream: column 2 section DATA start: 288600 length 8007
-    Stream: column 3 section DATA start: 296607 length 768
-    Stream: column 3 section LENGTH start: 297375 length 25
-    Stream: column 3 section DICTIONARY_DATA start: 297400 length 133
+  Stripe: offset: 283382 data: 12943 rows: 1000 tail: 78 index: 1468
+    Stream: column 0 section ROW_INDEX start: 283382 length 12
+    Stream: column 1 section ROW_INDEX start: 283394 length 38
+    Stream: column 2 section ROW_INDEX start: 283432 length 41
+    Stream: column 2 section BLOOM_FILTER start: 283473 length 1337
+    Stream: column 3 section ROW_INDEX start: 284810 length 40
+    Stream: column 1 section DATA start: 284850 length 4007
+    Stream: column 2 section DATA start: 288857 length 8010
+    Stream: column 3 section DATA start: 296867 length 768
+    Stream: column 3 section LENGTH start: 297635 length 25
+    Stream: column 3 section DICTIONARY_DATA start: 297660 length 133
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DIRECT_V2
@@ -172,6 +172,6 @@ Stripes:
       Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
 
-File length: 298155 bytes
+File length: 298416 bytes
 Padding length: 0 bytes
 Padding ratio: 0%