You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/05/07 03:52:35 UTC

hive git commit: HIVE-10592: ORC file dump in JSON format (Prasanth Jayachandran reviewed by Gopal V)

Repository: hive
Updated Branches:
  refs/heads/master 93995c8be -> 80fb89131


HIVE-10592: ORC file dump in JSON format (Prasanth Jayachandran reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/80fb8913
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/80fb8913
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/80fb8913

Branch: refs/heads/master
Commit: 80fb8913196eef8e4125544c3138b0c73be267b7
Parents: 93995c8
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed May 6 18:52:17 2015 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed May 6 18:52:17 2015 -0700

----------------------------------------------------------------------
 bin/ext/orcfiledump.sh                          |    9 +-
 .../hive/ql/io/orc/ColumnStatisticsImpl.java    |   16 +-
 .../apache/hadoop/hive/ql/io/orc/FileDump.java  |   91 +-
 .../hadoop/hive/ql/io/orc/JsonFileDump.java     |  365 +++++
 .../hadoop/hive/ql/io/orc/TestJsonFileDump.java |  138 ++
 ql/src/test/resources/orc-file-dump.json        | 1354 ++++++++++++++++++
 6 files changed, 1929 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/bin/ext/orcfiledump.sh
----------------------------------------------------------------------
diff --git a/bin/ext/orcfiledump.sh b/bin/ext/orcfiledump.sh
index 752e437..6139de2 100644
--- a/bin/ext/orcfiledump.sh
+++ b/bin/ext/orcfiledump.sh
@@ -23,5 +23,12 @@ orcfiledump () {
 }
 
 orcfiledump_help () {
-  echo "usage ./hive orcfiledump [-d] [--rowindex <col_ids>] <path_to_file>"
+  echo "usage ./hive orcfiledump [-h] [-j] [-p] [-t] [-d] [-r <col_ids>] <path_to_file>"
+  echo ""
+  echo "  --json (-j)                 Print metadata in JSON format"
+  echo "  --pretty (-p)               Pretty print json metadata output"
+  echo "  --timezone (-t)             Print writer's time zone"
+  echo "  --data (-d)                 Should the data be printed"
+  echo "  --rowindex (-r) <_col_ids_> Comma separated list of column ids for which row index should be printed"
+  echo "  --help (-h)                 Print help message"
 } 

http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
index 7cfbd81..ffba3c6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
@@ -699,12 +699,18 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     public Date getMinimum() {
+      if (minimum == null) {
+        return null;
+      }
       minDate.set(minimum);
       return minDate.get();
     }
 
     @Override
     public Date getMaximum() {
+      if (maximum == null) {
+        return null;
+      }
       maxDate.set(maximum);
       return maxDate.get();
     }
@@ -793,14 +799,12 @@ class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     public Timestamp getMinimum() {
-      Timestamp minTimestamp = new Timestamp(minimum);
-      return minTimestamp;
+      return minimum == null ? null : new Timestamp(minimum);
     }
 
     @Override
     public Timestamp getMaximum() {
-      Timestamp maxTimestamp = new Timestamp(maximum);
-      return maxTimestamp;
+      return maximum == null ? null : new Timestamp(maximum);
     }
 
     @Override
@@ -808,9 +812,9 @@ class ColumnStatisticsImpl implements ColumnStatistics {
       StringBuilder buf = new StringBuilder(super.toString());
       if (getNumberOfValues() != 0) {
         buf.append(" min: ");
-        buf.append(minimum);
+        buf.append(getMinimum());
         buf.append(" max: ");
-        buf.append(maximum);
+        buf.append(getMaximum());
       }
       return buf.toString();
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index cd4db75..33c4cd8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -50,10 +50,11 @@ import org.codehaus.jettison.json.JSONWriter;
  * A tool for printing out the file structure of ORC files.
  */
 public final class FileDump {
-  private static final String UNKNOWN = "UNKNOWN";
+  public static final String UNKNOWN = "UNKNOWN";
 
   // not used
-  private FileDump() {}
+  private FileDump() {
+  }
 
   public static void main(String[] args) throws Exception {
     Configuration conf = new Configuration();
@@ -69,21 +70,28 @@ public final class FileDump {
     }
 
     boolean dumpData = cli.hasOption('d');
-    if (cli.hasOption("rowindex")) {
-      String[] colStrs = cli.getOptionValue("rowindex").split(",");
+    if (cli.hasOption("r")) {
+      String[] colStrs = cli.getOptionValue("r").split(",");
       rowIndexCols = new ArrayList<Integer>(colStrs.length);
       for (String colStr : colStrs) {
         rowIndexCols.add(Integer.parseInt(colStr));
       }
     }
 
-    boolean printTimeZone = false;
-    if (cli.hasOption('t')) {
-      printTimeZone = true;
-    }
+    boolean printTimeZone = cli.hasOption('t');
+    boolean jsonFormat = cli.hasOption('j');
     String[] files = cli.getArgs();
-    if (dumpData) printData(Arrays.asList(files), conf);
-    else printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
+    if (dumpData) {
+      printData(Arrays.asList(files), conf);
+    } else {
+      if (jsonFormat) {
+        boolean prettyPrint = cli.hasOption('p');
+        JsonFileDump.printJsonMetaData(Arrays.asList(files), conf, rowIndexCols, prettyPrint,
+            printTimeZone);
+      } else {
+        printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
+      }
+    }
   }
 
   private static void printData(List<String> files, Configuration conf) throws IOException,
@@ -100,7 +108,7 @@ public final class FileDump {
       Path path = new Path(filename);
       Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
       System.out.println("File Version: " + reader.getFileVersion().getName() +
-                         " with " + reader.getWriterVersion());
+          " with " + reader.getWriterVersion());
       RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
       System.out.println("Rows: " + reader.getNumberOfRows());
       System.out.println("Compression: " + reader.getCompression());
@@ -121,7 +129,7 @@ public final class FileDump {
       ColumnStatistics[] stats = reader.getStatistics();
       int colCount = stats.length;
       System.out.println("\nFile Statistics:");
-      for(int i=0; i < stats.length; ++i) {
+      for (int i = 0; i < stats.length; ++i) {
         System.out.println("  Column " + i + ": " + stats[i].toString());
       }
       System.out.println("\nStripes:");
@@ -140,7 +148,7 @@ public final class FileDump {
           System.out.println("  Stripe: " + stripe.toString());
         }
         long sectionStart = stripeStart;
-        for(OrcProto.Stream section: footer.getStreamsList()) {
+        for (OrcProto.Stream section : footer.getStreamsList()) {
           String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
           System.out.println("    Stream: column " + section.getColumn() +
               " section " + kind + " start: " + sectionStart +
@@ -270,7 +278,7 @@ public final class FileDump {
     return buf.toString();
   }
 
-  private static long getTotalPaddingSize(Reader reader) throws IOException {
+  public static long getTotalPaddingSize(Reader reader) throws IOException {
     long paddedBytes = 0;
     List<org.apache.hadoop.hive.ql.io.orc.StripeInformation> stripes = reader.getStripes();
     for (int i = 1; i < stripes.size(); i++) {
@@ -307,21 +315,30 @@ public final class FileDump {
         .withArgName("comma separated list of column ids for which row index should be printed")
         .withDescription("Dump stats for column number(s)")
         .hasArg()
-        .create());
+        .create('r'));
+
+    result.addOption(OptionBuilder
+        .withLongOpt("json")
+        .withDescription("Print metadata in JSON format")
+        .create('j'));
 
+    result.addOption(OptionBuilder
+            .withLongOpt("pretty")
+            .withDescription("Pretty print json metadata output")
+            .create('p'));
 
     return result;
   }
 
   private static void printMap(JSONWriter writer,
-                               Map<Object, Object> obj,
-                               List<OrcProto.Type> types,
-                               OrcProto.Type type
+      Map<Object, Object> obj,
+      List<OrcProto.Type> types,
+      OrcProto.Type type
   ) throws IOException, JSONException {
     writer.array();
     int keyType = type.getSubtypes(0);
     int valueType = type.getSubtypes(1);
-    for(Map.Entry<Object,Object> item: obj.entrySet()) {
+    for (Map.Entry<Object, Object> item : obj.entrySet()) {
       writer.object();
       writer.key("_key");
       printObject(writer, item.getKey(), types, keyType);
@@ -333,34 +350,34 @@ public final class FileDump {
   }
 
   private static void printList(JSONWriter writer,
-                                List<Object> obj,
-                                List<OrcProto.Type> types,
-                                OrcProto.Type type
+      List<Object> obj,
+      List<OrcProto.Type> types,
+      OrcProto.Type type
   ) throws IOException, JSONException {
     int subtype = type.getSubtypes(0);
     writer.array();
-    for(Object item: obj) {
+    for (Object item : obj) {
       printObject(writer, item, types, subtype);
     }
     writer.endArray();
   }
 
   private static void printUnion(JSONWriter writer,
-                                 OrcUnion obj,
-                                 List<OrcProto.Type> types,
-                                 OrcProto.Type type
+      OrcUnion obj,
+      List<OrcProto.Type> types,
+      OrcProto.Type type
   ) throws IOException, JSONException {
     int subtype = type.getSubtypes(obj.getTag());
     printObject(writer, obj.getObject(), types, subtype);
   }
 
   static void printStruct(JSONWriter writer,
-                          OrcStruct obj,
-                          List<OrcProto.Type> types,
-                          OrcProto.Type type) throws IOException, JSONException {
+      OrcStruct obj,
+      List<OrcProto.Type> types,
+      OrcProto.Type type) throws IOException, JSONException {
     writer.object();
     List<Integer> fieldTypes = type.getSubtypesList();
-    for(int i=0; i < fieldTypes.size(); ++i) {
+    for (int i = 0; i < fieldTypes.size(); ++i) {
       writer.key(type.getFieldNames(i));
       printObject(writer, obj.getFieldValue(i), types, fieldTypes.get(i));
     }
@@ -368,9 +385,9 @@ public final class FileDump {
   }
 
   static void printObject(JSONWriter writer,
-                          Object obj,
-                          List<OrcProto.Type> types,
-                          int typeId) throws IOException, JSONException {
+      Object obj,
+      List<OrcProto.Type> types,
+      int typeId) throws IOException, JSONException {
     OrcProto.Type type = types.get(typeId);
     if (obj == null) {
       writer.value(null);
@@ -417,7 +434,7 @@ public final class FileDump {
   }
 
   static void printJsonData(Configuration conf,
-                            String filename) throws IOException, JSONException {
+      String filename) throws IOException, JSONException {
     Path path = new Path(filename);
     Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
     OutputStreamWriter out = new OutputStreamWriter(System.out, "UTF-8");

http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
new file mode 100644
index 0000000..c33004e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
@@ -0,0 +1,365 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONObject;
+import org.codehaus.jettison.json.JSONArray;
+import org.codehaus.jettison.json.JSONStringer;
+import org.codehaus.jettison.json.JSONWriter;
+
+/**
+ * File dump tool with json formatted output.
+ */
+public class JsonFileDump {
+
+  public static void printJsonMetaData(List<String> files, Configuration conf,
+      List<Integer> rowIndexCols, boolean prettyPrint, boolean printTimeZone) throws JSONException, IOException {
+    JSONStringer writer = new JSONStringer();
+    boolean asArray = files.size() != 1; // one file: bare object; zero or several: array
+    if (asArray) {
+      writer.array();
+    } else {
+      writer.object();
+    }
+    for (String filename : files) {
+      if (asArray) {
+        writer.object();
+      }
+      writer.key("fileName").value(filename);
+      Path path = new Path(filename);
+      Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+      writer.key("fileVersion").value(reader.getFileVersion().getName());
+      writer.key("writerVersion").value(reader.getWriterVersion());
+      RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
+      writer.key("numberOfRows").value(reader.getNumberOfRows());
+      writer.key("compression").value(reader.getCompression());
+      if (reader.getCompression() != CompressionKind.NONE) {
+        writer.key("compressionBufferSize").value(reader.getCompressionSize());
+      }
+      writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
+      writer.key("schema").array();
+      writeSchema(writer, reader.getTypes());
+      writer.endArray();
+      // Column statistics recorded for each stripe.
+      writer.key("stripeStatistics").array();
+      Metadata metadata = reader.getMetadata();
+      for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
+        writer.object();
+        writer.key("stripeNumber").value(n + 1);
+        StripeStatistics ss = metadata.getStripeStatistics().get(n);
+        writer.key("columnStatistics").array();
+        for (int i = 0; i < ss.getColumnStatistics().length; i++) {
+          writer.object();
+          writer.key("columnId").value(i);
+          writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
+          writer.endObject();
+        }
+        writer.endArray();
+        writer.endObject();
+      }
+      writer.endArray();
+      // Column statistics for the file as a whole.
+      ColumnStatistics[] stats = reader.getStatistics();
+      int colCount = stats.length;
+      writer.key("fileStatistics").array();
+      for (int i = 0; i < stats.length; ++i) {
+        writer.object();
+        writer.key("columnId").value(i);
+        writeColumnStatistics(writer, stats[i]);
+        writer.endObject();
+      }
+      writer.endArray();
+      // Per-stripe details: stream layout, column encodings and, on request, row indexes.
+      writer.key("stripes").array();
+      int stripeIx = -1;
+      for (StripeInformation stripe : reader.getStripes()) {
+        ++stripeIx;
+        long stripeStart = stripe.getOffset();
+        OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+        writer.object(); // start of stripe information
+        writer.key("stripeNumber").value(stripeIx + 1);
+        writer.key("stripeInformation");
+        writeStripeInformation(writer, stripe);
+        if (printTimeZone) {
+          writer.key("writerTimezone").value(
+              footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
+        }
+        long sectionStart = stripeStart;
+
+        writer.key("streams").array();
+        for (OrcProto.Stream section : footer.getStreamsList()) {
+          writer.object();
+          String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
+          writer.key("columnId").value(section.getColumn());
+          writer.key("section").value(kind);
+          writer.key("startOffset").value(sectionStart);
+          writer.key("length").value(section.getLength());
+          sectionStart += section.getLength();
+          writer.endObject();
+        }
+        writer.endArray();
+
+        writer.key("encodings").array();
+        for (int i = 0; i < footer.getColumnsCount(); ++i) {
+          writer.object();
+          OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+          writer.key("columnId").value(i);
+          writer.key("kind").value(encoding.getKind());
+          if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+              encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+            writer.key("dictionarySize").value(encoding.getDictionarySize());
+          }
+          writer.endObject();
+        }
+        writer.endArray();
+
+        if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
+          // include the columns that are specified, only if the columns are included, bloom filter
+          // will be read
+          boolean[] sargColumns = new boolean[colCount];
+          for (int colIdx : rowIndexCols) {
+            sargColumns[colIdx] = true;
+          }
+          RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
+          writer.key("indexes").array();
+          for (int col : rowIndexCols) {
+            writer.object();
+            writer.key("columnId").value(col);
+            writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
+            writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
+            writer.endObject();
+          }
+          writer.endArray();
+        }
+        writer.endObject(); // end of stripe information
+      }
+      writer.endArray();
+      // File length and how much of it is padding.
+      FileSystem fs = path.getFileSystem(conf);
+      long fileLen = fs.getContentSummary(path).getLength();
+      long paddedBytes = FileDump.getTotalPaddingSize(reader);
+      // empty ORC file is ~45 bytes. Assumption here is file length always >0
+      double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
+      writer.key("fileLength").value(fileLen);
+      writer.key("paddingLength").value(paddedBytes);
+      writer.key("paddingRatio").value(percentPadding);
+      rows.close();
+
+      writer.endObject();
+    }
+    if (asArray) {
+      writer.endArray();
+    }
+    // Pretty printing re-parses the compact text and re-serializes it with indent factor 2.
+    if (prettyPrint) {
+      final String prettyJson;
+      if (asArray) {
+        JSONArray jsonArray = new JSONArray(writer.toString());
+        prettyJson = jsonArray.toString(2);
+      } else {
+        JSONObject jsonObject = new JSONObject(writer.toString());
+        prettyJson = jsonObject.toString(2);
+      }
+      System.out.println(prettyJson);
+    } else {
+      System.out.println(writer.toString());
+    }
+  }
+
+  private static void writeSchema(JSONStringer writer, List<OrcProto.Type> types)
+      throws JSONException {
+    int i = 0; // columnId is the position of the type in the list
+    for(OrcProto.Type type : types) {
+      writer.object();
+      writer.key("columnId").value(i++);
+      writer.key("columnType").value(type.getKind());
+      if (type.getFieldNamesCount() > 0) { // child names and ids are written only when field names exist
+        writer.key("childColumnNames").array();
+        for (String field : type.getFieldNamesList()) {
+          writer.value(field);
+        }
+        writer.endArray();
+        writer.key("childColumnIds").array();
+        for (Integer colId : type.getSubtypesList()) {
+          writer.value(colId);
+        }
+        writer.endArray();
+      }
+      if (type.hasPrecision()) { // precision, scale and maxLength appear only when set on the type
+        writer.key("precision").value(type.getPrecision());
+      }
+
+      if (type.hasScale()) {
+        writer.key("scale").value(type.getScale());
+      }
+
+      if (type.hasMaximumLength()) {
+        writer.key("maxLength").value(type.getMaximumLength());
+      }
+      writer.endObject();
+    }
+  }
+
+  private static void writeStripeInformation(JSONWriter writer, StripeInformation stripe)
+      throws JSONException {
+    writer.object(); // a single object: offset, the three section lengths and the row count
+    writer.key("offset").value(stripe.getOffset());
+    writer.key("indexLength").value(stripe.getIndexLength());
+    writer.key("dataLength").value(stripe.getDataLength());
+    writer.key("footerLength").value(stripe.getFooterLength());
+    writer.key("rowCount").value(stripe.getNumberOfRows());
+    writer.endObject();
+  }
+
+  private static void writeColumnStatistics(JSONWriter writer, ColumnStatistics cs)
+      throws JSONException { // writes keys only; the enclosing object is opened and closed by the caller
+    if (cs != null) { // null statistics: nothing is written
+      writer.key("count").value(cs.getNumberOfValues());
+      writer.key("hasNull").value(cs.hasNull());
+      if (cs instanceof BinaryColumnStatistics) { // no final else: other kinds get count and hasNull only
+        writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum());
+        writer.key("type").value(OrcProto.Type.Kind.BINARY);
+      } else if (cs instanceof BooleanColumnStatistics) {
+        writer.key("trueCount").value(((BooleanColumnStatistics) cs).getTrueCount());
+        writer.key("falseCount").value(((BooleanColumnStatistics) cs).getFalseCount());
+        writer.key("type").value(OrcProto.Type.Kind.BOOLEAN);
+      } else if (cs instanceof IntegerColumnStatistics) {
+        writer.key("min").value(((IntegerColumnStatistics) cs).getMinimum());
+        writer.key("max").value(((IntegerColumnStatistics) cs).getMaximum());
+        if (((IntegerColumnStatistics) cs).isSumDefined()) { // sum is left out when not defined
+          writer.key("sum").value(((IntegerColumnStatistics) cs).getSum());
+        }
+        writer.key("type").value(OrcProto.Type.Kind.LONG);
+      } else if (cs instanceof DoubleColumnStatistics) {
+        writer.key("min").value(((DoubleColumnStatistics) cs).getMinimum());
+        writer.key("max").value(((DoubleColumnStatistics) cs).getMaximum());
+        writer.key("sum").value(((DoubleColumnStatistics) cs).getSum());
+        writer.key("type").value(OrcProto.Type.Kind.DOUBLE);
+      } else if (cs instanceof StringColumnStatistics) {
+        writer.key("min").value(((StringColumnStatistics) cs).getMinimum());
+        writer.key("max").value(((StringColumnStatistics) cs).getMaximum());
+        writer.key("totalLength").value(((StringColumnStatistics) cs).getSum());
+        writer.key("type").value(OrcProto.Type.Kind.STRING);
+      } else if (cs instanceof DateColumnStatistics) {
+        if (((DateColumnStatistics) cs).getMaximum() != null) { // only the maximum is tested before writing both
+          writer.key("min").value(((DateColumnStatistics) cs).getMinimum());
+          writer.key("max").value(((DateColumnStatistics) cs).getMaximum());
+        }
+        writer.key("type").value(OrcProto.Type.Kind.DATE);
+      } else if (cs instanceof TimestampColumnStatistics) {
+        if (((TimestampColumnStatistics) cs).getMaximum() != null) {
+          writer.key("min").value(((TimestampColumnStatistics) cs).getMinimum());
+          writer.key("max").value(((TimestampColumnStatistics) cs).getMaximum());
+        }
+        writer.key("type").value(OrcProto.Type.Kind.TIMESTAMP);
+      } else if (cs instanceof DecimalColumnStatistics) {
+        if (((DecimalColumnStatistics) cs).getMaximum() != null) {
+          writer.key("min").value(((DecimalColumnStatistics) cs).getMinimum());
+          writer.key("max").value(((DecimalColumnStatistics) cs).getMaximum());
+          writer.key("sum").value(((DecimalColumnStatistics) cs).getSum());
+        }
+        writer.key("type").value(OrcProto.Type.Kind.DECIMAL);
+      }
+    }
+  }
+
+  private static void writeBloomFilterIndexes(JSONWriter writer, int col,
+      OrcProto.BloomFilterIndex[] bloomFilterIndex) throws JSONException {
+    BloomFilterIO stripeLevelBF = null; // merge of every entry's filter; stays null when there are none
+    if (bloomFilterIndex != null && col < bloomFilterIndex.length &&
+        bloomFilterIndex[col] != null) {
+      int entryIx = 0;
+      writer.key("bloomFilterIndexes").array();
+      for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
+        writer.object();
+        writer.key("entryId").value(entryIx++);
+        BloomFilterIO toMerge = new BloomFilterIO(bf);
+        writeBloomFilterStats(writer, toMerge); // per-entry stats are written before any merging
+        if (stripeLevelBF == null) {
+          stripeLevelBF = toMerge;
+        } else {
+          stripeLevelBF.merge(toMerge);
+        }
+        writer.endObject();
+      }
+      writer.endArray();
+    }
+    if (stripeLevelBF != null) {
+      writer.key("stripeLevelBloomFilter");
+      writer.object();
+      writeBloomFilterStats(writer, stripeLevelBF);
+      writer.endObject();
+    }
+  }
+
+  private static void writeBloomFilterStats(JSONWriter writer, BloomFilterIO bf)
+      throws JSONException { // writes keys only; the enclosing object belongs to the caller
+    int bitCount = bf.getBitSize();
+    int popCount = 0; // total number of set bits across the filter's longs
+    for (long l : bf.getBitSet()) {
+      popCount += Long.bitCount(l);
+    }
+    int k = bf.getNumHashFunctions();
+    float loadFactor = (float) popCount / (float) bitCount; // fraction of bits that are set
+    float expectedFpp = (float) Math.pow(loadFactor, k); // load factor raised to the number of hash functions
+    writer.key("numHashFunctions").value(k);
+    writer.key("bitCount").value(bitCount);
+    writer.key("popCount").value(popCount);
+    writer.key("loadFactor").value(loadFactor);
+    writer.key("expectedFpp").value(expectedFpp);
+  }
+
+  private static void writeRowGroupIndexes(JSONWriter writer, int col,
+      OrcProto.RowIndex[] rowGroupIndex)
+      throws JSONException {
+    // Writes the "rowGroupIndexes" array for column col; writes nothing when it has no index.
+    OrcProto.RowIndex index;
+    if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
+        ((index = rowGroupIndex[col]) == null)) {
+      return;
+    }
+    // Each entry is looked up before its object is opened, so a skipped entry leaves nothing unclosed.
+    writer.key("rowGroupIndexes").array();
+    for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
+      OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
+      if (entry == null) {
+        continue;
+      }
+      writer.object();
+      writer.key("entryId").value(entryIx);
+      OrcProto.ColumnStatistics colStats = entry.getStatistics();
+      writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));
+      writer.key("positions").array();
+      for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
+        writer.value(entry.getPositions(posIx));
+      }
+      writer.endArray();
+      writer.endObject();
+    }
+    writer.endArray();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
new file mode 100644
index 0000000..d17c528
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestJsonFileDump {
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Before
+  public void openFileSystem () throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    fs.setWorkingDirectory(workDir); // the relative test path below resolves against workDir
+    testFilePath = new Path("TestFileDump.testDump.orc");
+    fs.delete(testFilePath, false); // non-recursive delete of the test file path before each test
+  }
+
+  static class MyRecord { // row type for the test file; one int, one long and one String field
+    int i;
+    long l;
+    String s;
+    MyRecord(int i, long l, String s) {
+      this.i = i;
+      this.l = l;
+      this.s = s;
+    }
+  }
+
+  static void checkOutput(String expected,
+                                  String actual) throws Exception { // expected: classpath resource; actual: file path
+    BufferedReader eStream =
+        new BufferedReader(new FileReader(HiveTestUtils.getFileFromClasspath(expected)));
+    BufferedReader aStream =
+        new BufferedReader(new FileReader(actual)); // NOTE(review): neither reader is ever closed
+    String expectedLine = eStream.readLine();
+    while (expectedLine != null) { // compare line by line, driven by the expected file
+      String actualLine = aStream.readLine();
+      System.out.println("actual:   " + actualLine);
+      System.out.println("expected: " + expectedLine);
+      assertEquals(expectedLine, actualLine);
+      expectedLine = eStream.readLine();
+    }
+    assertNull(eStream.readLine()); // the loop above only exits once the expected file is exhausted
+    assertNull(aStream.readLine()); // the actual output must have no extra lines
+  }
+
+  @Test
+  public void testJsonDump() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) { // NOTE(review): presumably guards shared inspector creation - confirm
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+        .fileSystem(fs)
+        .inspector(inspector)
+        .stripeSize(100000)
+        .compress(CompressionKind.ZLIB)
+        .bufferSize(10000)
+        .rowIndexStride(1000)
+        .bloomFilterColumns("s");
+    Writer writer = OrcFile.createWriter(testFilePath, options);
+    Random r1 = new Random(1); // fixed seed, so the generated rows are the same on every run
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    for(int i=0; i < 21000; ++i) {
+      if (i % 100 == 0) { // every 100th row carries a null string
+        writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(), null));
+      } else {
+        writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
+            words[r1.nextInt(words.length)]));
+      }
+    }
+
+    writer.close();
+    PrintStream origOut = System.out; // kept so stdout can be put back after the dump
+    String outputFilename = "orc-file-dump.json";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=3"});
+    System.out.flush();
+    System.setOut(origOut);
+    // NOTE(review): stdout is not restored if main() throws, and myOut is never closed.
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/test/resources/orc-file-dump.json
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump.json b/ql/src/test/resources/orc-file-dump.json
new file mode 100644
index 0000000..125a32e
--- /dev/null
+++ b/ql/src/test/resources/orc-file-dump.json
@@ -0,0 +1,1354 @@
+{
+  "fileName": "TestFileDump.testDump.orc",
+  "fileVersion": "0.12",
+  "writerVersion": "HIVE_8732",
+  "numberOfRows": 21000,
+  "compression": "ZLIB",
+  "compressionBufferSize": 10000,
+  "schemaString": "struct<i:int,l:bigint,s:string>",
+  "schema": [
+    {
+      "columnId": 0,
+      "columnType": "STRUCT",
+      "childColumnNames": [
+        "i",
+        "l",
+        "s"
+      ],
+      "childColumnIds": [
+        1,
+        2,
+        3
+      ]
+    },
+    {
+      "columnId": 1,
+      "columnType": "INT"
+    },
+    {
+      "columnId": 2,
+      "columnType": "LONG"
+    },
+    {
+      "columnId": 3,
+      "columnType": "STRING"
+    }
+  ],
+  "stripeStatistics": [
+    {
+      "stripeNumber": 1,
+      "columnStatistics": [
+        {
+          "columnId": 0,
+          "count": 5000,
+          "hasNull": false
+        },
+        {
+          "columnId": 1,
+          "count": 5000,
+          "hasNull": false,
+          "min": -2147115959,
+          "max": 2145210552,
+          "sum": 50111854553,
+          "type": "LONG"
+        },
+        {
+          "columnId": 2,
+          "count": 5000,
+          "hasNull": false,
+          "min": -9223180583305557329,
+          "max": 9221614132680747961,
+          "type": "LONG"
+        },
+        {
+          "columnId": 3,
+          "count": 4950,
+          "hasNull": true,
+          "min": "Darkness,",
+          "max": "worst",
+          "totalLength": 19283,
+          "type": "STRING"
+        }
+      ]
+    },
+    {
+      "stripeNumber": 2,
+      "columnStatistics": [
+        {
+          "columnId": 0,
+          "count": 5000,
+          "hasNull": false
+        },
+        {
+          "columnId": 1,
+          "count": 5000,
+          "hasNull": false,
+          "min": -2147390285,
+          "max": 2147224606,
+          "sum": -22290798217,
+          "type": "LONG"
+        },
+        {
+          "columnId": 2,
+          "count": 5000,
+          "hasNull": false,
+          "min": -9219295160509160427,
+          "max": 9217571024994660020,
+          "type": "LONG"
+        },
+        {
+          "columnId": 3,
+          "count": 4950,
+          "hasNull": true,
+          "min": "Darkness,",
+          "max": "worst",
+          "totalLength": 19397,
+          "type": "STRING"
+        }
+      ]
+    },
+    {
+      "stripeNumber": 3,
+      "columnStatistics": [
+        {
+          "columnId": 0,
+          "count": 5000,
+          "hasNull": false
+        },
+        {
+          "columnId": 1,
+          "count": 5000,
+          "hasNull": false,
+          "min": -2146954065,
+          "max": 2146722468,
+          "sum": 20639652136,
+          "type": "LONG"
+        },
+        {
+          "columnId": 2,
+          "count": 5000,
+          "hasNull": false,
+          "min": -9214076359988107846,
+          "max": 9222919052987871506,
+          "type": "LONG"
+        },
+        {
+          "columnId": 3,
+          "count": 4950,
+          "hasNull": true,
+          "min": "Darkness,",
+          "max": "worst",
+          "totalLength": 19031,
+          "type": "STRING"
+        }
+      ]
+    },
+    {
+      "stripeNumber": 4,
+      "columnStatistics": [
+        {
+          "columnId": 0,
+          "count": 5000,
+          "hasNull": false
+        },
+        {
+          "columnId": 1,
+          "count": 5000,
+          "hasNull": false,
+          "min": -2146969085,
+          "max": 2146025044,
+          "sum": -5156814387,
+          "type": "LONG"
+        },
+        {
+          "columnId": 2,
+          "count": 5000,
+          "hasNull": false,
+          "min": -9222731174895935707,
+          "max": 9220625004936875965,
+          "type": "LONG"
+        },
+        {
+          "columnId": 3,
+          "count": 4950,
+          "hasNull": true,
+          "min": "Darkness,",
+          "max": "worst",
+          "totalLength": 19459,
+          "type": "STRING"
+        }
+      ]
+    },
+    {
+      "stripeNumber": 5,
+      "columnStatistics": [
+        {
+          "columnId": 0,
+          "count": 1000,
+          "hasNull": false
+        },
+        {
+          "columnId": 1,
+          "count": 1000,
+          "hasNull": false,
+          "min": -2144303438,
+          "max": 2127599049,
+          "sum": 62841564778,
+          "type": "LONG"
+        },
+        {
+          "columnId": 2,
+          "count": 1000,
+          "hasNull": false,
+          "min": -9195133638801798919,
+          "max": 9218626063131504414,
+          "type": "LONG"
+        },
+        {
+          "columnId": 3,
+          "count": 990,
+          "hasNull": true,
+          "min": "Darkness,",
+          "max": "worst",
+          "totalLength": 3963,
+          "type": "STRING"
+        }
+      ]
+    }
+  ],
+  "fileStatistics": [
+    {
+      "columnId": 0,
+      "count": 21000,
+      "hasNull": false
+    },
+    {
+      "columnId": 1,
+      "count": 21000,
+      "hasNull": false,
+      "min": -2147390285,
+      "max": 2147224606,
+      "sum": 106145458863,
+      "type": "LONG"
+    },
+    {
+      "columnId": 2,
+      "count": 21000,
+      "hasNull": false,
+      "min": -9223180583305557329,
+      "max": 9222919052987871506,
+      "type": "LONG"
+    },
+    {
+      "columnId": 3,
+      "count": 20790,
+      "hasNull": true,
+      "min": "Darkness,",
+      "max": "worst",
+      "totalLength": 81133,
+      "type": "STRING"
+    }
+  ],
+  "stripes": [
+    {
+      "stripeNumber": 1,
+      "stripeInformation": {
+        "offset": 3,
+        "indexLength": 863,
+        "dataLength": 63749,
+        "footerLength": 103,
+        "rowCount": 5000
+      },
+      "streams": [
+        {
+          "columnId": 0,
+          "section": "ROW_INDEX",
+          "startOffset": 3,
+          "length": 17
+        },
+        {
+          "columnId": 1,
+          "section": "ROW_INDEX",
+          "startOffset": 20,
+          "length": 165
+        },
+        {
+          "columnId": 2,
+          "section": "ROW_INDEX",
+          "startOffset": 185,
+          "length": 174
+        },
+        {
+          "columnId": 3,
+          "section": "ROW_INDEX",
+          "startOffset": 359,
+          "length": 103
+        },
+        {
+          "columnId": 3,
+          "section": "BLOOM_FILTER",
+          "startOffset": 462,
+          "length": 404
+        },
+        {
+          "columnId": 1,
+          "section": "DATA",
+          "startOffset": 866,
+          "length": 20029
+        },
+        {
+          "columnId": 2,
+          "section": "DATA",
+          "startOffset": 20895,
+          "length": 40035
+        },
+        {
+          "columnId": 3,
+          "section": "PRESENT",
+          "startOffset": 60930,
+          "length": 17
+        },
+        {
+          "columnId": 3,
+          "section": "DATA",
+          "startOffset": 60947,
+          "length": 3510
+        },
+        {
+          "columnId": 3,
+          "section": "LENGTH",
+          "startOffset": 64457,
+          "length": 25
+        },
+        {
+          "columnId": 3,
+          "section": "DICTIONARY_DATA",
+          "startOffset": 64482,
+          "length": 133
+        }
+      ],
+      "encodings": [
+        {
+          "columnId": 0,
+          "kind": "DIRECT"
+        },
+        {
+          "columnId": 1,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 2,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 3,
+          "kind": "DICTIONARY_V2",
+          "dictionarySize": 35
+        }
+      ],
+      "indexes": [{
+        "columnId": 3,
+        "rowGroupIndexes": [
+          {
+            "entryId": 0,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3873,
+            "type": "STRING",
+            "positions": [
+              0,
+              0,
+              0,
+              0,
+              0,
+              0,
+              0
+            ]
+          },
+          {
+            "entryId": 1,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3861,
+            "type": "STRING",
+            "positions": [
+              0,
+              38,
+              12,
+              0,
+              0,
+              736,
+              23
+            ]
+          },
+          {
+            "entryId": 2,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3946,
+            "type": "STRING",
+            "positions": [
+              0,
+              78,
+              12,
+              0,
+              0,
+              1473,
+              43
+            ]
+          },
+          {
+            "entryId": 3,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3774,
+            "type": "STRING",
+            "positions": [
+              0,
+              118,
+              12,
+              0,
+              0,
+              2067,
+              261
+            ]
+          },
+          {
+            "entryId": 4,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3829,
+            "type": "STRING",
+            "positions": [
+              0,
+              158,
+              12,
+              0,
+              0,
+              2992,
+              35
+            ]
+          }
+        ],
+        "bloomFilterIndexes": [
+          {
+            "entryId": 0,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 1,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 2,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 3,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 4,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          }
+        ],
+        "stripeLevelBloomFilter": {
+          "numHashFunctions": 4,
+          "bitCount": 6272,
+          "popCount": 138,
+          "loadFactor": 0.022002551704645157,
+          "expectedFpp": 2.3436470542037569E-7
+        }
+      }]
+    },
+    {
+      "stripeNumber": 2,
+      "stripeInformation": {
+        "offset": 64718,
+        "indexLength": 854,
+        "dataLength": 63742,
+        "footerLength": 103,
+        "rowCount": 5000
+      },
+      "streams": [
+        {
+          "columnId": 0,
+          "section": "ROW_INDEX",
+          "startOffset": 64718,
+          "length": 17
+        },
+        {
+          "columnId": 1,
+          "section": "ROW_INDEX",
+          "startOffset": 64735,
+          "length": 164
+        },
+        {
+          "columnId": 2,
+          "section": "ROW_INDEX",
+          "startOffset": 64899,
+          "length": 169
+        },
+        {
+          "columnId": 3,
+          "section": "ROW_INDEX",
+          "startOffset": 65068,
+          "length": 100
+        },
+        {
+          "columnId": 3,
+          "section": "BLOOM_FILTER",
+          "startOffset": 65168,
+          "length": 404
+        },
+        {
+          "columnId": 1,
+          "section": "DATA",
+          "startOffset": 65572,
+          "length": 20029
+        },
+        {
+          "columnId": 2,
+          "section": "DATA",
+          "startOffset": 85601,
+          "length": 40035
+        },
+        {
+          "columnId": 3,
+          "section": "PRESENT",
+          "startOffset": 125636,
+          "length": 17
+        },
+        {
+          "columnId": 3,
+          "section": "DATA",
+          "startOffset": 125653,
+          "length": 3503
+        },
+        {
+          "columnId": 3,
+          "section": "LENGTH",
+          "startOffset": 129156,
+          "length": 25
+        },
+        {
+          "columnId": 3,
+          "section": "DICTIONARY_DATA",
+          "startOffset": 129181,
+          "length": 133
+        }
+      ],
+      "encodings": [
+        {
+          "columnId": 0,
+          "kind": "DIRECT"
+        },
+        {
+          "columnId": 1,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 2,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 3,
+          "kind": "DICTIONARY_V2",
+          "dictionarySize": 35
+        }
+      ],
+      "indexes": [{
+        "columnId": 3,
+        "rowGroupIndexes": [
+          {
+            "entryId": 0,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3946,
+            "type": "STRING",
+            "positions": [
+              0,
+              0,
+              0,
+              0,
+              0,
+              0,
+              0
+            ]
+          },
+          {
+            "entryId": 1,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3836,
+            "type": "STRING",
+            "positions": [
+              0,
+              38,
+              12,
+              0,
+              0,
+              746,
+              11
+            ]
+          },
+          {
+            "entryId": 2,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3791,
+            "type": "STRING",
+            "positions": [
+              0,
+              78,
+              12,
+              0,
+              0,
+              1430,
+              95
+            ]
+          },
+          {
+            "entryId": 3,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3904,
+            "type": "STRING",
+            "positions": [
+              0,
+              118,
+              12,
+              0,
+              0,
+              2239,
+              23
+            ]
+          },
+          {
+            "entryId": 4,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3920,
+            "type": "STRING",
+            "positions": [
+              0,
+              158,
+              12,
+              0,
+              0,
+              2994,
+              17
+            ]
+          }
+        ],
+        "bloomFilterIndexes": [
+          {
+            "entryId": 0,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 1,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 2,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 3,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 4,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          }
+        ],
+        "stripeLevelBloomFilter": {
+          "numHashFunctions": 4,
+          "bitCount": 6272,
+          "popCount": 138,
+          "loadFactor": 0.022002551704645157,
+          "expectedFpp": 2.3436470542037569E-7
+        }
+      }]
+    },
+    {
+      "stripeNumber": 3,
+      "stripeInformation": {
+        "offset": 129417,
+        "indexLength": 853,
+        "dataLength": 63749,
+        "footerLength": 103,
+        "rowCount": 5000
+      },
+      "streams": [
+        {
+          "columnId": 0,
+          "section": "ROW_INDEX",
+          "startOffset": 129417,
+          "length": 17
+        },
+        {
+          "columnId": 1,
+          "section": "ROW_INDEX",
+          "startOffset": 129434,
+          "length": 160
+        },
+        {
+          "columnId": 2,
+          "section": "ROW_INDEX",
+          "startOffset": 129594,
+          "length": 170
+        },
+        {
+          "columnId": 3,
+          "section": "ROW_INDEX",
+          "startOffset": 129764,
+          "length": 102
+        },
+        {
+          "columnId": 3,
+          "section": "BLOOM_FILTER",
+          "startOffset": 129866,
+          "length": 404
+        },
+        {
+          "columnId": 1,
+          "section": "DATA",
+          "startOffset": 130270,
+          "length": 20029
+        },
+        {
+          "columnId": 2,
+          "section": "DATA",
+          "startOffset": 150299,
+          "length": 40035
+        },
+        {
+          "columnId": 3,
+          "section": "PRESENT",
+          "startOffset": 190334,
+          "length": 17
+        },
+        {
+          "columnId": 3,
+          "section": "DATA",
+          "startOffset": 190351,
+          "length": 3510
+        },
+        {
+          "columnId": 3,
+          "section": "LENGTH",
+          "startOffset": 193861,
+          "length": 25
+        },
+        {
+          "columnId": 3,
+          "section": "DICTIONARY_DATA",
+          "startOffset": 193886,
+          "length": 133
+        }
+      ],
+      "encodings": [
+        {
+          "columnId": 0,
+          "kind": "DIRECT"
+        },
+        {
+          "columnId": 1,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 2,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 3,
+          "kind": "DICTIONARY_V2",
+          "dictionarySize": 35
+        }
+      ],
+      "indexes": [{
+        "columnId": 3,
+        "rowGroupIndexes": [
+          {
+            "entryId": 0,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3829,
+            "type": "STRING",
+            "positions": [
+              0,
+              0,
+              0,
+              0,
+              0,
+              0,
+              0
+            ]
+          },
+          {
+            "entryId": 1,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3853,
+            "type": "STRING",
+            "positions": [
+              0,
+              38,
+              12,
+              0,
+              0,
+              698,
+              74
+            ]
+          },
+          {
+            "entryId": 2,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3796,
+            "type": "STRING",
+            "positions": [
+              0,
+              78,
+              12,
+              0,
+              0,
+              1483,
+              39
+            ]
+          },
+          {
+            "entryId": 3,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3736,
+            "type": "STRING",
+            "positions": [
+              0,
+              118,
+              12,
+              0,
+              0,
+              2148,
+              155
+            ]
+          },
+          {
+            "entryId": 4,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3817,
+            "type": "STRING",
+            "positions": [
+              0,
+              158,
+              12,
+              0,
+              0,
+              3018,
+              8
+            ]
+          }
+        ],
+        "bloomFilterIndexes": [
+          {
+            "entryId": 0,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 1,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 2,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 3,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 4,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          }
+        ],
+        "stripeLevelBloomFilter": {
+          "numHashFunctions": 4,
+          "bitCount": 6272,
+          "popCount": 138,
+          "loadFactor": 0.022002551704645157,
+          "expectedFpp": 2.3436470542037569E-7
+        }
+      }]
+    },
+    {
+      "stripeNumber": 4,
+      "stripeInformation": {
+        "offset": 194122,
+        "indexLength": 866,
+        "dataLength": 63735,
+        "footerLength": 103,
+        "rowCount": 5000
+      },
+      "streams": [
+        {
+          "columnId": 0,
+          "section": "ROW_INDEX",
+          "startOffset": 194122,
+          "length": 17
+        },
+        {
+          "columnId": 1,
+          "section": "ROW_INDEX",
+          "startOffset": 194139,
+          "length": 164
+        },
+        {
+          "columnId": 2,
+          "section": "ROW_INDEX",
+          "startOffset": 194303,
+          "length": 174
+        },
+        {
+          "columnId": 3,
+          "section": "ROW_INDEX",
+          "startOffset": 194477,
+          "length": 107
+        },
+        {
+          "columnId": 3,
+          "section": "BLOOM_FILTER",
+          "startOffset": 194584,
+          "length": 404
+        },
+        {
+          "columnId": 1,
+          "section": "DATA",
+          "startOffset": 194988,
+          "length": 20029
+        },
+        {
+          "columnId": 2,
+          "section": "DATA",
+          "startOffset": 215017,
+          "length": 40035
+        },
+        {
+          "columnId": 3,
+          "section": "PRESENT",
+          "startOffset": 255052,
+          "length": 17
+        },
+        {
+          "columnId": 3,
+          "section": "DATA",
+          "startOffset": 255069,
+          "length": 3496
+        },
+        {
+          "columnId": 3,
+          "section": "LENGTH",
+          "startOffset": 258565,
+          "length": 25
+        },
+        {
+          "columnId": 3,
+          "section": "DICTIONARY_DATA",
+          "startOffset": 258590,
+          "length": 133
+        }
+      ],
+      "encodings": [
+        {
+          "columnId": 0,
+          "kind": "DIRECT"
+        },
+        {
+          "columnId": 1,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 2,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 3,
+          "kind": "DICTIONARY_V2",
+          "dictionarySize": 35
+        }
+      ],
+      "indexes": [{
+        "columnId": 3,
+        "rowGroupIndexes": [
+          {
+            "entryId": 0,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3959,
+            "type": "STRING",
+            "positions": [
+              0,
+              0,
+              0,
+              0,
+              0,
+              0,
+              0
+            ]
+          },
+          {
+            "entryId": 1,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3816,
+            "type": "STRING",
+            "positions": [
+              0,
+              38,
+              12,
+              0,
+              0,
+              495,
+              338
+            ]
+          },
+          {
+            "entryId": 2,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3883,
+            "type": "STRING",
+            "positions": [
+              0,
+              78,
+              12,
+              0,
+              0,
+              1449,
+              71
+            ]
+          },
+          {
+            "entryId": 3,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3938,
+            "type": "STRING",
+            "positions": [
+              0,
+              118,
+              12,
+              0,
+              0,
+              2207,
+              59
+            ]
+          },
+          {
+            "entryId": 4,
+            "count": 990,
+            "hasNull": true,
+            "min": "Darkness,",
+            "max": "worst",
+            "totalLength": 3863,
+            "type": "STRING",
+            "positions": [
+              0,
+              158,
+              12,
+              0,
+              0,
+              2838,
+              223
+            ]
+          }
+        ],
+        "bloomFilterIndexes": [
+          {
+            "entryId": 0,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 1,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 2,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 3,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          },
+          {
+            "entryId": 4,
+            "numHashFunctions": 4,
+            "bitCount": 6272,
+            "popCount": 138,
+            "loadFactor": 0.022002551704645157,
+            "expectedFpp": 2.3436470542037569E-7
+          }
+        ],
+        "stripeLevelBloomFilter": {
+          "numHashFunctions": 4,
+          "bitCount": 6272,
+          "popCount": 138,
+          "loadFactor": 0.022002551704645157,
+          "expectedFpp": 2.3436470542037569E-7
+        }
+      }]
+    },
+    {
+      "stripeNumber": 5,
+      "stripeInformation": {
+        "offset": 258826,
+        "indexLength": 433,
+        "dataLength": 12940,
+        "footerLength": 95,
+        "rowCount": 1000
+      },
+      "streams": [
+        {
+          "columnId": 0,
+          "section": "ROW_INDEX",
+          "startOffset": 258826,
+          "length": 12
+        },
+        {
+          "columnId": 1,
+          "section": "ROW_INDEX",
+          "startOffset": 258838,
+          "length": 38
+        },
+        {
+          "columnId": 2,
+          "section": "ROW_INDEX",
+          "startOffset": 258876,
+          "length": 41
+        },
+        {
+          "columnId": 3,
+          "section": "ROW_INDEX",
+          "startOffset": 258917,
+          "length": 41
+        },
+        {
+          "columnId": 3,
+          "section": "BLOOM_FILTER",
+          "startOffset": 258958,
+          "length": 301
+        },
+        {
+          "columnId": 1,
+          "section": "DATA",
+          "startOffset": 259259,
+          "length": 4007
+        },
+        {
+          "columnId": 2,
+          "section": "DATA",
+          "startOffset": 263266,
+          "length": 8007
+        },
+        {
+          "columnId": 3,
+          "section": "PRESENT",
+          "startOffset": 271273,
+          "length": 16
+        },
+        {
+          "columnId": 3,
+          "section": "DATA",
+          "startOffset": 271289,
+          "length": 752
+        },
+        {
+          "columnId": 3,
+          "section": "LENGTH",
+          "startOffset": 272041,
+          "length": 25
+        },
+        {
+          "columnId": 3,
+          "section": "DICTIONARY_DATA",
+          "startOffset": 272066,
+          "length": 133
+        }
+      ],
+      "encodings": [
+        {
+          "columnId": 0,
+          "kind": "DIRECT"
+        },
+        {
+          "columnId": 1,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 2,
+          "kind": "DIRECT_V2"
+        },
+        {
+          "columnId": 3,
+          "kind": "DICTIONARY_V2",
+          "dictionarySize": 35
+        }
+      ],
+      "indexes": [{
+        "columnId": 3,
+        "rowGroupIndexes": [{
+          "entryId": 0,
+          "count": 990,
+          "hasNull": true,
+          "min": "Darkness,",
+          "max": "worst",
+          "totalLength": 3963,
+          "type": "STRING",
+          "positions": [
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0
+          ]
+        }],
+        "bloomFilterIndexes": [{
+          "entryId": 0,
+          "numHashFunctions": 4,
+          "bitCount": 6272,
+          "popCount": 138,
+          "loadFactor": 0.022002551704645157,
+          "expectedFpp": 2.3436470542037569E-7
+        }],
+        "stripeLevelBloomFilter": {
+          "numHashFunctions": 4,
+          "bitCount": 6272,
+          "popCount": 138,
+          "loadFactor": 0.022002551704645157,
+          "expectedFpp": 2.3436470542037569E-7
+        }
+      }]
+    }
+  ],
+  "fileLength": 272842,
+  "paddingLength": 0,
+  "paddingRatio": 0
+}