You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/05/07 03:52:35 UTC
hive git commit: HIVE-10592: ORC file dump in JSON format (Prasanth Jayachandran reviewed by Gopal V)
Repository: hive
Updated Branches:
refs/heads/master 93995c8be -> 80fb89131
HIVE-10592: ORC file dump in JSON format (Prasanth Jayachandran reviewed by Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/80fb8913
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/80fb8913
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/80fb8913
Branch: refs/heads/master
Commit: 80fb8913196eef8e4125544c3138b0c73be267b7
Parents: 93995c8
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed May 6 18:52:17 2015 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed May 6 18:52:17 2015 -0700
----------------------------------------------------------------------
bin/ext/orcfiledump.sh | 9 +-
.../hive/ql/io/orc/ColumnStatisticsImpl.java | 16 +-
.../apache/hadoop/hive/ql/io/orc/FileDump.java | 91 +-
.../hadoop/hive/ql/io/orc/JsonFileDump.java | 365 +++++
.../hadoop/hive/ql/io/orc/TestJsonFileDump.java | 138 ++
ql/src/test/resources/orc-file-dump.json | 1354 ++++++++++++++++++
6 files changed, 1929 insertions(+), 44 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/bin/ext/orcfiledump.sh
----------------------------------------------------------------------
diff --git a/bin/ext/orcfiledump.sh b/bin/ext/orcfiledump.sh
index 752e437..6139de2 100644
--- a/bin/ext/orcfiledump.sh
+++ b/bin/ext/orcfiledump.sh
@@ -23,5 +23,12 @@ orcfiledump () {
}
orcfiledump_help () {
- echo "usage ./hive orcfiledump [-d] [--rowindex <col_ids>] <path_to_file>"
+ echo "usage ./hive orcfiledump [-h] [-j] [-p] [-t] [-d] [-r <col_ids>] <path_to_file>"
+ echo ""
+ echo " --json (-j) Print metadata in JSON format"
+ echo " --pretty (-p) Pretty print json metadata output"
+ echo " --timezone (-t) Print writer's time zone"
+ echo " --data (-d) Should the data be printed"
+ echo " --rowindex (-r) <_col_ids_> Comma separated list of column ids for which row index should be printed"
+ echo " --help (-h) Print help message"
}
http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
index 7cfbd81..ffba3c6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
@@ -699,12 +699,18 @@ class ColumnStatisticsImpl implements ColumnStatistics {
@Override
public Date getMinimum() {
+ if (minimum == null) {
+ return null;
+ }
minDate.set(minimum);
return minDate.get();
}
@Override
public Date getMaximum() {
+ if (maximum == null) {
+ return null;
+ }
maxDate.set(maximum);
return maxDate.get();
}
@@ -793,14 +799,12 @@ class ColumnStatisticsImpl implements ColumnStatistics {
@Override
public Timestamp getMinimum() {
- Timestamp minTimestamp = new Timestamp(minimum);
- return minTimestamp;
+ return minimum == null ? null : new Timestamp(minimum);
}
@Override
public Timestamp getMaximum() {
- Timestamp maxTimestamp = new Timestamp(maximum);
- return maxTimestamp;
+ return maximum == null ? null : new Timestamp(maximum);
}
@Override
@@ -808,9 +812,9 @@ class ColumnStatisticsImpl implements ColumnStatistics {
StringBuilder buf = new StringBuilder(super.toString());
if (getNumberOfValues() != 0) {
buf.append(" min: ");
- buf.append(minimum);
+ buf.append(getMinimum());
buf.append(" max: ");
- buf.append(maximum);
+ buf.append(getMaximum());
}
return buf.toString();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index cd4db75..33c4cd8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -50,10 +50,11 @@ import org.codehaus.jettison.json.JSONWriter;
* A tool for printing out the file structure of ORC files.
*/
public final class FileDump {
- private static final String UNKNOWN = "UNKNOWN";
+ public static final String UNKNOWN = "UNKNOWN";
// not used
- private FileDump() {}
+ private FileDump() {
+ }
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
@@ -69,21 +70,28 @@ public final class FileDump {
}
boolean dumpData = cli.hasOption('d');
- if (cli.hasOption("rowindex")) {
- String[] colStrs = cli.getOptionValue("rowindex").split(",");
+ if (cli.hasOption("r")) {
+ String[] colStrs = cli.getOptionValue("r").split(",");
rowIndexCols = new ArrayList<Integer>(colStrs.length);
for (String colStr : colStrs) {
rowIndexCols.add(Integer.parseInt(colStr));
}
}
- boolean printTimeZone = false;
- if (cli.hasOption('t')) {
- printTimeZone = true;
- }
+ boolean printTimeZone = cli.hasOption('t');
+ boolean jsonFormat = cli.hasOption('j');
String[] files = cli.getArgs();
- if (dumpData) printData(Arrays.asList(files), conf);
- else printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
+ if (dumpData) {
+ printData(Arrays.asList(files), conf);
+ } else {
+ if (jsonFormat) {
+ boolean prettyPrint = cli.hasOption('p');
+ JsonFileDump.printJsonMetaData(Arrays.asList(files), conf, rowIndexCols, prettyPrint,
+ printTimeZone);
+ } else {
+ printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
+ }
+ }
}
private static void printData(List<String> files, Configuration conf) throws IOException,
@@ -100,7 +108,7 @@ public final class FileDump {
Path path = new Path(filename);
Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
System.out.println("File Version: " + reader.getFileVersion().getName() +
- " with " + reader.getWriterVersion());
+ " with " + reader.getWriterVersion());
RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
System.out.println("Rows: " + reader.getNumberOfRows());
System.out.println("Compression: " + reader.getCompression());
@@ -121,7 +129,7 @@ public final class FileDump {
ColumnStatistics[] stats = reader.getStatistics();
int colCount = stats.length;
System.out.println("\nFile Statistics:");
- for(int i=0; i < stats.length; ++i) {
+ for (int i = 0; i < stats.length; ++i) {
System.out.println(" Column " + i + ": " + stats[i].toString());
}
System.out.println("\nStripes:");
@@ -140,7 +148,7 @@ public final class FileDump {
System.out.println(" Stripe: " + stripe.toString());
}
long sectionStart = stripeStart;
- for(OrcProto.Stream section: footer.getStreamsList()) {
+ for (OrcProto.Stream section : footer.getStreamsList()) {
String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
System.out.println(" Stream: column " + section.getColumn() +
" section " + kind + " start: " + sectionStart +
@@ -270,7 +278,7 @@ public final class FileDump {
return buf.toString();
}
- private static long getTotalPaddingSize(Reader reader) throws IOException {
+ public static long getTotalPaddingSize(Reader reader) throws IOException {
long paddedBytes = 0;
List<org.apache.hadoop.hive.ql.io.orc.StripeInformation> stripes = reader.getStripes();
for (int i = 1; i < stripes.size(); i++) {
@@ -307,21 +315,30 @@ public final class FileDump {
.withArgName("comma separated list of column ids for which row index should be printed")
.withDescription("Dump stats for column number(s)")
.hasArg()
- .create());
+ .create('r'));
+
+ result.addOption(OptionBuilder
+ .withLongOpt("json")
+ .withDescription("Print metadata in JSON format")
+ .create('j'));
+ result.addOption(OptionBuilder
+ .withLongOpt("pretty")
+ .withDescription("Pretty print json metadata output")
+ .create('p'));
return result;
}
private static void printMap(JSONWriter writer,
- Map<Object, Object> obj,
- List<OrcProto.Type> types,
- OrcProto.Type type
+ Map<Object, Object> obj,
+ List<OrcProto.Type> types,
+ OrcProto.Type type
) throws IOException, JSONException {
writer.array();
int keyType = type.getSubtypes(0);
int valueType = type.getSubtypes(1);
- for(Map.Entry<Object,Object> item: obj.entrySet()) {
+ for (Map.Entry<Object, Object> item : obj.entrySet()) {
writer.object();
writer.key("_key");
printObject(writer, item.getKey(), types, keyType);
@@ -333,34 +350,34 @@ public final class FileDump {
}
private static void printList(JSONWriter writer,
- List<Object> obj,
- List<OrcProto.Type> types,
- OrcProto.Type type
+ List<Object> obj,
+ List<OrcProto.Type> types,
+ OrcProto.Type type
) throws IOException, JSONException {
int subtype = type.getSubtypes(0);
writer.array();
- for(Object item: obj) {
+ for (Object item : obj) {
printObject(writer, item, types, subtype);
}
writer.endArray();
}
private static void printUnion(JSONWriter writer,
- OrcUnion obj,
- List<OrcProto.Type> types,
- OrcProto.Type type
+ OrcUnion obj,
+ List<OrcProto.Type> types,
+ OrcProto.Type type
) throws IOException, JSONException {
int subtype = type.getSubtypes(obj.getTag());
printObject(writer, obj.getObject(), types, subtype);
}
static void printStruct(JSONWriter writer,
- OrcStruct obj,
- List<OrcProto.Type> types,
- OrcProto.Type type) throws IOException, JSONException {
+ OrcStruct obj,
+ List<OrcProto.Type> types,
+ OrcProto.Type type) throws IOException, JSONException {
writer.object();
List<Integer> fieldTypes = type.getSubtypesList();
- for(int i=0; i < fieldTypes.size(); ++i) {
+ for (int i = 0; i < fieldTypes.size(); ++i) {
writer.key(type.getFieldNames(i));
printObject(writer, obj.getFieldValue(i), types, fieldTypes.get(i));
}
@@ -368,9 +385,9 @@ public final class FileDump {
}
static void printObject(JSONWriter writer,
- Object obj,
- List<OrcProto.Type> types,
- int typeId) throws IOException, JSONException {
+ Object obj,
+ List<OrcProto.Type> types,
+ int typeId) throws IOException, JSONException {
OrcProto.Type type = types.get(typeId);
if (obj == null) {
writer.value(null);
@@ -417,7 +434,7 @@ public final class FileDump {
}
static void printJsonData(Configuration conf,
- String filename) throws IOException, JSONException {
+ String filename) throws IOException, JSONException {
Path path = new Path(filename);
Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
OutputStreamWriter out = new OutputStreamWriter(System.out, "UTF-8");
http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
new file mode 100644
index 0000000..c33004e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
@@ -0,0 +1,365 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONObject;
+import org.codehaus.jettison.json.JSONArray;
+import org.codehaus.jettison.json.JSONStringer;
+import org.codehaus.jettison.json.JSONWriter;
+
+/**
+ * File dump tool with json formatted output.
+ */
+public class JsonFileDump {
+
+ public static void printJsonMetaData(List<String> files, Configuration conf,
+ List<Integer> rowIndexCols, boolean prettyPrint, boolean printTimeZone) throws JSONException, IOException {
+ JSONStringer writer = new JSONStringer();
+ boolean multiFile = files.size() > 1;
+ if (multiFile) {
+ writer.array();
+ } else {
+ writer.object();
+ }
+ for (String filename : files) {
+ if (multiFile) {
+ writer.object();
+ }
+ writer.key("fileName").value(filename);
+ Path path = new Path(filename);
+ Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+ writer.key("fileVersion").value(reader.getFileVersion().getName());
+ writer.key("writerVersion").value(reader.getWriterVersion());
+ RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
+ writer.key("numberOfRows").value(reader.getNumberOfRows());
+ writer.key("compression").value(reader.getCompression());
+ if (reader.getCompression() != CompressionKind.NONE) {
+ writer.key("compressionBufferSize").value(reader.getCompressionSize());
+ }
+ writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
+ writer.key("schema").array();
+ writeSchema(writer, reader.getTypes());
+ writer.endArray();
+
+ writer.key("stripeStatistics").array();
+ Metadata metadata = reader.getMetadata();
+ for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
+ writer.object();
+ writer.key("stripeNumber").value(n + 1);
+ StripeStatistics ss = metadata.getStripeStatistics().get(n);
+ writer.key("columnStatistics").array();
+ for (int i = 0; i < ss.getColumnStatistics().length; i++) {
+ writer.object();
+ writer.key("columnId").value(i);
+ writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
+ writer.endObject();
+ }
+ writer.endArray();
+ writer.endObject();
+ }
+ writer.endArray();
+
+ ColumnStatistics[] stats = reader.getStatistics();
+ int colCount = stats.length;
+ writer.key("fileStatistics").array();
+ for (int i = 0; i < stats.length; ++i) {
+ writer.object();
+ writer.key("columnId").value(i);
+ writeColumnStatistics(writer, stats[i]);
+ writer.endObject();
+ }
+ writer.endArray();
+
+ writer.key("stripes").array();
+ int stripeIx = -1;
+ for (StripeInformation stripe : reader.getStripes()) {
+ ++stripeIx;
+ long stripeStart = stripe.getOffset();
+ OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+ writer.object(); // start of stripe information
+ writer.key("stripeNumber").value(stripeIx + 1);
+ writer.key("stripeInformation");
+ writeStripeInformation(writer, stripe);
+ if (printTimeZone) {
+ writer.key("writerTimezone").value(
+ footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
+ }
+ long sectionStart = stripeStart;
+
+ writer.key("streams").array();
+ for (OrcProto.Stream section : footer.getStreamsList()) {
+ writer.object();
+ String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
+ writer.key("columnId").value(section.getColumn());
+ writer.key("section").value(kind);
+ writer.key("startOffset").value(sectionStart);
+ writer.key("length").value(section.getLength());
+ sectionStart += section.getLength();
+ writer.endObject();
+ }
+ writer.endArray();
+
+ writer.key("encodings").array();
+ for (int i = 0; i < footer.getColumnsCount(); ++i) {
+ writer.object();
+ OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+ writer.key("columnId").value(i);
+ writer.key("kind").value(encoding.getKind());
+ if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+ encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+ writer.key("dictionarySize").value(encoding.getDictionarySize());
+ }
+ writer.endObject();
+ }
+ writer.endArray();
+
+ if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
+ // include the columns that are specified, only if the columns are included, bloom filter
+ // will be read
+ boolean[] sargColumns = new boolean[colCount];
+ for (int colIdx : rowIndexCols) {
+ sargColumns[colIdx] = true;
+ }
+ RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
+ writer.key("indexes").array();
+ for (int col : rowIndexCols) {
+ writer.object();
+ writer.key("columnId").value(col);
+ writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
+ writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+ writer.endObject(); // end of stripe information
+ }
+ writer.endArray();
+
+ FileSystem fs = path.getFileSystem(conf);
+ long fileLen = fs.getContentSummary(path).getLength();
+ long paddedBytes = FileDump.getTotalPaddingSize(reader);
+ // empty ORC file is ~45 bytes. Assumption here is file length always >0
+ double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
+ writer.key("fileLength").value(fileLen);
+ writer.key("paddingLength").value(paddedBytes);
+ writer.key("paddingRatio").value(percentPadding);
+ rows.close();
+
+ writer.endObject();
+ }
+ if (multiFile) {
+ writer.endArray();
+ }
+
+ if (prettyPrint) {
+ final String prettyJson;
+ if (multiFile) {
+ JSONArray jsonArray = new JSONArray(writer.toString());
+ prettyJson = jsonArray.toString(2);
+ } else {
+ JSONObject jsonObject = new JSONObject(writer.toString());
+ prettyJson = jsonObject.toString(2);
+ }
+ System.out.println(prettyJson);
+ } else {
+ System.out.println(writer.toString());
+ }
+ }
+
+ private static void writeSchema(JSONStringer writer, List<OrcProto.Type> types)
+ throws JSONException {
+ int i = 0;
+ for(OrcProto.Type type : types) {
+ writer.object();
+ writer.key("columnId").value(i++);
+ writer.key("columnType").value(type.getKind());
+ if (type.getFieldNamesCount() > 0) {
+ writer.key("childColumnNames").array();
+ for (String field : type.getFieldNamesList()) {
+ writer.value(field);
+ }
+ writer.endArray();
+ writer.key("childColumnIds").array();
+ for (Integer colId : type.getSubtypesList()) {
+ writer.value(colId);
+ }
+ writer.endArray();
+ }
+ if (type.hasPrecision()) {
+ writer.key("precision").value(type.getPrecision());
+ }
+
+ if (type.hasScale()) {
+ writer.key("scale").value(type.getScale());
+ }
+
+ if (type.hasMaximumLength()) {
+ writer.key("maxLength").value(type.getMaximumLength());
+ }
+ writer.endObject();
+ }
+ }
+
+ private static void writeStripeInformation(JSONWriter writer, StripeInformation stripe)
+ throws JSONException {
+ writer.object();
+ writer.key("offset").value(stripe.getOffset());
+ writer.key("indexLength").value(stripe.getIndexLength());
+ writer.key("dataLength").value(stripe.getDataLength());
+ writer.key("footerLength").value(stripe.getFooterLength());
+ writer.key("rowCount").value(stripe.getNumberOfRows());
+ writer.endObject();
+ }
+
+ private static void writeColumnStatistics(JSONWriter writer, ColumnStatistics cs)
+ throws JSONException {
+ if (cs != null) {
+ writer.key("count").value(cs.getNumberOfValues());
+ writer.key("hasNull").value(cs.hasNull());
+ if (cs instanceof BinaryColumnStatistics) {
+ writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum());
+ writer.key("type").value(OrcProto.Type.Kind.BINARY);
+ } else if (cs instanceof BooleanColumnStatistics) {
+ writer.key("trueCount").value(((BooleanColumnStatistics) cs).getTrueCount());
+ writer.key("falseCount").value(((BooleanColumnStatistics) cs).getFalseCount());
+ writer.key("type").value(OrcProto.Type.Kind.BOOLEAN);
+ } else if (cs instanceof IntegerColumnStatistics) {
+ writer.key("min").value(((IntegerColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((IntegerColumnStatistics) cs).getMaximum());
+ if (((IntegerColumnStatistics) cs).isSumDefined()) {
+ writer.key("sum").value(((IntegerColumnStatistics) cs).getSum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.LONG);
+ } else if (cs instanceof DoubleColumnStatistics) {
+ writer.key("min").value(((DoubleColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((DoubleColumnStatistics) cs).getMaximum());
+ writer.key("sum").value(((DoubleColumnStatistics) cs).getSum());
+ writer.key("type").value(OrcProto.Type.Kind.DOUBLE);
+ } else if (cs instanceof StringColumnStatistics) {
+ writer.key("min").value(((StringColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((StringColumnStatistics) cs).getMaximum());
+ writer.key("totalLength").value(((StringColumnStatistics) cs).getSum());
+ writer.key("type").value(OrcProto.Type.Kind.STRING);
+ } else if (cs instanceof DateColumnStatistics) {
+ if (((DateColumnStatistics) cs).getMaximum() != null) {
+ writer.key("min").value(((DateColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((DateColumnStatistics) cs).getMaximum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.DATE);
+ } else if (cs instanceof TimestampColumnStatistics) {
+ if (((TimestampColumnStatistics) cs).getMaximum() != null) {
+ writer.key("min").value(((TimestampColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((TimestampColumnStatistics) cs).getMaximum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.TIMESTAMP);
+ } else if (cs instanceof DecimalColumnStatistics) {
+ if (((DecimalColumnStatistics) cs).getMaximum() != null) {
+ writer.key("min").value(((DecimalColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((DecimalColumnStatistics) cs).getMaximum());
+ writer.key("sum").value(((DecimalColumnStatistics) cs).getSum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.DECIMAL);
+ }
+ }
+ }
+
+ private static void writeBloomFilterIndexes(JSONWriter writer, int col,
+ OrcProto.BloomFilterIndex[] bloomFilterIndex) throws JSONException {
+
+ BloomFilterIO stripeLevelBF = null;
+ if (bloomFilterIndex != null && bloomFilterIndex[col] != null) {
+ int entryIx = 0;
+ writer.key("bloomFilterIndexes").array();
+ for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
+ writer.object();
+ writer.key("entryId").value(entryIx++);
+ BloomFilterIO toMerge = new BloomFilterIO(bf);
+ writeBloomFilterStats(writer, toMerge);
+ if (stripeLevelBF == null) {
+ stripeLevelBF = toMerge;
+ } else {
+ stripeLevelBF.merge(toMerge);
+ }
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+ if (stripeLevelBF != null) {
+ writer.key("stripeLevelBloomFilter");
+ writer.object();
+ writeBloomFilterStats(writer, stripeLevelBF);
+ writer.endObject();
+ }
+ }
+
+ private static void writeBloomFilterStats(JSONWriter writer, BloomFilterIO bf)
+ throws JSONException {
+ int bitCount = bf.getBitSize();
+ int popCount = 0;
+ for (long l : bf.getBitSet()) {
+ popCount += Long.bitCount(l);
+ }
+ int k = bf.getNumHashFunctions();
+ float loadFactor = (float) popCount / (float) bitCount;
+ float expectedFpp = (float) Math.pow(loadFactor, k);
+ writer.key("numHashFunctions").value(k);
+ writer.key("bitCount").value(bitCount);
+ writer.key("popCount").value(popCount);
+ writer.key("loadFactor").value(loadFactor);
+ writer.key("expectedFpp").value(expectedFpp);
+ }
+
+ private static void writeRowGroupIndexes(JSONWriter writer, int col,
+ OrcProto.RowIndex[] rowGroupIndex)
+ throws JSONException {
+
+ OrcProto.RowIndex index;
+ if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
+ ((index = rowGroupIndex[col]) == null)) {
+ return;
+ }
+
+ writer.key("rowGroupIndexes").array();
+ for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
+ writer.object();
+ writer.key("entryId").value(entryIx);
+ OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
+ if (entry == null) {
+ continue;
+ }
+ OrcProto.ColumnStatistics colStats = entry.getStatistics();
+ writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));
+ writer.key("positions").array();
+ for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
+ writer.value(entry.getPositions(posIx));
+ }
+ writer.endArray();
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
new file mode 100644
index 0000000..d17c528
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestJsonFileDump.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestJsonFileDump {
+
+ Path workDir = new Path(System.getProperty("test.tmp.dir"));
+ Configuration conf;
+ FileSystem fs;
+ Path testFilePath;
+
+ @Before
+ public void openFileSystem () throws Exception {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ fs.setWorkingDirectory(workDir);
+ testFilePath = new Path("TestFileDump.testDump.orc");
+ fs.delete(testFilePath, false);
+ }
+
+ static class MyRecord {
+ int i;
+ long l;
+ String s;
+ MyRecord(int i, long l, String s) {
+ this.i = i;
+ this.l = l;
+ this.s = s;
+ }
+ }
+
+ static void checkOutput(String expected,
+ String actual) throws Exception {
+ BufferedReader eStream =
+ new BufferedReader(new FileReader(HiveTestUtils.getFileFromClasspath(expected)));
+ BufferedReader aStream =
+ new BufferedReader(new FileReader(actual));
+ String expectedLine = eStream.readLine();
+ while (expectedLine != null) {
+ String actualLine = aStream.readLine();
+ System.out.println("actual: " + actualLine);
+ System.out.println("expected: " + expectedLine);
+ assertEquals(expectedLine, actualLine);
+ expectedLine = eStream.readLine();
+ }
+ assertNull(eStream.readLine());
+ assertNull(aStream.readLine());
+ }
+
+ @Test
+ public void testJsonDump() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ conf.set(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname, "COMPRESSION");
+ OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+ .fileSystem(fs)
+ .inspector(inspector)
+ .stripeSize(100000)
+ .compress(CompressionKind.ZLIB)
+ .bufferSize(10000)
+ .rowIndexStride(1000)
+ .bloomFilterColumns("s");
+ Writer writer = OrcFile.createWriter(testFilePath, options);
+ Random r1 = new Random(1);
+ String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+ "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+ "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+ "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+ "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+ "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+ "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+ "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+ "before", "us,", "we", "were", "all", "going", "direct", "to",
+ "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+ "way"};
+ for(int i=0; i < 21000; ++i) {
+ if (i % 100 == 0) {
+ writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(), null));
+ } else {
+ writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
+ words[r1.nextInt(words.length)]));
+ }
+ }
+
+ writer.close();
+ PrintStream origOut = System.out;
+ String outputFilename = "orc-file-dump.json";
+ FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut));
+ FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=3"});
+ System.out.flush();
+ System.setOut(origOut);
+
+
+ checkOutput(outputFilename, workDir + File.separator + outputFilename);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/80fb8913/ql/src/test/resources/orc-file-dump.json
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump.json b/ql/src/test/resources/orc-file-dump.json
new file mode 100644
index 0000000..125a32e
--- /dev/null
+++ b/ql/src/test/resources/orc-file-dump.json
@@ -0,0 +1,1354 @@
+{
+ "fileName": "TestFileDump.testDump.orc",
+ "fileVersion": "0.12",
+ "writerVersion": "HIVE_8732",
+ "numberOfRows": 21000,
+ "compression": "ZLIB",
+ "compressionBufferSize": 10000,
+ "schemaString": "struct<i:int,l:bigint,s:string>",
+ "schema": [
+ {
+ "columnId": 0,
+ "columnType": "STRUCT",
+ "childColumnNames": [
+ "i",
+ "l",
+ "s"
+ ],
+ "childColumnIds": [
+ 1,
+ 2,
+ 3
+ ]
+ },
+ {
+ "columnId": 1,
+ "columnType": "INT"
+ },
+ {
+ "columnId": 2,
+ "columnType": "LONG"
+ },
+ {
+ "columnId": 3,
+ "columnType": "STRING"
+ }
+ ],
+ "stripeStatistics": [
+ {
+ "stripeNumber": 1,
+ "columnStatistics": [
+ {
+ "columnId": 0,
+ "count": 5000,
+ "hasNull": false
+ },
+ {
+ "columnId": 1,
+ "count": 5000,
+ "hasNull": false,
+ "min": -2147115959,
+ "max": 2145210552,
+ "sum": 50111854553,
+ "type": "LONG"
+ },
+ {
+ "columnId": 2,
+ "count": 5000,
+ "hasNull": false,
+ "min": -9223180583305557329,
+ "max": 9221614132680747961,
+ "type": "LONG"
+ },
+ {
+ "columnId": 3,
+ "count": 4950,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 19283,
+ "type": "STRING"
+ }
+ ]
+ },
+ {
+ "stripeNumber": 2,
+ "columnStatistics": [
+ {
+ "columnId": 0,
+ "count": 5000,
+ "hasNull": false
+ },
+ {
+ "columnId": 1,
+ "count": 5000,
+ "hasNull": false,
+ "min": -2147390285,
+ "max": 2147224606,
+ "sum": -22290798217,
+ "type": "LONG"
+ },
+ {
+ "columnId": 2,
+ "count": 5000,
+ "hasNull": false,
+ "min": -9219295160509160427,
+ "max": 9217571024994660020,
+ "type": "LONG"
+ },
+ {
+ "columnId": 3,
+ "count": 4950,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 19397,
+ "type": "STRING"
+ }
+ ]
+ },
+ {
+ "stripeNumber": 3,
+ "columnStatistics": [
+ {
+ "columnId": 0,
+ "count": 5000,
+ "hasNull": false
+ },
+ {
+ "columnId": 1,
+ "count": 5000,
+ "hasNull": false,
+ "min": -2146954065,
+ "max": 2146722468,
+ "sum": 20639652136,
+ "type": "LONG"
+ },
+ {
+ "columnId": 2,
+ "count": 5000,
+ "hasNull": false,
+ "min": -9214076359988107846,
+ "max": 9222919052987871506,
+ "type": "LONG"
+ },
+ {
+ "columnId": 3,
+ "count": 4950,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 19031,
+ "type": "STRING"
+ }
+ ]
+ },
+ {
+ "stripeNumber": 4,
+ "columnStatistics": [
+ {
+ "columnId": 0,
+ "count": 5000,
+ "hasNull": false
+ },
+ {
+ "columnId": 1,
+ "count": 5000,
+ "hasNull": false,
+ "min": -2146969085,
+ "max": 2146025044,
+ "sum": -5156814387,
+ "type": "LONG"
+ },
+ {
+ "columnId": 2,
+ "count": 5000,
+ "hasNull": false,
+ "min": -9222731174895935707,
+ "max": 9220625004936875965,
+ "type": "LONG"
+ },
+ {
+ "columnId": 3,
+ "count": 4950,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 19459,
+ "type": "STRING"
+ }
+ ]
+ },
+ {
+ "stripeNumber": 5,
+ "columnStatistics": [
+ {
+ "columnId": 0,
+ "count": 1000,
+ "hasNull": false
+ },
+ {
+ "columnId": 1,
+ "count": 1000,
+ "hasNull": false,
+ "min": -2144303438,
+ "max": 2127599049,
+ "sum": 62841564778,
+ "type": "LONG"
+ },
+ {
+ "columnId": 2,
+ "count": 1000,
+ "hasNull": false,
+ "min": -9195133638801798919,
+ "max": 9218626063131504414,
+ "type": "LONG"
+ },
+ {
+ "columnId": 3,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3963,
+ "type": "STRING"
+ }
+ ]
+ }
+ ],
+ "fileStatistics": [
+ {
+ "columnId": 0,
+ "count": 21000,
+ "hasNull": false
+ },
+ {
+ "columnId": 1,
+ "count": 21000,
+ "hasNull": false,
+ "min": -2147390285,
+ "max": 2147224606,
+ "sum": 106145458863,
+ "type": "LONG"
+ },
+ {
+ "columnId": 2,
+ "count": 21000,
+ "hasNull": false,
+ "min": -9223180583305557329,
+ "max": 9222919052987871506,
+ "type": "LONG"
+ },
+ {
+ "columnId": 3,
+ "count": 20790,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 81133,
+ "type": "STRING"
+ }
+ ],
+ "stripes": [
+ {
+ "stripeNumber": 1,
+ "stripeInformation": {
+ "offset": 3,
+ "indexLength": 863,
+ "dataLength": 63749,
+ "footerLength": 103,
+ "rowCount": 5000
+ },
+ "streams": [
+ {
+ "columnId": 0,
+ "section": "ROW_INDEX",
+ "startOffset": 3,
+ "length": 17
+ },
+ {
+ "columnId": 1,
+ "section": "ROW_INDEX",
+ "startOffset": 20,
+ "length": 165
+ },
+ {
+ "columnId": 2,
+ "section": "ROW_INDEX",
+ "startOffset": 185,
+ "length": 174
+ },
+ {
+ "columnId": 3,
+ "section": "ROW_INDEX",
+ "startOffset": 359,
+ "length": 103
+ },
+ {
+ "columnId": 3,
+ "section": "BLOOM_FILTER",
+ "startOffset": 462,
+ "length": 404
+ },
+ {
+ "columnId": 1,
+ "section": "DATA",
+ "startOffset": 866,
+ "length": 20029
+ },
+ {
+ "columnId": 2,
+ "section": "DATA",
+ "startOffset": 20895,
+ "length": 40035
+ },
+ {
+ "columnId": 3,
+ "section": "PRESENT",
+ "startOffset": 60930,
+ "length": 17
+ },
+ {
+ "columnId": 3,
+ "section": "DATA",
+ "startOffset": 60947,
+ "length": 3510
+ },
+ {
+ "columnId": 3,
+ "section": "LENGTH",
+ "startOffset": 64457,
+ "length": 25
+ },
+ {
+ "columnId": 3,
+ "section": "DICTIONARY_DATA",
+ "startOffset": 64482,
+ "length": 133
+ }
+ ],
+ "encodings": [
+ {
+ "columnId": 0,
+ "kind": "DIRECT"
+ },
+ {
+ "columnId": 1,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 2,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 3,
+ "kind": "DICTIONARY_V2",
+ "dictionarySize": 35
+ }
+ ],
+ "indexes": [{
+ "columnId": 3,
+ "rowGroupIndexes": [
+ {
+ "entryId": 0,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3873,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ]
+ },
+ {
+ "entryId": 1,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3861,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 38,
+ 12,
+ 0,
+ 0,
+ 736,
+ 23
+ ]
+ },
+ {
+ "entryId": 2,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3946,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 78,
+ 12,
+ 0,
+ 0,
+ 1473,
+ 43
+ ]
+ },
+ {
+ "entryId": 3,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3774,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 118,
+ 12,
+ 0,
+ 0,
+ 2067,
+ 261
+ ]
+ },
+ {
+ "entryId": 4,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3829,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 158,
+ 12,
+ 0,
+ 0,
+ 2992,
+ 35
+ ]
+ }
+ ],
+ "bloomFilterIndexes": [
+ {
+ "entryId": 0,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 1,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 2,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 3,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 4,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }
+ ],
+ "stripeLevelBloomFilter": {
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }
+ }]
+ },
+ {
+ "stripeNumber": 2,
+ "stripeInformation": {
+ "offset": 64718,
+ "indexLength": 854,
+ "dataLength": 63742,
+ "footerLength": 103,
+ "rowCount": 5000
+ },
+ "streams": [
+ {
+ "columnId": 0,
+ "section": "ROW_INDEX",
+ "startOffset": 64718,
+ "length": 17
+ },
+ {
+ "columnId": 1,
+ "section": "ROW_INDEX",
+ "startOffset": 64735,
+ "length": 164
+ },
+ {
+ "columnId": 2,
+ "section": "ROW_INDEX",
+ "startOffset": 64899,
+ "length": 169
+ },
+ {
+ "columnId": 3,
+ "section": "ROW_INDEX",
+ "startOffset": 65068,
+ "length": 100
+ },
+ {
+ "columnId": 3,
+ "section": "BLOOM_FILTER",
+ "startOffset": 65168,
+ "length": 404
+ },
+ {
+ "columnId": 1,
+ "section": "DATA",
+ "startOffset": 65572,
+ "length": 20029
+ },
+ {
+ "columnId": 2,
+ "section": "DATA",
+ "startOffset": 85601,
+ "length": 40035
+ },
+ {
+ "columnId": 3,
+ "section": "PRESENT",
+ "startOffset": 125636,
+ "length": 17
+ },
+ {
+ "columnId": 3,
+ "section": "DATA",
+ "startOffset": 125653,
+ "length": 3503
+ },
+ {
+ "columnId": 3,
+ "section": "LENGTH",
+ "startOffset": 129156,
+ "length": 25
+ },
+ {
+ "columnId": 3,
+ "section": "DICTIONARY_DATA",
+ "startOffset": 129181,
+ "length": 133
+ }
+ ],
+ "encodings": [
+ {
+ "columnId": 0,
+ "kind": "DIRECT"
+ },
+ {
+ "columnId": 1,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 2,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 3,
+ "kind": "DICTIONARY_V2",
+ "dictionarySize": 35
+ }
+ ],
+ "indexes": [{
+ "columnId": 3,
+ "rowGroupIndexes": [
+ {
+ "entryId": 0,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3946,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ]
+ },
+ {
+ "entryId": 1,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3836,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 38,
+ 12,
+ 0,
+ 0,
+ 746,
+ 11
+ ]
+ },
+ {
+ "entryId": 2,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3791,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 78,
+ 12,
+ 0,
+ 0,
+ 1430,
+ 95
+ ]
+ },
+ {
+ "entryId": 3,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3904,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 118,
+ 12,
+ 0,
+ 0,
+ 2239,
+ 23
+ ]
+ },
+ {
+ "entryId": 4,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3920,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 158,
+ 12,
+ 0,
+ 0,
+ 2994,
+ 17
+ ]
+ }
+ ],
+ "bloomFilterIndexes": [
+ {
+ "entryId": 0,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 1,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 2,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 3,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 4,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }
+ ],
+ "stripeLevelBloomFilter": {
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }
+ }]
+ },
+ {
+ "stripeNumber": 3,
+ "stripeInformation": {
+ "offset": 129417,
+ "indexLength": 853,
+ "dataLength": 63749,
+ "footerLength": 103,
+ "rowCount": 5000
+ },
+ "streams": [
+ {
+ "columnId": 0,
+ "section": "ROW_INDEX",
+ "startOffset": 129417,
+ "length": 17
+ },
+ {
+ "columnId": 1,
+ "section": "ROW_INDEX",
+ "startOffset": 129434,
+ "length": 160
+ },
+ {
+ "columnId": 2,
+ "section": "ROW_INDEX",
+ "startOffset": 129594,
+ "length": 170
+ },
+ {
+ "columnId": 3,
+ "section": "ROW_INDEX",
+ "startOffset": 129764,
+ "length": 102
+ },
+ {
+ "columnId": 3,
+ "section": "BLOOM_FILTER",
+ "startOffset": 129866,
+ "length": 404
+ },
+ {
+ "columnId": 1,
+ "section": "DATA",
+ "startOffset": 130270,
+ "length": 20029
+ },
+ {
+ "columnId": 2,
+ "section": "DATA",
+ "startOffset": 150299,
+ "length": 40035
+ },
+ {
+ "columnId": 3,
+ "section": "PRESENT",
+ "startOffset": 190334,
+ "length": 17
+ },
+ {
+ "columnId": 3,
+ "section": "DATA",
+ "startOffset": 190351,
+ "length": 3510
+ },
+ {
+ "columnId": 3,
+ "section": "LENGTH",
+ "startOffset": 193861,
+ "length": 25
+ },
+ {
+ "columnId": 3,
+ "section": "DICTIONARY_DATA",
+ "startOffset": 193886,
+ "length": 133
+ }
+ ],
+ "encodings": [
+ {
+ "columnId": 0,
+ "kind": "DIRECT"
+ },
+ {
+ "columnId": 1,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 2,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 3,
+ "kind": "DICTIONARY_V2",
+ "dictionarySize": 35
+ }
+ ],
+ "indexes": [{
+ "columnId": 3,
+ "rowGroupIndexes": [
+ {
+ "entryId": 0,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3829,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ]
+ },
+ {
+ "entryId": 1,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3853,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 38,
+ 12,
+ 0,
+ 0,
+ 698,
+ 74
+ ]
+ },
+ {
+ "entryId": 2,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3796,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 78,
+ 12,
+ 0,
+ 0,
+ 1483,
+ 39
+ ]
+ },
+ {
+ "entryId": 3,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3736,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 118,
+ 12,
+ 0,
+ 0,
+ 2148,
+ 155
+ ]
+ },
+ {
+ "entryId": 4,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3817,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 158,
+ 12,
+ 0,
+ 0,
+ 3018,
+ 8
+ ]
+ }
+ ],
+ "bloomFilterIndexes": [
+ {
+ "entryId": 0,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 1,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 2,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 3,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 4,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }
+ ],
+ "stripeLevelBloomFilter": {
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }
+ }]
+ },
+ {
+ "stripeNumber": 4,
+ "stripeInformation": {
+ "offset": 194122,
+ "indexLength": 866,
+ "dataLength": 63735,
+ "footerLength": 103,
+ "rowCount": 5000
+ },
+ "streams": [
+ {
+ "columnId": 0,
+ "section": "ROW_INDEX",
+ "startOffset": 194122,
+ "length": 17
+ },
+ {
+ "columnId": 1,
+ "section": "ROW_INDEX",
+ "startOffset": 194139,
+ "length": 164
+ },
+ {
+ "columnId": 2,
+ "section": "ROW_INDEX",
+ "startOffset": 194303,
+ "length": 174
+ },
+ {
+ "columnId": 3,
+ "section": "ROW_INDEX",
+ "startOffset": 194477,
+ "length": 107
+ },
+ {
+ "columnId": 3,
+ "section": "BLOOM_FILTER",
+ "startOffset": 194584,
+ "length": 404
+ },
+ {
+ "columnId": 1,
+ "section": "DATA",
+ "startOffset": 194988,
+ "length": 20029
+ },
+ {
+ "columnId": 2,
+ "section": "DATA",
+ "startOffset": 215017,
+ "length": 40035
+ },
+ {
+ "columnId": 3,
+ "section": "PRESENT",
+ "startOffset": 255052,
+ "length": 17
+ },
+ {
+ "columnId": 3,
+ "section": "DATA",
+ "startOffset": 255069,
+ "length": 3496
+ },
+ {
+ "columnId": 3,
+ "section": "LENGTH",
+ "startOffset": 258565,
+ "length": 25
+ },
+ {
+ "columnId": 3,
+ "section": "DICTIONARY_DATA",
+ "startOffset": 258590,
+ "length": 133
+ }
+ ],
+ "encodings": [
+ {
+ "columnId": 0,
+ "kind": "DIRECT"
+ },
+ {
+ "columnId": 1,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 2,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 3,
+ "kind": "DICTIONARY_V2",
+ "dictionarySize": 35
+ }
+ ],
+ "indexes": [{
+ "columnId": 3,
+ "rowGroupIndexes": [
+ {
+ "entryId": 0,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3959,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ]
+ },
+ {
+ "entryId": 1,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3816,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 38,
+ 12,
+ 0,
+ 0,
+ 495,
+ 338
+ ]
+ },
+ {
+ "entryId": 2,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3883,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 78,
+ 12,
+ 0,
+ 0,
+ 1449,
+ 71
+ ]
+ },
+ {
+ "entryId": 3,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3938,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 118,
+ 12,
+ 0,
+ 0,
+ 2207,
+ 59
+ ]
+ },
+ {
+ "entryId": 4,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3863,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 158,
+ 12,
+ 0,
+ 0,
+ 2838,
+ 223
+ ]
+ }
+ ],
+ "bloomFilterIndexes": [
+ {
+ "entryId": 0,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 1,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 2,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 3,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ },
+ {
+ "entryId": 4,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }
+ ],
+ "stripeLevelBloomFilter": {
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }
+ }]
+ },
+ {
+ "stripeNumber": 5,
+ "stripeInformation": {
+ "offset": 258826,
+ "indexLength": 433,
+ "dataLength": 12940,
+ "footerLength": 95,
+ "rowCount": 1000
+ },
+ "streams": [
+ {
+ "columnId": 0,
+ "section": "ROW_INDEX",
+ "startOffset": 258826,
+ "length": 12
+ },
+ {
+ "columnId": 1,
+ "section": "ROW_INDEX",
+ "startOffset": 258838,
+ "length": 38
+ },
+ {
+ "columnId": 2,
+ "section": "ROW_INDEX",
+ "startOffset": 258876,
+ "length": 41
+ },
+ {
+ "columnId": 3,
+ "section": "ROW_INDEX",
+ "startOffset": 258917,
+ "length": 41
+ },
+ {
+ "columnId": 3,
+ "section": "BLOOM_FILTER",
+ "startOffset": 258958,
+ "length": 301
+ },
+ {
+ "columnId": 1,
+ "section": "DATA",
+ "startOffset": 259259,
+ "length": 4007
+ },
+ {
+ "columnId": 2,
+ "section": "DATA",
+ "startOffset": 263266,
+ "length": 8007
+ },
+ {
+ "columnId": 3,
+ "section": "PRESENT",
+ "startOffset": 271273,
+ "length": 16
+ },
+ {
+ "columnId": 3,
+ "section": "DATA",
+ "startOffset": 271289,
+ "length": 752
+ },
+ {
+ "columnId": 3,
+ "section": "LENGTH",
+ "startOffset": 272041,
+ "length": 25
+ },
+ {
+ "columnId": 3,
+ "section": "DICTIONARY_DATA",
+ "startOffset": 272066,
+ "length": 133
+ }
+ ],
+ "encodings": [
+ {
+ "columnId": 0,
+ "kind": "DIRECT"
+ },
+ {
+ "columnId": 1,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 2,
+ "kind": "DIRECT_V2"
+ },
+ {
+ "columnId": 3,
+ "kind": "DICTIONARY_V2",
+ "dictionarySize": 35
+ }
+ ],
+ "indexes": [{
+ "columnId": 3,
+ "rowGroupIndexes": [{
+ "entryId": 0,
+ "count": 990,
+ "hasNull": true,
+ "min": "Darkness,",
+ "max": "worst",
+ "totalLength": 3963,
+ "type": "STRING",
+ "positions": [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
+ ]
+ }],
+ "bloomFilterIndexes": [{
+ "entryId": 0,
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }],
+ "stripeLevelBloomFilter": {
+ "numHashFunctions": 4,
+ "bitCount": 6272,
+ "popCount": 138,
+ "loadFactor": 0.022002551704645157,
+ "expectedFpp": 2.3436470542037569E-7
+ }
+ }]
+ }
+ ],
+ "fileLength": 272842,
+ "paddingLength": 0,
+ "paddingRatio": 0
+}