You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2015/09/17 20:46:51 UTC
parquet-mr git commit: PARQUET-360: Handle all map key types with cat
tool's json dump
Repository: parquet-mr
Updated Branches:
refs/heads/master 66e39fc7d -> 0637e2fbc
PARQUET-360: Handle all map key types with cat tool's json dump
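Dumping a Parquet map with `parquet-cat --json` throws a `ClassCastException` when a map key is not a `String` (for example a binary `byte[]` key, reported as `[B` in the trace below), because the JSON dump casts every key to `String` instead of handling all map key types.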
```
java.lang.ClassCastException: [B cannot be cast to java.lang.String
at org.apache.parquet.tools.read.SimpleMapRecord.toJsonObject(SimpleMapRecord.java:34)
at org.apache.parquet.tools.read.SimpleRecord.toJsonValue(SimpleRecord.java:119)
at org.apache.parquet.tools.read.SimpleRecord.toJsonObject(SimpleRecord.java:112)
at org.apache.parquet.tools.read.SimpleRecord.prettyPrintJson(SimpleRecord.java:106)
at org.apache.parquet.tools.command.CatCommand.execute(CatCommand.java:76)
at org.apache.parquet.tools.Main.main(Main.java:222)
[B cannot be cast to java.lang.String
```
Author: Nezih Yigitbasi <ny...@netflix.com>
Closes #259 from nezihyigitbasi/parquet-cat-json and squashes the following commits:
d047502 [Nezih Yigitbasi] Add unit test
e4cd545 [Nezih Yigitbasi] Get rid of deprecated methods
bdc8fdf [Nezih Yigitbasi] Handle all map key types with cat tool's json dump
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/0637e2fb
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/0637e2fb
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/0637e2fb
Branch: refs/heads/master
Commit: 0637e2fbcd401f47bb062d5c2d1cceddabf372b7
Parents: 66e39fc
Author: Nezih Yigitbasi <ny...@netflix.com>
Authored: Thu Sep 17 11:46:41 2015 -0700
Committer: Ryan Blue <bl...@apache.org>
Committed: Thu Sep 17 11:46:41 2015 -0700
----------------------------------------------------------------------
.../parquet/tools/command/CatCommand.java | 2 +-
.../parquet/tools/command/DumpCommand.java | 5 +-
.../parquet/tools/command/HeadCommand.java | 2 +-
.../tools/command/ShowSchemaCommand.java | 4 +-
.../parquet/tools/read/SimpleMapRecord.java | 51 ++++++++++++++++--
.../parquet/tools/read/TestSimpleMapRecord.java | 56 ++++++++++++++++++++
6 files changed, 112 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java
index e11cf2b..b988eca 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java
@@ -70,7 +70,7 @@ public class CatCommand extends ArgsOnlyCommand {
ParquetReader<SimpleRecord> reader = null;
try {
PrintWriter writer = new PrintWriter(Main.out, true);
- reader = new ParquetReader<SimpleRecord>(new Path(input), new SimpleReadSupport());
+ reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
if (options.hasOption('j')) {
value.prettyPrintJson(writer);
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
index 1388ed3..94d5002 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
@@ -47,6 +47,7 @@ import org.apache.parquet.column.page.DataPageV2;
import org.apache.parquet.column.page.DictionaryPage;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
@@ -62,6 +63,8 @@ import org.apache.parquet.tools.util.PrettyPrintWriter.WhiteSpaceHandler;
import com.google.common.base.Joiner;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
+
public class DumpCommand extends ArgsOnlyCommand {
private static final Charset UTF8 = Charset.forName("UTF-8");
private static final CharsetDecoder UTF8_DECODER = UTF8.newDecoder();
@@ -115,7 +118,7 @@ public class DumpCommand extends ArgsOnlyCommand {
Configuration conf = new Configuration();
Path inpath = new Path(input);
- ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath);
+ ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath, NO_FILTER);
MessageType schema = metaData.getFileMetaData().getSchema();
PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java
index ec50b4e..b5d2c89 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java
@@ -78,7 +78,7 @@ public class HeadCommand extends ArgsOnlyCommand {
ParquetReader<SimpleRecord> reader = null;
try {
PrintWriter writer = new PrintWriter(Main.out, true);
- reader = new ParquetReader<SimpleRecord>(new Path(input), new SimpleReadSupport());
+ reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
for (SimpleRecord value = reader.read(); value != null && num-- > 0; value = reader.read()) {
value.prettyPrint(writer);
writer.println();
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
index c093c72..40831ba 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
@@ -37,6 +37,8 @@ import org.apache.parquet.tools.Main;
import org.apache.parquet.tools.util.MetadataUtils;
import org.apache.parquet.tools.util.PrettyPrintWriter;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
+
public class ShowSchemaCommand extends ArgsOnlyCommand {
public static final String[] USAGE = new String[] {
"<input>",
@@ -88,7 +90,7 @@ public class ShowSchemaCommand extends ArgsOnlyCommand {
} else {
file = path;
}
- metaData = ParquetFileReader.readFooter(conf, file);
+ metaData = ParquetFileReader.readFooter(conf, file, NO_FILTER);
MessageType schema = metaData.getFileMetaData().getSchema();
Main.out.println(schema);
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java b/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java
index 9b9243e..043299d 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java
@@ -19,7 +19,9 @@
package org.apache.parquet.tools.read;
import com.google.common.collect.Maps;
+import org.codehaus.jackson.node.BinaryNode;
+import java.util.Arrays;
import java.util.Map;
public class SimpleMapRecord extends SimpleRecord {
@@ -30,14 +32,55 @@ public class SimpleMapRecord extends SimpleRecord {
String key = null;
Object val = null;
for (NameValue kv : ((SimpleRecord) value.getValue()).values) {
- if (kv.getName().equals("key")) {
- key = (String) kv.getValue();
- } else if (kv.getName().equals("value")) {
- val = toJsonValue(kv.getValue());
+ String kvName = kv.getName();
+ Object kvValue = kv.getValue();
+ if (kvName.equals("key")) {
+ key = keyToString(kvValue);
+ } else if (kvName.equals("value")) {
+ val = toJsonValue(kvValue);
}
}
result.put(key, val);
}
return result;
}
+
+ String keyToString(Object kvValue) {
+ if (kvValue == null) {
+ return "null";
+ }
+
+ Class<?> type = kvValue.getClass();
+ if (type.isArray()) {
+ if (type.getComponentType() == boolean.class) {
+ return Arrays.toString((boolean[]) kvValue);
+ }
+ else if (type.getComponentType() == byte.class) {
+ return new BinaryNode((byte[]) kvValue).asText();
+ }
+ else if (type.getComponentType() == char.class) {
+ return Arrays.toString((char[]) kvValue);
+ }
+ else if (type.getComponentType() == double.class) {
+ return Arrays.toString((double[]) kvValue);
+ }
+ else if (type.getComponentType() == float.class) {
+ return Arrays.toString((float[]) kvValue);
+ }
+ else if (type.getComponentType() == int.class) {
+ return Arrays.toString((int[]) kvValue);
+ }
+ else if (type.getComponentType() == long.class) {
+ return Arrays.toString((long[]) kvValue);
+ }
+ else if (type.getComponentType() == short.class) {
+ return Arrays.toString((short[]) kvValue);
+ }
+ else {
+ return Arrays.toString((Object[]) kvValue);
+ }
+ } else {
+ return String.valueOf(kvValue);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleMapRecord.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleMapRecord.java b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleMapRecord.java
new file mode 100644
index 0000000..31920fe
--- /dev/null
+++ b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleMapRecord.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.tools.read;
+
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestSimpleMapRecord {
+
+ class TestRecord {
+ private int x;
+ private int y;
+
+ public TestRecord(int x, int y) {
+ this.x = x;
+ this.y = y;
+ }
+
+ @Override
+ public String toString() {
+ return "TestRecord {" + x + "," + y + "}";
+ }
+ }
+
+ @Test
+ public void testBinary() {
+ SimpleMapRecord r = new SimpleMapRecord();
+ Assert.assertEquals("null", r.keyToString(null));
+ Assert.assertEquals("[true, false, true]", r.keyToString(new boolean[]{true, false, true}));
+ Assert.assertEquals("[a, z]", r.keyToString(new char[] { 'a', 'z' }));
+ Assert.assertEquals("[1.0, 3.0]", r.keyToString(new double[]{1.0, 3.0 }));
+ Assert.assertEquals("[2.0, 4.0]", r.keyToString(new float[]{2.0f, 4.0f }));
+ Assert.assertEquals("[100, 999]", r.keyToString(new int[]{100, 999 }));
+ Assert.assertEquals("[23, 37]", r.keyToString(new long[] { 23l, 37l }));
+ Assert.assertEquals("[-1, -2]", r.keyToString(new short[]{(short) -1, (short) -2}));
+ Assert.assertEquals("dGVzdA==", r.keyToString("test".getBytes()));
+ Assert.assertEquals("TestRecord {222,333}", r.keyToString(new TestRecord(222, 333)));
+ }
+}