You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2015/09/17 20:46:51 UTC

parquet-mr git commit: PARQUET-360: Handle all map key types with cat tool's json dump

Repository: parquet-mr
Updated Branches:
  refs/heads/master 66e39fc7d -> 0637e2fbc


PARQUET-360: Handle all map key types with cat tool's json dump

Dumping a Parquet map with `parquet-cat --json` throws a `ClassCastException` because the JSON dump casts every map key to `String` and therefore fails on other key types (for example, binary `byte[]` keys).

```
java.lang.ClassCastException: [B cannot be cast to java.lang.String
	at org.apache.parquet.tools.read.SimpleMapRecord.toJsonObject(SimpleMapRecord.java:34)
	at org.apache.parquet.tools.read.SimpleRecord.toJsonValue(SimpleRecord.java:119)
	at org.apache.parquet.tools.read.SimpleRecord.toJsonObject(SimpleRecord.java:112)
	at org.apache.parquet.tools.read.SimpleRecord.prettyPrintJson(SimpleRecord.java:106)
	at org.apache.parquet.tools.command.CatCommand.execute(CatCommand.java:76)
	at org.apache.parquet.tools.Main.main(Main.java:222)
[B cannot be cast to java.lang.String
```

Author: Nezih Yigitbasi <ny...@netflix.com>

Closes #259 from nezihyigitbasi/parquet-cat-json and squashes the following commits:

d047502 [Nezih Yigitbasi] Add unit test
e4cd545 [Nezih Yigitbasi] Get rid of deprecated methods
bdc8fdf [Nezih Yigitbasi] Handle all map key types with cat tool's json dump


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/0637e2fb
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/0637e2fb
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/0637e2fb

Branch: refs/heads/master
Commit: 0637e2fbcd401f47bb062d5c2d1cceddabf372b7
Parents: 66e39fc
Author: Nezih Yigitbasi <ny...@netflix.com>
Authored: Thu Sep 17 11:46:41 2015 -0700
Committer: Ryan Blue <bl...@apache.org>
Committed: Thu Sep 17 11:46:41 2015 -0700

----------------------------------------------------------------------
 .../parquet/tools/command/CatCommand.java       |  2 +-
 .../parquet/tools/command/DumpCommand.java      |  5 +-
 .../parquet/tools/command/HeadCommand.java      |  2 +-
 .../tools/command/ShowSchemaCommand.java        |  4 +-
 .../parquet/tools/read/SimpleMapRecord.java     | 51 ++++++++++++++++--
 .../parquet/tools/read/TestSimpleMapRecord.java | 56 ++++++++++++++++++++
 6 files changed, 112 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java
index e11cf2b..b988eca 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java
@@ -70,7 +70,7 @@ public class CatCommand extends ArgsOnlyCommand {
     ParquetReader<SimpleRecord> reader = null;
     try {
       PrintWriter writer = new PrintWriter(Main.out, true);
-      reader = new ParquetReader<SimpleRecord>(new Path(input), new SimpleReadSupport());
+      reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
       for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
         if (options.hasOption('j')) {
           value.prettyPrintJson(writer);

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
index 1388ed3..94d5002 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
@@ -47,6 +47,7 @@ import org.apache.parquet.column.page.DataPageV2;
 import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.page.PageReadStore;
 import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.metadata.BlockMetaData;
 import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
@@ -62,6 +63,8 @@ import org.apache.parquet.tools.util.PrettyPrintWriter.WhiteSpaceHandler;
 
 import com.google.common.base.Joiner;
 
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
+
 public class DumpCommand extends ArgsOnlyCommand {
     private static final Charset UTF8 = Charset.forName("UTF-8");
     private static final CharsetDecoder UTF8_DECODER = UTF8.newDecoder();
@@ -115,7 +118,7 @@ public class DumpCommand extends ArgsOnlyCommand {
         Configuration conf = new Configuration();
         Path inpath = new Path(input);
 
-        ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath);
+        ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath, NO_FILTER);
         MessageType schema = metaData.getFileMetaData().getSchema();
 
         PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java
index ec50b4e..b5d2c89 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java
@@ -78,7 +78,7 @@ public class HeadCommand extends ArgsOnlyCommand {
     ParquetReader<SimpleRecord> reader = null;
     try {
       PrintWriter writer = new PrintWriter(Main.out, true);
-      reader = new ParquetReader<SimpleRecord>(new Path(input), new SimpleReadSupport());
+      reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
       for (SimpleRecord value = reader.read(); value != null && num-- > 0; value = reader.read()) {
         value.prettyPrint(writer);
         writer.println();

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
index c093c72..40831ba 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
@@ -37,6 +37,8 @@ import org.apache.parquet.tools.Main;
 import org.apache.parquet.tools.util.MetadataUtils;
 import org.apache.parquet.tools.util.PrettyPrintWriter;
 
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
+
 public class ShowSchemaCommand extends ArgsOnlyCommand {
   public static final String[] USAGE = new String[] {
     "<input>",
@@ -88,7 +90,7 @@ public class ShowSchemaCommand extends ArgsOnlyCommand {
     } else {
       file = path;
     }
-    metaData = ParquetFileReader.readFooter(conf, file);
+    metaData = ParquetFileReader.readFooter(conf, file, NO_FILTER);
     MessageType schema = metaData.getFileMetaData().getSchema();
 
     Main.out.println(schema);

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java b/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java
index 9b9243e..043299d 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java
@@ -19,7 +19,9 @@
 package org.apache.parquet.tools.read;
 
 import com.google.common.collect.Maps;
+import org.codehaus.jackson.node.BinaryNode;
 
+import java.util.Arrays;
 import java.util.Map;
 
 public class SimpleMapRecord extends SimpleRecord {
@@ -30,14 +32,55 @@ public class SimpleMapRecord extends SimpleRecord {
       String key = null;
       Object val = null;
       for (NameValue kv : ((SimpleRecord) value.getValue()).values) {
-        if (kv.getName().equals("key")) {
-          key = (String) kv.getValue();
-        } else if (kv.getName().equals("value")) {
-          val = toJsonValue(kv.getValue());
+        String kvName = kv.getName();
+        Object kvValue = kv.getValue();
+        if (kvName.equals("key")) {
+          key = keyToString(kvValue);
+        } else if (kvName.equals("value")) {
+          val = toJsonValue(kvValue);
         }
       }
       result.put(key, val);
     }
     return result;
   }
+
+  String keyToString(Object kvValue) {
+    if (kvValue == null) {
+      return "null";
+    }
+
+    Class<?> type = kvValue.getClass();
+    if (type.isArray()) {
+      if (type.getComponentType() == boolean.class) {
+        return Arrays.toString((boolean[]) kvValue);
+      }
+      else if (type.getComponentType() == byte.class) {
+        return new BinaryNode((byte[]) kvValue).asText();
+      }
+      else if (type.getComponentType() == char.class) {
+        return Arrays.toString((char[]) kvValue);
+      }
+      else if (type.getComponentType() == double.class) {
+        return Arrays.toString((double[]) kvValue);
+      }
+      else if (type.getComponentType() == float.class) {
+        return Arrays.toString((float[]) kvValue);
+      }
+      else if (type.getComponentType() == int.class) {
+        return Arrays.toString((int[]) kvValue);
+      }
+      else if (type.getComponentType() == long.class) {
+        return Arrays.toString((long[]) kvValue);
+      }
+      else if (type.getComponentType() == short.class) {
+        return Arrays.toString((short[]) kvValue);
+      }
+      else {
+        return Arrays.toString((Object[]) kvValue);
+      }
+    } else {
+      return String.valueOf(kvValue);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/0637e2fb/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleMapRecord.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleMapRecord.java b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleMapRecord.java
new file mode 100644
index 0000000..31920fe
--- /dev/null
+++ b/parquet-tools/src/test/java/org/apache/parquet/tools/read/TestSimpleMapRecord.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.tools.read;
+
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestSimpleMapRecord {
+
+  class TestRecord {
+    private int x;
+    private int y;
+
+    public TestRecord(int x, int y) {
+      this.x = x;
+      this.y = y;
+    }
+
+    @Override
+    public String toString() {
+      return "TestRecord {" + x + "," + y + "}";
+    }
+  }
+
+  @Test
+  public void testBinary() {
+    SimpleMapRecord r = new SimpleMapRecord();
+    Assert.assertEquals("null", r.keyToString(null));
+    Assert.assertEquals("[true, false, true]", r.keyToString(new boolean[]{true, false, true}));
+    Assert.assertEquals("[a, z]", r.keyToString(new char[] { 'a', 'z' }));
+    Assert.assertEquals("[1.0, 3.0]", r.keyToString(new double[]{1.0, 3.0 }));
+    Assert.assertEquals("[2.0, 4.0]", r.keyToString(new float[]{2.0f, 4.0f }));
+    Assert.assertEquals("[100, 999]", r.keyToString(new int[]{100, 999 }));
+    Assert.assertEquals("[23, 37]", r.keyToString(new long[] { 23l, 37l }));
+    Assert.assertEquals("[-1, -2]", r.keyToString(new short[]{(short) -1, (short) -2}));
+    Assert.assertEquals("dGVzdA==", r.keyToString("test".getBytes()));
+    Assert.assertEquals("TestRecord {222,333}", r.keyToString(new TestRecord(222, 333)));
+  }
+}