You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2018/03/20 21:59:36 UTC

orc git commit: ORC-321. Add pretty print option to the JSON schema finder tool.

Repository: orc
Updated Branches:
  refs/heads/master 7dfe4a748 -> 3f23d507c


ORC-321. Add pretty print option to the JSON schema finder tool.

Fixes #230

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/3f23d507
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/3f23d507
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/3f23d507

Branch: refs/heads/master
Commit: 3f23d507c39be3503e73f516a451f47e0c4ee25e
Parents: 7dfe4a7
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Mar 12 13:36:48 2018 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Mar 20 14:58:48 2018 -0700

----------------------------------------------------------------------
 .../java/org/apache/orc/TypeDescription.java    |   4 +-
 .../apache/orc/TypeDescriptionPrettyPrint.java  | 131 +++++++++++++++++++
 .../apache/orc/tools/json/JsonSchemaFinder.java |   5 +
 3 files changed, 138 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/3f23d507/java/core/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java
index 86d88ff..d7e81cd 100644
--- a/java/core/src/java/org/apache/orc/TypeDescription.java
+++ b/java/core/src/java/org/apache/orc/TypeDescription.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -46,7 +46,7 @@ public class TypeDescription
   private static final int DEFAULT_PRECISION = 38;
   private static final int DEFAULT_SCALE = 10;
   private static final int DEFAULT_LENGTH = 256;
-  private static final Pattern UNQUOTED_NAMES = Pattern.compile("^\\w+$");
+  static final Pattern UNQUOTED_NAMES = Pattern.compile("^[a-zA-Z0-9_]+$");
 
   @Override
   public int compareTo(TypeDescription other) {

http://git-wip-us.apache.org/repos/asf/orc/blob/3f23d507/java/core/src/java/org/apache/orc/TypeDescriptionPrettyPrint.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/TypeDescriptionPrettyPrint.java b/java/core/src/java/org/apache/orc/TypeDescriptionPrettyPrint.java
new file mode 100644
index 0000000..0714224
--- /dev/null
+++ b/java/core/src/java/org/apache/orc/TypeDescriptionPrettyPrint.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc;
+
+import java.io.PrintStream;
+import java.util.List;
+
+/**
+ * Renders a {@link TypeDescription} as indented, human-readable text.
+ */
+public class TypeDescriptionPrettyPrint {
+
+  /** Writes {@code offset} space characters to {@code output}. */
+  static void pad(PrintStream output, int offset) {
+    for (int left = offset; left > 0; --left) {
+      output.print(' ');
+    }
+  }
+
+  /** Writes a field name, backquoted unless it matches UNQUOTED_NAMES. */
+  static void printFieldName(PrintStream output, String fieldName){
+    if (TypeDescription.UNQUOTED_NAMES.matcher(fieldName).matches()) {
+      output.print(fieldName);
+      return;
+    }
+    // Embedded backquotes are doubled inside the quoted form.
+    output.print('`');
+    output.print(fieldName.replace("`", "``"));
+    output.print('`');
+  }
+
+  /** Writes a struct body, placing each "name:type" field on its own line. */
+  static void printStruct(PrintStream output,
+                          int offset,
+                          TypeDescription type) {
+    output.print("<");
+    List<TypeDescription> kids = type.getChildren();
+    List<String> names = type.getFieldNames();
+    final int inner = offset + 2;
+    for (int i = 0; i < kids.size(); i++) {
+      // The first field only needs a line break; later ones follow a comma.
+      output.println(i == 0 ? "" : ",");
+      pad(output, inner);
+      printFieldName(output, names.get(i));
+      output.print(':');
+      printType(output, inner, kids.get(i));
+    }
+    output.print('>');
+  }
+
+  /** Writes the comma-separated children of a list, map or union. */
+  static void printComplex(PrintStream output,
+                           int offset,
+                           TypeDescription type) {
+    output.print("<");
+    String separator = "";
+    for (TypeDescription child : type.getChildren()) {
+      output.print(separator);
+      printType(output, offset + 2, child);
+      separator = ",";
+    }
+    output.print('>');
+  }
+
+  /** Writes the category name of a type, then its parameters or children. */
+  static void printType(PrintStream output,
+                        int offset,
+                        TypeDescription type) {
+    output.print(type.getCategory().getName());
+    switch (type.getCategory()) {
+      case STRUCT:
+        printStruct(output, offset, type);
+        break;
+      case LIST:
+      case MAP:
+      case UNION:
+        printComplex(output, offset, type);
+        break;
+      case CHAR:
+      case VARCHAR:
+        output.print('(');
+        output.print(type.getMaxLength());
+        output.print(')');
+        break;
+      case DECIMAL:
+        output.print('(');
+        output.print(type.getPrecision());
+        output.print(',');
+        output.print(type.getScale());
+        output.print(')');
+        break;
+      case BOOLEAN:
+      case BINARY:
+      case BYTE:
+      case DATE:
+      case DOUBLE:
+      case FLOAT:
+      case INT:
+      case LONG:
+      case SHORT:
+      case STRING:
+      case TIMESTAMP:
+        // Nothing follows the category name for these types.
+        break;
+      default:
+        throw new IllegalArgumentException("Unhandled type " + type);
+    }
+  }
+
+  /** Pretty prints {@code schema} to {@code output}, starting at offset 0. */
+  public static void print(PrintStream output,
+                           TypeDescription schema) {
+    printType(output, 0, schema);
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3f23d507/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
----------------------------------------------------------------------
diff --git a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
index de36254..8b53ee1 100644
--- a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
+++ b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
@@ -31,6 +31,7 @@ import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.orc.TypeDescription;
+import org.apache.orc.TypeDescriptionPrettyPrint;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -335,6 +336,8 @@ public class JsonSchemaFinder {
       result.mergedType.printFlat(System.out, "root");
     } else if (cli.hasOption('t')) {
       printAsTable(System.out, (StructType) result.mergedType);
+    } else if (cli.hasOption('p')) {
+      TypeDescriptionPrettyPrint.print(System.out, result.getSchema());
     } else {
       System.out.println(result.getSchema());
     }
@@ -349,6 +352,8 @@ public class JsonSchemaFinder {
         .desc("Print types as flat list of types").build());
     options.addOption(Option.builder("t").longOpt("table")
         .desc("Print types as Hive table declaration").build());
+    options.addOption(Option.builder("p").longOpt("pretty")
+        .desc("Pretty print the schema").build());
     CommandLine cli = new GnuParser().parse(options, args);
     if (cli.hasOption('h') || cli.getArgs().length == 0) {
       HelpFormatter formatter = new HelpFormatter();