You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/19 16:24:24 UTC

[orc] branch branch-1.7 updated: ORC-871: Provide better error information when parsing json files with exceptions (#874)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.7 by this push:
     new 55e0a84  ORC-871: Provide better error information when parsing json files with exceptions (#874)
55e0a84 is described below

commit 55e0a84c7aa072f112e530047e6dfe5db8069824
Author: guiyanakaung <gu...@gmail.com>
AuthorDate: Fri Aug 20 00:23:01 2021 +0800

    ORC-871: Provide better error information when parsing json files with exceptions (#874)
    
    ### What changes were proposed in this pull request?
    
    Catch `JsonParseException` while scanning JSON files and print a descriptive error message.
    
    1. Print a dedicated message when the file is empty
    2. Report which file is being processed
    3. Report which record is being processed
    
    ### Why are the changes needed?
    
    To give users more useful information when JSON parsing fails.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    (cherry picked from commit b813c471174e82767b08b165a9daa0319644262c)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../org/apache/orc/tools/convert/ConvertTool.java  |  2 +-
 .../apache/orc/tools/json/JsonSchemaFinder.java    | 38 +++++++++++++++++++---
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java b/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java
index 268efea..7bacde8 100644
--- a/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java
+++ b/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java
@@ -69,7 +69,7 @@ public class ConvertTool {
       if (file.format == Format.JSON) {
         System.err.println("Scanning " + file.path + " for schema");
         filesScanned += 1;
-        schemaFinder.addFile(file.getReader(file.filesystem.open(file.path)));
+        schemaFinder.addFile(file.getReader(file.filesystem.open(file.path)), file.path.getName());
       } else if (file.format == Format.ORC) {
         System.err.println("Merging schema from " + file.path);
         filesScanned += 1;
diff --git a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
index 3af9815..68d39a5 100644
--- a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
+++ b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
@@ -20,6 +20,7 @@ package org.apache.orc.tools.json;
 import com.google.gson.JsonArray;
 import com.google.gson.JsonElement;
 import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
 import com.google.gson.JsonPrimitive;
 import com.google.gson.JsonStreamParser;
 import org.apache.commons.cli.CommandLine;
@@ -28,6 +29,7 @@ import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.Option;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.TypeDescriptionPrettyPrint;
@@ -247,15 +249,41 @@ public class JsonSchemaFinder {
     } else {
       reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
     }
-    addFile(reader);
+    addFile(reader, filename);
   }
 
-  public void addFile(java.io.Reader reader) throws IOException {
+  public void addFile(java.io.Reader reader, String filename) {
     JsonStreamParser parser = new JsonStreamParser(reader);
-    while (parser.hasNext()) {
-      records += 1;
-      mergedType = mergeType(mergedType, pickType(parser.next()));
+    try {
+      while (parser.hasNext()) {
+        mergedType = mergeType(mergedType, pickType(parser.next()));
+        records += 1;
+      }
+    } catch (JsonParseException e) {
+      printParseExceptionMsg(e, filename);
+    }
+  }
+
+  private void printParseExceptionMsg(JsonParseException e, String filename) {
+    System.err.printf(
+        "A JsonParseException was thrown while processing the %dth record of file %s.%n",
+        records + 1, filename);
+
+    String pattern = "at line (\\d+) column (\\d+)";
+    Pattern r = Pattern.compile(pattern);
+    Matcher m = r.matcher(e.getMessage());
+    int line;
+    int column;
+    if (m.find( )) {
+      line = Integer.parseInt(m.group(1));
+      column = Integer.parseInt(m.group(2));
+      if (line == 1 && column == 1) {
+        System.err.printf("File %s is empty.%n", filename);
+        System.exit(1);
+      }
     }
+    System.err.printf("Please check the file.%n%n%s%n", ExceptionUtils.getStackTrace(e));
+    System.exit(1);
   }
 
   HiveType makeHiveType(TypeDescription schema) {