You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/19 16:23:05 UTC

[orc] branch main updated: ORC-871: Provide better error information when parsing json files with exceptions (#874)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new b813c47  ORC-871: Provide better error information when parsing json files with exceptions (#874)
b813c47 is described below

commit b813c471174e82767b08b165a9daa0319644262c
Author: guiyanakaung <gu...@gmail.com>
AuthorDate: Fri Aug 20 00:23:01 2021 +0800

    ORC-871: Provide better error information when parsing json files with exceptions (#874)
    
    ### What changes were proposed in this pull request?
    
    Catch JsonParseException while scanning JSON files for a schema and print a descriptive error message:
    
    1. Print a dedicated message when the file is empty
    2. Report which file is being processed
    3. Report which record is being processed
    
    ### Why are the changes needed?
    
    Provide more useful information when a JSON file fails to parse.
    
    ### How was this patch tested?
    
    Pass the CIs.
---
 .../org/apache/orc/tools/convert/ConvertTool.java  |  2 +-
 .../apache/orc/tools/json/JsonSchemaFinder.java    | 38 +++++++++++++++++++---
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java b/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java
index 268efea..7bacde8 100644
--- a/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java
+++ b/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java
@@ -69,7 +69,7 @@ public class ConvertTool {
       if (file.format == Format.JSON) {
         System.err.println("Scanning " + file.path + " for schema");
         filesScanned += 1;
-        schemaFinder.addFile(file.getReader(file.filesystem.open(file.path)));
+        schemaFinder.addFile(file.getReader(file.filesystem.open(file.path)), file.path.getName());
       } else if (file.format == Format.ORC) {
         System.err.println("Merging schema from " + file.path);
         filesScanned += 1;
diff --git a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
index 1d911a0..3aea9b5 100644
--- a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
+++ b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
@@ -20,6 +20,7 @@ package org.apache.orc.tools.json;
 import com.google.gson.JsonArray;
 import com.google.gson.JsonElement;
 import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
 import com.google.gson.JsonPrimitive;
 import com.google.gson.JsonStreamParser;
 import org.apache.commons.cli.CommandLine;
@@ -28,6 +29,7 @@ import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.Option;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.TypeDescriptionPrettyPrint;
@@ -247,15 +249,41 @@ public class JsonSchemaFinder {
     } else {
       reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
     }
-    addFile(reader);
+    addFile(reader, filename);
   }
 
-  public void addFile(java.io.Reader reader) throws IOException {
+  public void addFile(java.io.Reader reader, String filename) {
     JsonStreamParser parser = new JsonStreamParser(reader);
-    while (parser.hasNext()) {
-      records += 1;
-      mergedType = mergeType(mergedType, pickType(parser.next()));
+    try {
+      while (parser.hasNext()) {
+        mergedType = mergeType(mergedType, pickType(parser.next()));
+        records += 1;
+      }
+    } catch (JsonParseException e) {
+      printParseExceptionMsg(e, filename);
+    }
+  }
+
+  private void printParseExceptionMsg(JsonParseException e, String filename) {
+    System.err.printf(
+        "A JsonParseException was thrown while processing the %dth record of file %s.%n",
+        records + 1, filename);
+
+    String pattern = "at line (\\d+) column (\\d+)";
+    Pattern r = Pattern.compile(pattern);
+    Matcher m = r.matcher(e.getMessage());
+    int line;
+    int column;
+    if (m.find( )) {
+      line = Integer.parseInt(m.group(1));
+      column = Integer.parseInt(m.group(2));
+      if (line == 1 && column == 1) {
+        System.err.printf("File %s is empty.%n", filename);
+        System.exit(1);
+      }
     }
+    System.err.printf("Please check the file.%n%n%s%n", ExceptionUtils.getStackTrace(e));
+    System.exit(1);
   }
 
   HiveType makeHiveType(TypeDescription schema) {