You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu> on 2021/05/20 20:45:19 UTC

Change in asterixdb[cheshire-cat]: [NO ISSUE][MISC] Improve parser error reporting

From Michael Blow <mb...@apache.org>:

Michael Blow has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543 )


Change subject: [NO ISSUE][MISC] Improve parser error reporting
......................................................................

[NO ISSUE][MISC] Improve parser error reporting

Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
---
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
D asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
A hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
M hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
M hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
8 files changed, 68 insertions(+), 52 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/43/11543/1

diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 3a502d0..4c253bc 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -29,10 +29,10 @@
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
 
 public class QuotedLineRecordReader extends LineRecordReader {
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 2ff5cfa..0e23e46 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -40,10 +40,10 @@
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
 
 public class SemiStructuredRecordReader extends StreamRecordReader {
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
index 2bf0df4..2d20cc2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.external.parser;
 
+import static org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR;
+
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.BitSet;
@@ -46,6 +48,7 @@
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.util.ExceptionUtils;
 import org.apache.hyracks.data.std.api.IMutableValueStorage;
+import org.apache.hyracks.util.ParseUtil;
 
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonParseException;
@@ -433,18 +436,13 @@
             }
             long lineNum = lineNumber.getAsLong() + jsonParser.getCurrentLocation().getLineNr() - 1;
             JsonStreamContext parsingContext = jsonParser.getParsingContext();
-            String fieldName = "N/A";
-            while (parsingContext != null) {
-                String currentFieldName = parsingContext.getCurrentName();
-                if (currentFieldName != null) {
-                    fieldName = currentFieldName;
-                    break;
-                }
+            String fieldName = null;
+            while (parsingContext != null && fieldName == null) {
+                fieldName = parsingContext.getCurrentName();
                 parsingContext = parsingContext.getParent();
             }
-
-            return HyracksDataException.create(org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR,
-                    dataSourceName.get(), lineNum, fieldName, msg);
+            final String locationDetails = ParseUtil.asLocationDetailString(dataSourceName.get(), lineNum, fieldName);
+            return HyracksDataException.create(PARSING_ERROR, locationDetails, msg);
         }
         return new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e);
     }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
index 60e6e77..590f51d 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
@@ -38,7 +38,6 @@
 import org.apache.asterix.external.api.IRecordDataParser;
 import org.apache.asterix.external.api.IStreamDataParser;
 import org.apache.asterix.external.util.ExternalDataConstants;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.asterix.om.base.AMutableString;
 import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils;
 import org.apache.asterix.om.types.ARecordType;
@@ -52,6 +51,7 @@
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParser;
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
 import org.apache.hyracks.dataflow.std.file.FieldCursorForDelimitedDataParser;
+import org.apache.hyracks.util.ParseUtil;
 
 public class DelimitedDataParser extends AbstractDataParser implements IStreamDataParser, IRecordDataParser<char[]> {
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
deleted file mode 100644
index 5a46af7..0000000
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.util;
-
-import org.apache.hyracks.api.exceptions.ErrorCode;
-import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
-
-public class ParseUtil {
-
-    private ParseUtil() {
-    }
-
-    public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum,
-            String warnMessage) {
-        warningCollector
-                .warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName, lineNum, fieldNum, warnMessage));
-    }
-}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
new file mode 100644
index 0000000..63fec09
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.util;
+
+import java.util.StringJoiner;
+
+import org.apache.hyracks.api.exceptions.ErrorCode;
+import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.exceptions.Warning;
+
+public class ParseUtil {
+
+    private ParseUtil() {
+    }
+
+    public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum,
+            String warnMessage) {
+        warningCollector.warn(Warning.of(null, ErrorCode.PARSING_ERROR,
+                asLocationDetailString(dataSourceName, lineNum, fieldNum), warnMessage));
+    }
+
+    public static String asLocationDetailString(String dataSource, long lineNum, Object fieldIdentifier) {
+        StringJoiner details = new StringJoiner(" ");
+        details.setEmptyValue("N/A");
+        if (dataSource != null && !dataSource.isEmpty()) {
+            details.add(dataSource);
+        }
+        if (lineNum >= 0) {
+            details.add("line " + lineNum);
+        }
+        if (fieldIdentifier instanceof Number) {
+            details.add("field " + fieldIdentifier);
+        } else if (fieldIdentifier instanceof String && !((String) fieldIdentifier).isEmpty()) {
+            details.add("field '" + fieldIdentifier + "'");
+        }
+        return "at " + details;
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index 9f04fb2..4d9c60b 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -141,7 +141,7 @@
 121 = A numeric type promotion error has occurred: %1$s
 122 = Encountered an error while printing the plan: %1$s
 123 = Insufficient memory is provided for the join operators, please increase the join memory budget.
-124 = Parsing error at %1$s line %2$s field %3$s: %4$s
+124 = Parsing error %s: %s
 125 = Invalid inverted list type traits: %1$s
 126 = Illegal state. %1$s
 
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
index 936d63e..ffc87cd 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
@@ -23,9 +23,8 @@
 import java.util.Arrays;
 import java.util.function.Supplier;
 
-import org.apache.hyracks.api.exceptions.ErrorCode;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
+import org.apache.hyracks.util.ParseUtil;
 
 public class FieldCursorForDelimitedDataParser {
 
@@ -448,6 +447,6 @@
     }
 
     private void warn(String message) {
-        warnings.warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName.get(), lineCount, fieldCount, message));
+        ParseUtil.warn(warnings, dataSourceName.get(), lineCount, fieldCount, message);
     }
 }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: cheshire-cat
Gerrit-Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Gerrit-Change-Number: 11543
Gerrit-PatchSet: 1
Gerrit-Owner: Michael Blow <mb...@apache.org>
Gerrit-MessageType: newchange

Change in asterixdb[cheshire-cat]: [NO ISSUE][MISC] Improve parser error reporting

Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Michael Blow <mb...@apache.org>:

Michael Blow has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543 )

Change subject: [NO ISSUE][MISC] Improve parser error reporting
......................................................................

[NO ISSUE][MISC] Improve parser error reporting

Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Michael Blow <mb...@apache.org>
Reviewed-by: Till Westmann <ti...@apache.org>
---
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
D asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
A hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
M hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
M hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
11 files changed, 88 insertions(+), 72 deletions(-)

Approvals:
  Till Westmann: Looks good to me, approved
  Michael Blow: Looks good to me, but someone else must approve
  Jenkins: Verified; Verified



diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
index df60e60..1e302da 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
@@ -143,11 +143,11 @@
       <compilation-unit name="common/malformed-json">
         <placeholder name="adapter" value="AZUREBLOB" />
         <output-dir compare="Text">common/malformed-json</output-dir>
-        <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field field: Duplicate field 'field'</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field field: Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field 'field': Duplicate field 'field'</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field 'field': Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field 'field2': Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 9bc463c..b354e65 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -92,11 +92,11 @@
       <compilation-unit name="common/malformed-json">
         <placeholder name="adapter" value="S3" />
         <output-dir compare="Text">common/malformed-json</output-dir>
-        <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field field: Duplicate field 'field'</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field field: Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
-        <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field 'field': Duplicate field 'field'</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field 'field': Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field 'field2': Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+        <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
index 92b5e32..d02647d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
@@ -49,17 +49,17 @@
         <output-dir compare="Text">common/csv-warnings</output-dir>
         <expected-warn>Parsing error at data_dir/no_h_missing_fields.csv line 2 field 3: some fields are missing</expected-warn>
         <expected-warn>Parsing error at data_dir/no_h_no_closing_q.csv line 2 field 0: malformed input record ended abruptly</expected-warn>
-        <expected-warn>Parsing error at  line 2 field 0: malformed input record ended abruptly</expected-warn>
+        <expected-warn>Parsing error at line 2 field 0: malformed input record ended abruptly</expected-warn>
 
-        <expected-warn>Parsing error at  line 5 field 3: invalid value</expected-warn>
-        <expected-warn>Parsing error at  line 2 field 1: invalid value</expected-warn>
-        <expected-warn>Parsing error at  line 11 field 1: invalid value</expected-warn>
-        <expected-warn>Parsing error at  line 3 field 1: invalid value</expected-warn>
-        <expected-warn>Parsing error at  line 4 field 1: invalid value</expected-warn>
-        <expected-warn>Parsing error at  line 7 field 7: invalid value</expected-warn>
-        <expected-warn>Parsing error at  line 13 field 7: invalid value</expected-warn>
-        <expected-warn>Parsing error at  line 12 field 3: invalid value</expected-warn>
-        <expected-warn>Parsing error at  line 9 field 6: a quote should be in the beginning</expected-warn>
+        <expected-warn>Parsing error at line 5 field 3: invalid value</expected-warn>
+        <expected-warn>Parsing error at line 2 field 1: invalid value</expected-warn>
+        <expected-warn>Parsing error at line 11 field 1: invalid value</expected-warn>
+        <expected-warn>Parsing error at line 3 field 1: invalid value</expected-warn>
+        <expected-warn>Parsing error at line 4 field 1: invalid value</expected-warn>
+        <expected-warn>Parsing error at line 7 field 7: invalid value</expected-warn>
+        <expected-warn>Parsing error at line 13 field 7: invalid value</expected-warn>
+        <expected-warn>Parsing error at line 12 field 3: invalid value</expected-warn>
+        <expected-warn>Parsing error at line 9 field 6: a quote should be in the beginning</expected-warn>
 
         <expected-warn>Parsing error at data_dir/h_invalid_values.csv line 5 field 3: invalid value</expected-warn>
         <expected-warn>Parsing error at data_dir/h_invalid_values.csv line 2 field 1: invalid value</expected-warn>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 3a502d0..4c253bc 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -29,10 +29,10 @@
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
 
 public class QuotedLineRecordReader extends LineRecordReader {
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 2ff5cfa..0e23e46 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -40,10 +40,10 @@
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
 
 public class SemiStructuredRecordReader extends StreamRecordReader {
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
index 2bf0df4..2d20cc2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.external.parser;
 
+import static org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR;
+
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.BitSet;
@@ -46,6 +48,7 @@
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.util.ExceptionUtils;
 import org.apache.hyracks.data.std.api.IMutableValueStorage;
+import org.apache.hyracks.util.ParseUtil;
 
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonParseException;
@@ -433,18 +436,13 @@
             }
             long lineNum = lineNumber.getAsLong() + jsonParser.getCurrentLocation().getLineNr() - 1;
             JsonStreamContext parsingContext = jsonParser.getParsingContext();
-            String fieldName = "N/A";
-            while (parsingContext != null) {
-                String currentFieldName = parsingContext.getCurrentName();
-                if (currentFieldName != null) {
-                    fieldName = currentFieldName;
-                    break;
-                }
+            String fieldName = null;
+            while (parsingContext != null && fieldName == null) {
+                fieldName = parsingContext.getCurrentName();
                 parsingContext = parsingContext.getParent();
             }
-
-            return HyracksDataException.create(org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR,
-                    dataSourceName.get(), lineNum, fieldName, msg);
+            final String locationDetails = ParseUtil.asLocationDetailString(dataSourceName.get(), lineNum, fieldName);
+            return HyracksDataException.create(PARSING_ERROR, locationDetails, msg);
         }
         return new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e);
     }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
index 60e6e77..590f51d 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
@@ -38,7 +38,6 @@
 import org.apache.asterix.external.api.IRecordDataParser;
 import org.apache.asterix.external.api.IStreamDataParser;
 import org.apache.asterix.external.util.ExternalDataConstants;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.asterix.om.base.AMutableString;
 import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils;
 import org.apache.asterix.om.types.ARecordType;
@@ -52,6 +51,7 @@
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParser;
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
 import org.apache.hyracks.dataflow.std.file.FieldCursorForDelimitedDataParser;
+import org.apache.hyracks.util.ParseUtil;
 
 public class DelimitedDataParser extends AbstractDataParser implements IStreamDataParser, IRecordDataParser<char[]> {
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
deleted file mode 100644
index 5a46af7..0000000
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.util;
-
-import org.apache.hyracks.api.exceptions.ErrorCode;
-import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
-
-public class ParseUtil {
-
-    private ParseUtil() {
-    }
-
-    public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum,
-            String warnMessage) {
-        warningCollector
-                .warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName, lineNum, fieldNum, warnMessage));
-    }
-}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
new file mode 100644
index 0000000..63fec09
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.util;
+
+import java.util.StringJoiner;
+
+import org.apache.hyracks.api.exceptions.ErrorCode;
+import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.exceptions.Warning;
+
+public class ParseUtil {
+
+    private ParseUtil() {
+    }
+
+    public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum,
+            String warnMessage) {
+        warningCollector.warn(Warning.of(null, ErrorCode.PARSING_ERROR,
+                asLocationDetailString(dataSourceName, lineNum, fieldNum), warnMessage));
+    }
+
+    public static String asLocationDetailString(String dataSource, long lineNum, Object fieldIdentifier) {
+        StringJoiner details = new StringJoiner(" ");
+        details.setEmptyValue("N/A");
+        if (dataSource != null && !dataSource.isEmpty()) {
+            details.add(dataSource);
+        }
+        if (lineNum >= 0) {
+            details.add("line " + lineNum);
+        }
+        if (fieldIdentifier instanceof Number) {
+            details.add("field " + fieldIdentifier);
+        } else if (fieldIdentifier instanceof String && !((String) fieldIdentifier).isEmpty()) {
+            details.add("field '" + fieldIdentifier + "'");
+        }
+        return "at " + details;
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index 9f04fb2..4d9c60b 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -141,7 +141,7 @@
 121 = A numeric type promotion error has occurred: %1$s
 122 = Encountered an error while printing the plan: %1$s
 123 = Insufficient memory is provided for the join operators, please increase the join memory budget.
-124 = Parsing error at %1$s line %2$s field %3$s: %4$s
+124 = Parsing error %s: %s
 125 = Invalid inverted list type traits: %1$s
 126 = Illegal state. %1$s
 
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
index 936d63e..ffc87cd 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
@@ -23,9 +23,8 @@
 import java.util.Arrays;
 import java.util.function.Supplier;
 
-import org.apache.hyracks.api.exceptions.ErrorCode;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
+import org.apache.hyracks.util.ParseUtil;
 
 public class FieldCursorForDelimitedDataParser {
 
@@ -448,6 +447,6 @@
     }
 
     private void warn(String message) {
-        warnings.warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName.get(), lineCount, fieldCount, message));
+        ParseUtil.warn(warnings, dataSourceName.get(), lineCount, fieldCount, message);
     }
 }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: cheshire-cat
Gerrit-Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Gerrit-Change-Number: 11543
Gerrit-PatchSet: 3
Gerrit-Owner: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Hussain Towaileb <hu...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Till Westmann <ti...@apache.org>
Gerrit-MessageType: merged

Change in asterixdb[cheshire-cat]: [NO ISSUE][MISC] Improve parser error reporting

Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Michael Blow <mb...@apache.org>:

Michael Blow has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543 )


Change subject: [NO ISSUE][MISC] Improve parser error reporting
......................................................................

[NO ISSUE][MISC] Improve parser error reporting

Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
---
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
D asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
A hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
M hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
M hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
8 files changed, 68 insertions(+), 52 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/43/11543/1

diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 3a502d0..4c253bc 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -29,10 +29,10 @@
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
 
 public class QuotedLineRecordReader extends LineRecordReader {
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 2ff5cfa..0e23e46 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -40,10 +40,10 @@
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
 
 public class SemiStructuredRecordReader extends StreamRecordReader {
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
index 2bf0df4..2d20cc2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.external.parser;
 
+import static org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR;
+
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.BitSet;
@@ -46,6 +48,7 @@
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.util.ExceptionUtils;
 import org.apache.hyracks.data.std.api.IMutableValueStorage;
+import org.apache.hyracks.util.ParseUtil;
 
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonParseException;
@@ -433,18 +436,13 @@
             }
             long lineNum = lineNumber.getAsLong() + jsonParser.getCurrentLocation().getLineNr() - 1;
             JsonStreamContext parsingContext = jsonParser.getParsingContext();
-            String fieldName = "N/A";
-            while (parsingContext != null) {
-                String currentFieldName = parsingContext.getCurrentName();
-                if (currentFieldName != null) {
-                    fieldName = currentFieldName;
-                    break;
-                }
+            String fieldName = null;
+            while (parsingContext != null && fieldName == null) {
+                fieldName = parsingContext.getCurrentName();
                 parsingContext = parsingContext.getParent();
             }
-
-            return HyracksDataException.create(org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR,
-                    dataSourceName.get(), lineNum, fieldName, msg);
+            final String locationDetails = ParseUtil.asLocationDetailString(dataSourceName.get(), lineNum, fieldName);
+            return HyracksDataException.create(PARSING_ERROR, locationDetails, msg);
         }
         return new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e);
     }
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
index 60e6e77..590f51d 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
@@ -38,7 +38,6 @@
 import org.apache.asterix.external.api.IRecordDataParser;
 import org.apache.asterix.external.api.IStreamDataParser;
 import org.apache.asterix.external.util.ExternalDataConstants;
-import org.apache.asterix.external.util.ParseUtil;
 import org.apache.asterix.om.base.AMutableString;
 import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils;
 import org.apache.asterix.om.types.ARecordType;
@@ -52,6 +51,7 @@
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParser;
 import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
 import org.apache.hyracks.dataflow.std.file.FieldCursorForDelimitedDataParser;
+import org.apache.hyracks.util.ParseUtil;
 
 public class DelimitedDataParser extends AbstractDataParser implements IStreamDataParser, IRecordDataParser<char[]> {
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
deleted file mode 100644
index 5a46af7..0000000
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.util;
-
-import org.apache.hyracks.api.exceptions.ErrorCode;
-import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
-
-public class ParseUtil {
-
-    private ParseUtil() {
-    }
-
-    public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum,
-            String warnMessage) {
-        warningCollector
-                .warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName, lineNum, fieldNum, warnMessage));
-    }
-}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
new file mode 100644
index 0000000..63fec09
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.util;
+
+import java.util.StringJoiner;
+
+import org.apache.hyracks.api.exceptions.ErrorCode;
+import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.exceptions.Warning;
+
+public class ParseUtil {
+
+    private ParseUtil() {
+    }
+
+    public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum,
+            String warnMessage) {
+        warningCollector.warn(Warning.of(null, ErrorCode.PARSING_ERROR,
+                asLocationDetailString(dataSourceName, lineNum, fieldNum), warnMessage));
+    }
+
+    public static String asLocationDetailString(String dataSource, long lineNum, Object fieldIdentifier) {
+        StringJoiner details = new StringJoiner(" ");
+        details.setEmptyValue("N/A");
+        if (dataSource != null && !dataSource.isEmpty()) {
+            details.add(dataSource);
+        }
+        if (lineNum >= 0) {
+            details.add("line " + lineNum);
+        }
+        if (fieldIdentifier instanceof Number) {
+            details.add("field " + fieldIdentifier);
+        } else if (fieldIdentifier instanceof String && !((String) fieldIdentifier).isEmpty()) {
+            details.add("field '" + fieldIdentifier + "'");
+        }
+        return "at " + details;
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index 9f04fb2..4d9c60b 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -141,7 +141,7 @@
 121 = A numeric type promotion error has occurred: %1$s
 122 = Encountered an error while printing the plan: %1$s
 123 = Insufficient memory is provided for the join operators, please increase the join memory budget.
-124 = Parsing error at %1$s line %2$s field %3$s: %4$s
+124 = Parsing error %s: %s
 125 = Invalid inverted list type traits: %1$s
 126 = Illegal state. %1$s
 
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
index 936d63e..ffc87cd 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
@@ -23,9 +23,8 @@
 import java.util.Arrays;
 import java.util.function.Supplier;
 
-import org.apache.hyracks.api.exceptions.ErrorCode;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
+import org.apache.hyracks.util.ParseUtil;
 
 public class FieldCursorForDelimitedDataParser {
 
@@ -448,6 +447,6 @@
     }
 
     private void warn(String message) {
-        warnings.warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName.get(), lineCount, fieldCount, message));
+        ParseUtil.warn(warnings, dataSourceName.get(), lineCount, fieldCount, message);
     }
 }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: cheshire-cat
Gerrit-Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Gerrit-Change-Number: 11543
Gerrit-PatchSet: 1
Gerrit-Owner: Michael Blow <mb...@apache.org>
Gerrit-MessageType: newchange

Change in asterixdb[cheshire-cat]: [NO ISSUE][MISC] Improve parser error reporting

Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Till Westmann <ti...@apache.org>:

Till Westmann has posted comments on this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543 )

Change subject: [NO ISSUE][MISC] Improve parser error reporting
......................................................................


Patch Set 2: Code-Review+2


-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: cheshire-cat
Gerrit-Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Gerrit-Change-Number: 11543
Gerrit-PatchSet: 2
Gerrit-Owner: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Hussain Towaileb <hu...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Till Westmann <ti...@apache.org>
Gerrit-Comment-Date: Fri, 21 May 2021 01:28:29 +0000
Gerrit-HasComments: No
Gerrit-Has-Labels: Yes
Gerrit-MessageType: comment

Change in asterixdb[cheshire-cat]: [NO ISSUE][MISC] Improve parser error reporting

Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Jenkins <je...@fulliautomatix.ics.uci.edu>:

Jenkins has posted comments on this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543 )

Change subject: [NO ISSUE][MISC] Improve parser error reporting
......................................................................


Patch Set 2: Integration-Tests+1

Integration Tests Successful

https://asterix-jenkins.ics.uci.edu/job/asterix-gerrit-integration-tests/11997/ : SUCCESS


-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: cheshire-cat
Gerrit-Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Gerrit-Change-Number: 11543
Gerrit-PatchSet: 2
Gerrit-Owner: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Hussain Towaileb <hu...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Till Westmann <ti...@apache.org>
Gerrit-Comment-Date: Fri, 21 May 2021 00:00:43 +0000
Gerrit-HasComments: No
Gerrit-Has-Labels: Yes
Gerrit-MessageType: comment

Change in asterixdb[cheshire-cat]: [NO ISSUE][MISC] Improve parser error reporting

Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Michael Blow <mb...@apache.org>:

Michael Blow has posted comments on this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543 )

Change subject: [NO ISSUE][MISC] Improve parser error reporting
......................................................................


Patch Set 2: Code-Review+1


-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: cheshire-cat
Gerrit-Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Gerrit-Change-Number: 11543
Gerrit-PatchSet: 2
Gerrit-Owner: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Hussain Towaileb <hu...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Till Westmann <ti...@apache.org>
Gerrit-Comment-Date: Fri, 21 May 2021 01:21:47 +0000
Gerrit-HasComments: No
Gerrit-Has-Labels: Yes
Gerrit-MessageType: comment

Change in asterixdb[cheshire-cat]: [NO ISSUE][MISC] Improve parser error reporting

Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Jenkins <je...@fulliautomatix.ics.uci.edu>:

Jenkins has posted comments on this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543 )

Change subject: [NO ISSUE][MISC] Improve parser error reporting
......................................................................


Patch Set 1: Integration-Tests+1

Integration Tests Successful

https://asterix-jenkins.ics.uci.edu/job/asterix-gerrit-integration-tests/11996/ : SUCCESS


-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: cheshire-cat
Gerrit-Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Gerrit-Change-Number: 11543
Gerrit-PatchSet: 1
Gerrit-Owner: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Hussain Towaileb <hu...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Till Westmann <ti...@apache.org>
Gerrit-Comment-Date: Thu, 20 May 2021 21:44:16 +0000
Gerrit-HasComments: No
Gerrit-Has-Labels: Yes
Gerrit-MessageType: comment

Change in asterixdb[cheshire-cat]: [NO ISSUE][MISC] Improve parser error reporting

Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Till Westmann <ti...@apache.org>:

Till Westmann has posted comments on this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543 )

Change subject: [NO ISSUE][MISC] Improve parser error reporting
......................................................................


Patch Set 1: Code-Review+1


-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: cheshire-cat
Gerrit-Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Gerrit-Change-Number: 11543
Gerrit-PatchSet: 1
Gerrit-Owner: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Hussain Towaileb <hu...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Till Westmann <ti...@apache.org>
Gerrit-Comment-Date: Thu, 20 May 2021 21:52:49 +0000
Gerrit-HasComments: No
Gerrit-Has-Labels: Yes
Gerrit-MessageType: comment

Change in asterixdb[cheshire-cat]: [NO ISSUE][MISC] Improve parser error reporting

Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Michael Blow <mb...@apache.org>:

Hello Hussain Towaileb, Till Westmann, Ali Alsuliman, Jenkins, 

I'd like you to reexamine a change. Please visit

    https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543

to look at the new patch set (#2).

Change subject: [NO ISSUE][MISC] Improve parser error reporting
......................................................................

[NO ISSUE][MISC] Improve parser error reporting

Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
---
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
D asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
A hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
M hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
M hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
11 files changed, 88 insertions(+), 72 deletions(-)


  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/43/11543/2
-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: cheshire-cat
Gerrit-Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Gerrit-Change-Number: 11543
Gerrit-PatchSet: 2
Gerrit-Owner: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Ali Alsuliman <al...@gmail.com>
Gerrit-Reviewer: Hussain Towaileb <hu...@gmail.com>
Gerrit-Reviewer: Jenkins <je...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Till Westmann <ti...@apache.org>
Gerrit-MessageType: newpatchset