You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/24 00:44:37 UTC

[impala] 01/02: IMPALA-8885: Improve Parquet version metadata error

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit af0e04f33bbf2e93b7676ed7768c335c49b195f2
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Aug 22 17:24:53 2019 -0700

    IMPALA-8885: Improve Parquet version metadata error
    
    Update the error message to make it more obvious that
    the error could occur by trying to parse a non-Parquet
    file as Parquet
    
    Updated tests that depended on the error text.
    
    Change-Id: I2b36586dba14a31a613d79a0e28efc9a5173e75d
    Reviewed-on: http://gerrit.cloudera.org:8080/14126
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 common/thrift/generate_error_codes.py                          |  6 ++++--
 .../queries/QueryTest/parquet-error-propagation-race.test      |  2 +-
 .../workloads/functional-query/queries/QueryTest/parquet.test  |  2 +-
 tests/metadata/test_stale_metadata.py                          | 10 ++++++----
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py
index d9d4d68..9dee6e2 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -192,8 +192,10 @@ error_codes = (
   ("STALE_METADATA_FILE_TOO_SHORT", 59, "Metadata for file '$0' appears stale. "
    "Try running \\\"refresh $1\\\" to reload the file metadata."),
 
-  ("PARQUET_BAD_VERSION_NUMBER", 60, "File '$0' has an invalid version number: $1\\n"
-   "This could be due to stale metadata. Try running \\\"refresh $2\\\"."),
+  ("PARQUET_BAD_VERSION_NUMBER", 60, "File '$0' has an invalid Parquet version number: "
+   "$1\\n. Please check that it is a valid Parquet file. "
+   "This error can also occur due to stale metadata. "
+   "If you believe this is a valid Parquet file, try running \\\"refresh $2\\\"."),
 
   ("SCANNER_INCOMPLETE_READ", 61, "Tried to read $0 bytes but could only read $1 bytes. "
    "This may indicate data file corruption. (file $2, byte offset: $3)"),
diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet-error-propagation-race.test b/testdata/workloads/functional-query/queries/QueryTest/parquet-error-propagation-race.test
index 4104595..8d7b97b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/parquet-error-propagation-race.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/parquet-error-propagation-race.test
@@ -9,5 +9,5 @@ INSERT INTO bad_magic_number SELECT 'good';
 set debug_action="0:SCANNER_ERROR:DELAY";
 SELECT * FROM bad_magic_number
 ---- CATCH
-invalid version number
+invalid Parquet version number
 ====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet.test b/testdata/workloads/functional-query/queries/QueryTest/parquet.test
index 0d76ab1..b0b188f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/parquet.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/parquet.test
@@ -50,7 +50,7 @@ bigint,bigint,string,string,boolean,boolean,bigint,bigint,bigint,bigint
 # Parquet file with invalid magic number
 SELECT * from bad_magic_number
 ---- CATCH
-File '$NAMENODE/test-warehouse/bad_magic_number_parquet/bad_magic_number.parquet' has an invalid version number: XXXX
+File '$NAMENODE/test-warehouse/bad_magic_number_parquet/bad_magic_number.parquet' has an invalid Parquet version number: XXXX
 ====
 ---- QUERY
 # count(*) query on parquet file with multiple blocks (one block per node)
diff --git a/tests/metadata/test_stale_metadata.py b/tests/metadata/test_stale_metadata.py
index cd72035..9e5bf5f 100644
--- a/tests/metadata/test_stale_metadata.py
+++ b/tests/metadata/test_stale_metadata.py
@@ -89,12 +89,14 @@ class TestRewrittenFile(ImpalaTestSuite):
   def test_new_file_longer(self, vector, unique_database):
     """Rewrites an existing file with a new longer file."""
     # Full error is something like:
-    #   File '...' has an invalid version number: ff4C
-    #   This could be due to stale metadata. Try running "refresh
-    #   unique_database_name.new_file_longer".
+    # "File '..' has an invalid Parquet version number: ff4C
+    # Please check that it is a valid Parquet file. This error can also occur due to
+    # stale metadata. If you believe this is a valid Parquet file, try running
+    # "refresh ...".
     table_name = "new_file_longer"
     self.__overwrite_file_and_query(unique_database, table_name,
-      self.SHORT_FILE, self.LONG_FILE, 'invalid version number', self.LONG_FILE_NUM_ROWS)
+      self.SHORT_FILE, self.LONG_FILE, 'invalid Parquet version number',
+      self.LONG_FILE_NUM_ROWS)
 
   def test_delete_file(self, vector, unique_database):
     """Deletes an existing file without refreshing metadata."""