You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/24 00:44:37 UTC
[impala] 01/02: IMPALA-8885: Improve Parquet version metadata error
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit af0e04f33bbf2e93b7676ed7768c335c49b195f2
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Aug 22 17:24:53 2019 -0700
IMPALA-8885: Improve Parquet version metadata error
Update the error message to make it more obvious that
the error could occur by trying to parse a non-Parquet
file as Parquet.
Updated tests that depended on the error text.
Change-Id: I2b36586dba14a31a613d79a0e28efc9a5173e75d
Reviewed-on: http://gerrit.cloudera.org:8080/14126
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
common/thrift/generate_error_codes.py | 6 ++++--
.../queries/QueryTest/parquet-error-propagation-race.test | 2 +-
.../workloads/functional-query/queries/QueryTest/parquet.test | 2 +-
tests/metadata/test_stale_metadata.py | 10 ++++++----
4 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py
index d9d4d68..9dee6e2 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -192,8 +192,10 @@ error_codes = (
("STALE_METADATA_FILE_TOO_SHORT", 59, "Metadata for file '$0' appears stale. "
"Try running \\\"refresh $1\\\" to reload the file metadata."),
- ("PARQUET_BAD_VERSION_NUMBER", 60, "File '$0' has an invalid version number: $1\\n"
- "This could be due to stale metadata. Try running \\\"refresh $2\\\"."),
+ ("PARQUET_BAD_VERSION_NUMBER", 60, "File '$0' has an invalid Parquet version number: "
+ "$1\\n. Please check that it is a valid Parquet file. "
+ "This error can also occur due to stale metadata. "
+ "If you believe this is a valid Parquet file, try running \\\"refresh $2\\\"."),
("SCANNER_INCOMPLETE_READ", 61, "Tried to read $0 bytes but could only read $1 bytes. "
"This may indicate data file corruption. (file $2, byte offset: $3)"),
diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet-error-propagation-race.test b/testdata/workloads/functional-query/queries/QueryTest/parquet-error-propagation-race.test
index 4104595..8d7b97b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/parquet-error-propagation-race.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/parquet-error-propagation-race.test
@@ -9,5 +9,5 @@ INSERT INTO bad_magic_number SELECT 'good';
set debug_action="0:SCANNER_ERROR:DELAY";
SELECT * FROM bad_magic_number
---- CATCH
-invalid version number
+invalid Parquet version number
====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet.test b/testdata/workloads/functional-query/queries/QueryTest/parquet.test
index 0d76ab1..b0b188f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/parquet.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/parquet.test
@@ -50,7 +50,7 @@ bigint,bigint,string,string,boolean,boolean,bigint,bigint,bigint,bigint
# Parquet file with invalid magic number
SELECT * from bad_magic_number
---- CATCH
-File '$NAMENODE/test-warehouse/bad_magic_number_parquet/bad_magic_number.parquet' has an invalid version number: XXXX
+File '$NAMENODE/test-warehouse/bad_magic_number_parquet/bad_magic_number.parquet' has an invalid Parquet version number: XXXX
====
---- QUERY
# count(*) query on parquet file with multiple blocks (one block per node)
diff --git a/tests/metadata/test_stale_metadata.py b/tests/metadata/test_stale_metadata.py
index cd72035..9e5bf5f 100644
--- a/tests/metadata/test_stale_metadata.py
+++ b/tests/metadata/test_stale_metadata.py
@@ -89,12 +89,14 @@ class TestRewrittenFile(ImpalaTestSuite):
def test_new_file_longer(self, vector, unique_database):
"""Rewrites an existing file with a new longer file."""
# Full error is something like:
- # File '...' has an invalid version number: ff4C
- # This could be due to stale metadata. Try running "refresh
- # unique_database_name.new_file_longer".
+ # "File '..' has an invalid Parquet version number: ff4C
+ # Please check that it is a valid Parquet file. This error can also occur due to
+ # stale metadata. If you believe this is a valid Parquet file, try running
+ # "refresh ...".
table_name = "new_file_longer"
self.__overwrite_file_and_query(unique_database, table_name,
- self.SHORT_FILE, self.LONG_FILE, 'invalid version number', self.LONG_FILE_NUM_ROWS)
+ self.SHORT_FILE, self.LONG_FILE, 'invalid Parquet version number',
+ self.LONG_FILE_NUM_ROWS)
def test_delete_file(self, vector, unique_database):
"""Deletes an existing file without refreshing metadata."""