You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/04/05 07:43:14 UTC
[impala] 02/03: IMPALA-11115: Fix hitting DCHECK for brotli and deflate compressions
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 6bf56c95c7a8791cdfddd4d075afff0cf26f471c
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Thu Feb 17 17:13:30 2022 +0100
IMPALA-11115: Fix hitting DCHECK for brotli and deflate compressions
The DCHECK was hit when an unsupported compression was included in
enum THdfsCompression but not in COMPRESSION_MAP.
Removed COMPRESSION_MAP as we can get the names from enum
THdfsCompression directly.
In release builds this didn't cause a crash, only a weird error
message ("INVALID" instead of the compression name).
Testing:
- added end-to-end (EE) tests that try to insert with brotli and deflate
Change-Id: Ic38294b108ff3c4aa0b49117df95c5a1b8c60a4b
Reviewed-on: http://gerrit.cloudera.org:8080/18242
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/util/codec.cc | 8 +++-----
common/thrift/CatalogObjects.thrift | 10 ----------
.../queries/QueryTest/insert_parquet_invalid_codec.test | 15 ++++++++++++---
tests/query_test/test_insert_parquet.py | 7 ++-----
4 files changed, 17 insertions(+), 23 deletions(-)
diff --git a/be/src/util/codec.cc b/be/src/util/codec.cc
index d78814362..b18f5adb3 100644
--- a/be/src/util/codec.cc
+++ b/be/src/util/codec.cc
@@ -20,6 +20,7 @@
#include <ostream>
#include <utility>
+#include <boost/algorithm/string.hpp>
#include <zstd.h>
#include "common/compiler-util.h"
@@ -59,11 +60,8 @@ const Codec::CodecMap Codec::CODEC_MAP = {{"", THdfsCompression::NONE},
{ZSTD_COMPRESSION, THdfsCompression::ZSTD}};
string Codec::GetCodecName(THdfsCompression::type type) {
- for (const CodecMap::value_type& codec: g_CatalogObjects_constants.COMPRESSION_MAP) {
- if (codec.second == type) return codec.first;
- }
- DCHECK(false) << "Missing codec in COMPRESSION_MAP: " << type;
- return "INVALID";
+ return boost::algorithm::to_lower_copy(
+ string(_THdfsCompression_VALUES_TO_NAMES.find(type)->second));
}
Status Codec::GetHadoopCodecClassName(THdfsCompression::type type, string* out_name) {
diff --git a/common/thrift/CatalogObjects.thrift b/common/thrift/CatalogObjects.thrift
index e6bc98745..e49d2c99c 100644
--- a/common/thrift/CatalogObjects.thrift
+++ b/common/thrift/CatalogObjects.thrift
@@ -150,16 +150,6 @@ struct TCompressionCodec {
2: optional i32 compression_level
}
-// Mapping from names defined by Avro to values in the THdfsCompression enum.
-const map<string, THdfsCompression> COMPRESSION_MAP = {
- "": THdfsCompression.NONE,
- "none": THdfsCompression.NONE,
- "deflate": THdfsCompression.DEFAULT,
- "gzip": THdfsCompression.GZIP,
- "bzip2": THdfsCompression.BZIP2,
- "snappy": THdfsCompression.SNAPPY
-}
-
// Represents a single item in a partition spec (column name + value)
struct TPartitionKeyValue {
// Partition column name
diff --git a/testdata/workloads/functional-query/queries/QueryTest/insert_parquet_invalid_codec.test b/testdata/workloads/functional-query/queries/QueryTest/insert_parquet_invalid_codec.test
index fc5d8bde3..5985b5b1e 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/insert_parquet_invalid_codec.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/insert_parquet_invalid_codec.test
@@ -1,14 +1,23 @@
====
---- QUERY
# Insert parquet table with unsupported codec
-create table if not exists parquet_invalid_codec (x BIGINT) stored as parquet
-location '$FILESYSTEM_PREFIX/test-warehouse/parquet_invalid_codec';
+create table parquet_invalid_codec (x BIGINT) stored as parquet;
====
---- QUERY
+set compression_codec=bzip2;
insert overwrite table parquet_invalid_codec select 1;
---- CATCH
Invalid parquet compression codec bzip2
====
---- QUERY
-drop table parquet_invalid_codec
+set compression_codec=deflate;
+insert overwrite table parquet_invalid_codec select 1;
+---- CATCH
+Invalid parquet compression codec deflate
+====
+---- QUERY
+set compression_codec=brotli;
+insert overwrite table parquet_invalid_codec select 1;
+---- CATCH
+Invalid parquet compression codec brotli
====
diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py
index bd898a8a8..8cbdba687 100644
--- a/tests/query_test/test_insert_parquet.py
+++ b/tests/query_test/test_insert_parquet.py
@@ -176,11 +176,8 @@ class TestInsertParquetInvalidCodec(ImpalaTestSuite):
lambda v: v.get_value('table_format').compression_codec == 'none')
@SkipIfLocal.multiple_impalad
- def test_insert_parquet_invalid_codec(self, vector):
- vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
- vector.get_value('compression_codec')
- self.run_test_case('QueryTest/insert_parquet_invalid_codec', vector,
- multiple_impalad=True)
+ def test_insert_parquet_invalid_codec(self, vector, unique_database):
+ self.run_test_case('QueryTest/insert_parquet_invalid_codec', vector, unique_database)
class TestInsertParquetVerifySize(ImpalaTestSuite):