You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/04/05 07:43:14 UTC

[impala] 02/03: IMPALA-11115: Fix hitting DCHECK for brotli and deflate compressions

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6bf56c95c7a8791cdfddd4d075afff0cf26f471c
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Thu Feb 17 17:13:30 2022 +0100

    IMPALA-11115: Fix hitting DCHECK for brotli and deflate compressions
    
    The DCHECK was hit when an unsupported compression was included in
    enum THdfsCompression but not in COMPRESSION_MAP.
    Removed COMPRESSION_MAP as we can get the names from enum
    THdfsCompression directly.
    
    In release builds this didn't cause a crash, only a weird error
    message ("INVALID" instead of the compression name).
    
    Testing:
    - added ee tests that try to insert with brotli and deflate
    
    Change-Id: Ic38294b108ff3c4aa0b49117df95c5a1b8c60a4b
    Reviewed-on: http://gerrit.cloudera.org:8080/18242
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/util/codec.cc                                      |  8 +++-----
 common/thrift/CatalogObjects.thrift                       | 10 ----------
 .../queries/QueryTest/insert_parquet_invalid_codec.test   | 15 ++++++++++++---
 tests/query_test/test_insert_parquet.py                   |  7 ++-----
 4 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/be/src/util/codec.cc b/be/src/util/codec.cc
index d78814362..b18f5adb3 100644
--- a/be/src/util/codec.cc
+++ b/be/src/util/codec.cc
@@ -20,6 +20,7 @@
 #include <ostream>
 #include <utility>
 
+#include <boost/algorithm/string.hpp>
 #include <zstd.h>
 
 #include "common/compiler-util.h"
@@ -59,11 +60,8 @@ const Codec::CodecMap Codec::CODEC_MAP = {{"", THdfsCompression::NONE},
     {ZSTD_COMPRESSION, THdfsCompression::ZSTD}};
 
 string Codec::GetCodecName(THdfsCompression::type type) {
-  for (const CodecMap::value_type& codec: g_CatalogObjects_constants.COMPRESSION_MAP) {
-    if (codec.second == type) return codec.first;
-  }
-  DCHECK(false) << "Missing codec in COMPRESSION_MAP: " << type;
-  return "INVALID";
+  return boost::algorithm::to_lower_copy(
+      string(_THdfsCompression_VALUES_TO_NAMES.find(type)->second));
 }
 
 Status Codec::GetHadoopCodecClassName(THdfsCompression::type type, string* out_name) {
diff --git a/common/thrift/CatalogObjects.thrift b/common/thrift/CatalogObjects.thrift
index e6bc98745..e49d2c99c 100644
--- a/common/thrift/CatalogObjects.thrift
+++ b/common/thrift/CatalogObjects.thrift
@@ -150,16 +150,6 @@ struct TCompressionCodec {
   2: optional i32 compression_level
 }
 
-// Mapping from names defined by Avro to values in the THdfsCompression enum.
-const map<string, THdfsCompression> COMPRESSION_MAP = {
-  "": THdfsCompression.NONE,
-  "none": THdfsCompression.NONE,
-  "deflate": THdfsCompression.DEFAULT,
-  "gzip": THdfsCompression.GZIP,
-  "bzip2": THdfsCompression.BZIP2,
-  "snappy": THdfsCompression.SNAPPY
-}
-
 // Represents a single item in a partition spec (column name + value)
 struct TPartitionKeyValue {
   // Partition column name
diff --git a/testdata/workloads/functional-query/queries/QueryTest/insert_parquet_invalid_codec.test b/testdata/workloads/functional-query/queries/QueryTest/insert_parquet_invalid_codec.test
index fc5d8bde3..5985b5b1e 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/insert_parquet_invalid_codec.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/insert_parquet_invalid_codec.test
@@ -1,14 +1,23 @@
 ====
 ---- QUERY
 # Insert parquet table with unsupported codec
-create table if not exists parquet_invalid_codec (x BIGINT) stored as parquet
-location '$FILESYSTEM_PREFIX/test-warehouse/parquet_invalid_codec';
+create table parquet_invalid_codec (x BIGINT) stored as parquet;
 ====
 ---- QUERY
+set compression_codec=bzip2;
 insert overwrite table parquet_invalid_codec select 1;
 ---- CATCH
 Invalid parquet compression codec bzip2
 ====
 ---- QUERY
-drop table parquet_invalid_codec
+set compression_codec=deflate;
+insert overwrite table parquet_invalid_codec select 1;
+---- CATCH
+Invalid parquet compression codec deflate
+====
+---- QUERY
+set compression_codec=brotli;
+insert overwrite table parquet_invalid_codec select 1;
+---- CATCH
+Invalid parquet compression codec brotli
 ====
diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py
index bd898a8a8..8cbdba687 100644
--- a/tests/query_test/test_insert_parquet.py
+++ b/tests/query_test/test_insert_parquet.py
@@ -176,11 +176,8 @@ class TestInsertParquetInvalidCodec(ImpalaTestSuite):
         lambda v: v.get_value('table_format').compression_codec == 'none')
 
   @SkipIfLocal.multiple_impalad
-  def test_insert_parquet_invalid_codec(self, vector):
-    vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
-        vector.get_value('compression_codec')
-    self.run_test_case('QueryTest/insert_parquet_invalid_codec', vector,
-                       multiple_impalad=True)
+  def test_insert_parquet_invalid_codec(self, vector, unique_database):
+    self.run_test_case('QueryTest/insert_parquet_invalid_codec', vector, unique_database)
 
 
 class TestInsertParquetVerifySize(ImpalaTestSuite):