You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2022/04/11 16:17:09 UTC

[impala] branch master updated (7b235eebd -> 26398855b)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from 7b235eebd IMPALA-11230: Add test for crash in partitioned Top-N codegen code
     new 6358db758 IMPALA-11227: FE OOM in TestParquetBloomFilter.test_fallback_from_dict_if_no_bloom_tbl_props
     new 26398855b IMPALA-10930: Bump the Java artifact versions to 4.1.0-SNAPSHOT

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 fe/pom.xml                                    |  2 +-
 java/TableFlattener/pom.xml                   |  2 +-
 java/datagenerator/pom.xml                    |  2 +-
 java/executor-deps/pom.xml                    |  2 +-
 java/ext-data-source/api/pom.xml              |  2 +-
 java/ext-data-source/pom.xml                  |  2 +-
 java/ext-data-source/sample/pom.xml           |  2 +-
 java/ext-data-source/test/pom.xml             |  2 +-
 java/pom.xml                                  |  2 +-
 java/query-event-hook-api/pom.xml             |  2 +-
 java/shaded-deps/hive-exec/pom.xml            |  2 +-
 java/shaded-deps/s3a-aws-sdk/pom.xml          |  2 +-
 java/test-hive-udfs/pom.xml                   |  2 +-
 java/yarn-extras/pom.xml                      |  2 +-
 tests/query_test/test_parquet_bloom_filter.py | 28 +++++++++++++--------------
 15 files changed, 27 insertions(+), 29 deletions(-)


[impala] 01/02: IMPALA-11227: FE OOM in TestParquetBloomFilter.test_fallback_from_dict_if_no_bloom_tbl_props

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6358db7587cc2218e4523dae16ac5e362edf37ef
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Wed Apr 6 16:12:33 2022 +0200

    IMPALA-11227: FE OOM in TestParquetBloomFilter.test_fallback_from_dict_if_no_bloom_tbl_props
    
    The huge values clause of the insert SQL statement in
    TestParquetBloomFilter.test_fallback_from_dict_if_no_bloom_tbl_props
    could cause an OutOfMemory error in the FE.
    
    We use a SQL statement with a huge values clause (more than 40 000
    elements) to insert values into a parquet table in some tests, and the
    size of the SQL statement string sometimes causes an OOM error.
    
    After this change, we create these parquet tables with a CTAS from an
    existing table, avoiding any long SQL statements.
    
    Change-Id: I923cc9ba4b6829a2f15e93365f2849b89248598b
    Reviewed-on: http://gerrit.cloudera.org:8080/18387
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/query_test/test_parquet_bloom_filter.py | 28 +++++++++++++--------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/tests/query_test/test_parquet_bloom_filter.py b/tests/query_test/test_parquet_bloom_filter.py
index 998cfd34f..eda93999b 100644
--- a/tests/query_test/test_parquet_bloom_filter.py
+++ b/tests/query_test/test_parquet_bloom_filter.py
@@ -161,9 +161,9 @@ class TestParquetBloomFilter(ImpalaTestSuite):
     # Query an element that is and one that is not present in the table and check whether
     # we correctly do not skip and skip the row group, respectively.
     self._query_element_check_profile(vector, str(unique_database), tbl_name, column_name,
-        0, ['NumBloomFilteredRowGroups: 0 (0)'], ['NumBloomFilteredRowGroups: 1 (1)'])
+        2, ['NumBloomFilteredRowGroups: 0 (0)'], ['NumBloomFilteredRowGroups: 1 (1)'])
     self._query_element_check_profile(vector, str(unique_database), tbl_name, column_name,
-        1, ['NumBloomFilteredRowGroups: 1 (1)'], ['NumBloomFilteredRowGroups: 0 (0)'])
+        3, ['NumBloomFilteredRowGroups: 1 (1)'], ['NumBloomFilteredRowGroups: 0 (0)'])
 
   def test_fallback_from_dict_if_no_bloom_tbl_props(self, vector, unique_database,
       tmpdir):
@@ -196,6 +196,7 @@ class TestParquetBloomFilter(ImpalaTestSuite):
     result_in_table = self.execute_query(query_stmt.format(col_name=col_name,
         db=db_name, tbl=tbl_name, value=element),
         vector.get_value('exec_option'))
+
     for s in strings_in_profile:
       assert s in result_in_table.runtime_profile
     for s in strings_not_in_profile:
@@ -208,24 +209,21 @@ class TestParquetBloomFilter(ImpalaTestSuite):
     fpp = 0.05
     bitset_size = self._optimal_bitset_size(ndv, fpp)
 
-    # We create a table with a single BIGINT column, optionally with table properties for
-    # Bloom filtering.
-    create_stmt = 'create table {db}.{tbl} ({col_name} BIGINT) stored as parquet'
-    if bloom_tbl_prop:
-      create_stmt += ' TBLPROPERTIES("parquet.bloom.filter.columns"="{col_name}:{size}")'
-    create_stmt = create_stmt.format(
-        db=db_name, tbl=tbl_name, col_name=column_name, size=bitset_size)
+    bloom_tbl_props = \
+        'TBLPROPERTIES("parquet.bloom.filter.columns"="{col_name}:{size}")'.format(
+            col_name=column_name, size=bitset_size)
 
-    # We only insert even numbers so an odd number should be filtered out based on the
-    # Bloom filter.
-    values = ['({})'.format(i * 2) for i in range(ndv)]
-    insert_stmt = 'insert into {db}.{tbl} values {values}'.format(
-        db=db_name, tbl=tbl_name, values=','.join(values))
+    # Create a parquet table containing only even numbers so an odd number should be
+    # filtered out based on the Bloom filter (if there is one).
+    create_stmt_template = 'create table {db}.{tbl} stored as parquet {tbl_props} \
+        as (select (row_number() over (order by o_orderkey)) * 2 as {col} \
+        from tpch_parquet.orders limit {ndv})'
+    create_stmt = create_stmt_template.format(db=db_name, tbl=tbl_name,
+        tbl_props=bloom_tbl_props if bloom_tbl_prop else "", col=column_name, ndv=ndv)
 
     vector.get_value('exec_option')['num_nodes'] = 1
     vector.get_value('exec_option')['parquet_bloom_filter_write'] = 'IF_NO_DICT'
     self.execute_query(create_stmt, vector.get_value('exec_option'))
-    self.execute_query(insert_stmt, vector.get_value('exec_option'))
 
   def _optimal_bitset_size(self, ndv, fpp):
     """ Based on ParquetBloomFilter::OptimalByteSize() in


[impala] 02/02: IMPALA-10930: Bump the Java artifact versions to 4.1.0-SNAPSHOT

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 26398855bfcd7017e9c44d0358f0d6bc3ba5b1e9
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Wed Apr 6 19:35:09 2022 -0700

    IMPALA-10930: Bump the Java artifact versions to 4.1.0-SNAPSHOT
    
    This changes the Maven pom.xml files to use version
    4.1.0-SNAPSHOT rather than 4.0.0-SNAPSHOT. In the
    past, these versions were a fixed value, but that
    changed with IMPALA-10198. This is a new step that
    needs to happen on each release.
    
    Testing:
     - Ran a build
    
    Change-Id: I10a589b4fbc15048199943a0e06d079f57840239
    Reviewed-on: http://gerrit.cloudera.org:8080/18390
    Reviewed-by: Tamas Mate <tm...@apache.org>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 fe/pom.xml                           | 2 +-
 java/TableFlattener/pom.xml          | 2 +-
 java/datagenerator/pom.xml           | 2 +-
 java/executor-deps/pom.xml           | 2 +-
 java/ext-data-source/api/pom.xml     | 2 +-
 java/ext-data-source/pom.xml         | 2 +-
 java/ext-data-source/sample/pom.xml  | 2 +-
 java/ext-data-source/test/pom.xml    | 2 +-
 java/pom.xml                         | 2 +-
 java/query-event-hook-api/pom.xml    | 2 +-
 java/shaded-deps/hive-exec/pom.xml   | 2 +-
 java/shaded-deps/s3a-aws-sdk/pom.xml | 2 +-
 java/test-hive-udfs/pom.xml          | 2 +-
 java/yarn-extras/pom.xml             | 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/fe/pom.xml b/fe/pom.xml
index 62cb57ce1..092203317 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -23,7 +23,7 @@ under the License.
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
     <relativePath>../java/pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/java/TableFlattener/pom.xml b/java/TableFlattener/pom.xml
index 53374b211..789fd09c8 100644
--- a/java/TableFlattener/pom.xml
+++ b/java/TableFlattener/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <artifactId>nested-table-flattener</artifactId>
diff --git a/java/datagenerator/pom.xml b/java/datagenerator/pom.xml
index dcc1a1dd6..89e64ca1b 100644
--- a/java/datagenerator/pom.xml
+++ b/java/datagenerator/pom.xml
@@ -23,7 +23,7 @@ under the License.
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/executor-deps/pom.xml b/java/executor-deps/pom.xml
index 5c4b53261..05d4aeb1c 100644
--- a/java/executor-deps/pom.xml
+++ b/java/executor-deps/pom.xml
@@ -34,7 +34,7 @@ under the License.
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.apache.impala</groupId>
diff --git a/java/ext-data-source/api/pom.xml b/java/ext-data-source/api/pom.xml
index 6a6bd20de..11ef1ba71 100644
--- a/java/ext-data-source/api/pom.xml
+++ b/java/ext-data-source/api/pom.xml
@@ -23,7 +23,7 @@
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-data-source</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <artifactId>impala-data-source-api</artifactId>
   <name>Apache Impala External Data Source API</name>
diff --git a/java/ext-data-source/pom.xml b/java/ext-data-source/pom.xml
index 6d0215907..89a514e29 100644
--- a/java/ext-data-source/pom.xml
+++ b/java/ext-data-source/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <artifactId>impala-data-source</artifactId>
diff --git a/java/ext-data-source/sample/pom.xml b/java/ext-data-source/sample/pom.xml
index 29a88f31e..aa12f0d1f 100644
--- a/java/ext-data-source/sample/pom.xml
+++ b/java/ext-data-source/sample/pom.xml
@@ -23,7 +23,7 @@
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-data-source</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <artifactId>impala-data-source-sample</artifactId>
   <name>Apache Impala External Data Source Sample</name>
diff --git a/java/ext-data-source/test/pom.xml b/java/ext-data-source/test/pom.xml
index b2a175b18..fd16e7892 100644
--- a/java/ext-data-source/test/pom.xml
+++ b/java/ext-data-source/test/pom.xml
@@ -23,7 +23,7 @@
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-data-source</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <artifactId>impala-data-source-test</artifactId>
   <name>Apache Impala External Data Source Test Library</name>
diff --git a/java/pom.xml b/java/pom.xml
index 81bd03449..60109686a 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -21,7 +21,7 @@ under the License.
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.apache.impala</groupId>
   <artifactId>impala-parent</artifactId>
-  <version>4.0.0-SNAPSHOT</version>
+  <version>4.1.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Apache Impala Parent POM</name>
 
diff --git a/java/query-event-hook-api/pom.xml b/java/query-event-hook-api/pom.xml
index ddd64ad42..eeae99595 100644
--- a/java/query-event-hook-api/pom.xml
+++ b/java/query-event-hook-api/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <artifactId>query-event-hook-api</artifactId>
diff --git a/java/shaded-deps/hive-exec/pom.xml b/java/shaded-deps/hive-exec/pom.xml
index 58956c98a..642ba3e6e 100644
--- a/java/shaded-deps/hive-exec/pom.xml
+++ b/java/shaded-deps/hive-exec/pom.xml
@@ -27,7 +27,7 @@ the same dependencies
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/java/shaded-deps/s3a-aws-sdk/pom.xml b/java/shaded-deps/s3a-aws-sdk/pom.xml
index 665ebafdc..9040a4635 100644
--- a/java/shaded-deps/s3a-aws-sdk/pom.xml
+++ b/java/shaded-deps/s3a-aws-sdk/pom.xml
@@ -25,7 +25,7 @@ though some of them might not be necessary. The exclusions are sorted alphabetic
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
diff --git a/java/test-hive-udfs/pom.xml b/java/test-hive-udfs/pom.xml
index d4f9c03e2..cd241bdae 100644
--- a/java/test-hive-udfs/pom.xml
+++ b/java/test-hive-udfs/pom.xml
@@ -22,7 +22,7 @@ under the License.
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
diff --git a/java/yarn-extras/pom.xml b/java/yarn-extras/pom.xml
index 1fded2a4a..d45fcd3e6 100644
--- a/java/yarn-extras/pom.xml
+++ b/java/yarn-extras/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.impala</groupId>
     <artifactId>impala-parent</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
+    <version>4.1.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <artifactId>yarn-extras</artifactId>