You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/08/28 00:05:39 UTC

[impala] branch master updated: IMPALA-12387: PartialUpdates is misleading for LOCAL filter

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 218c4c447 IMPALA-12387: PartialUpdates is misleading for LOCAL filter
218c4c447 is described below

commit 218c4c447eadb14fadb8310db4b46ab8c04cb1ba
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Mon Aug 21 11:02:21 2023 -0700

    IMPALA-12387: PartialUpdates is misleading for LOCAL filter
    
    In the filter table, PartialUpdates is intended to indicate that the
    coordinator received only partial updates from the contributing
    fragments. This can be misleading for a LOCAL filter in the columns
    "Min value", "Max value", and "In-list size", because a LOCAL filter is
    no longer aggregated in the coordinator. Thus, the coordinator does not
    receive any filter updates.
    
    This patch marks such column values as "LOCAL" if no global aggregation
    is expected in the coordinator.
    
    Testing:
    - Pass core tests.
    
    Change-Id: I56078a458799671246ff90b831e5ecebd04a78e8
    Reviewed-on: http://gerrit.cloudera.org:8080/20397
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/runtime/coordinator.cc                                  | 10 ++++++++--
 .../functional-query/queries/QueryTest/in_list_filters.test    |  6 +++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index e5f82edfe..1bce78dde 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -662,6 +662,7 @@ string Coordinator::FilterDebugString() {
 
       // Also add the min/max value for the accumulated filter as follows.
       //  'PartialUpdates' - The min and the max are partially updated;
+      //  'LOCAL'          - It is a local filter that is not aggregate in coordinator;
       //  'AlwaysTrue'     - One received filter is AlwaysTrue;
       //  'AlwaysFalse'    - No filter is received or all received filters are empty;
       //  'Real values'    - The final accumulated min/max from all filters received.
@@ -686,9 +687,12 @@ string Coordinator::FilterDebugString() {
             row.push_back(MinMaxFilter::DebugString(minmax_filterPB.max(),
                 ColumnType::FromThrift(state.desc().src_expr.nodes[0].type)));
           }
-        } else {
+        } else if (state.desc().has_remote_targets) {
           row.push_back("PartialUpdates");
           row.push_back("PartialUpdates");
+        } else {
+          row.push_back("LOCAL");
+          row.push_back("LOCAL");
         }
       }
       row.push_back("");
@@ -707,8 +711,10 @@ string Coordinator::FilterDebugString() {
         } else {
           row.push_back(std::to_string(in_list_filterPB.value().size()));
         }
-      } else {
+      } else if (state.desc().has_remote_targets) {
         row.push_back("PartialUpdates");
+      } else {
+        row.push_back("LOCAL");
       }
     }
     table_printer.AddRow(row);
diff --git a/testdata/workloads/functional-query/queries/QueryTest/in_list_filters.test b/testdata/workloads/functional-query/queries/QueryTest/in_list_filters.test
index 919c2d850..aafab1756 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/in_list_filters.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/in_list_filters.test
@@ -178,7 +178,7 @@ row_regex: .*RowsRead: 2.43K \(2433\).*
 #
 # ID  Src. Node  Tgt. Node(s)  Target type  Partition filter  Pending (Expected)  First arrived  Completed  Enabled  Bloom Size  Est fpp  Min value  Max value     In-list size
 #------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-#  1          3             0        LOCAL             false               0 (3)            N/A        N/A     true     IN_LIST                                  PartialUpdates
+#  1          3             0        LOCAL             false               0 (3)            N/A        N/A     true     IN_LIST                                           LOCAL
 #  0          4             1       REMOTE             false               0 (3)      431.952ms  431.953ms     true     IN_LIST                                               1
 select count(*) from alltypes t, alltypestiny a, alltypestiny b
 where t.id = a.id and a.tinyint_col = b.tinyint_col and b.id = 0;
@@ -233,7 +233,7 @@ row_regex: .*Filter 0 arrival with 1 items.*
 # Final filter table:
 # ID  Src. Node  Tgt. Node(s)  Target type  Partition filter  Pending (Expected)  First arrived  Completed  Enabled  Bloom Size  Est fpp  Min value  Max value     In-list size
 #------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-#  1          3             0        LOCAL             false               0 (3)            N/A        N/A     true     IN_LIST                                  PartialUpdates
+#  1          3             0        LOCAL             false               0 (3)            N/A        N/A     true     IN_LIST                                           LOCAL
 #  0          4             1       REMOTE             false               0 (3)      427.938ms  427.947ms     true     IN_LIST                                               5
 select STRAIGHT_JOIN count(*)
 from date_tbl t
@@ -251,7 +251,7 @@ row_regex: .*Filter 0 arrival with 5 items.*
 # Final filter table:
 # ID  Src. Node  Tgt. Node(s)  Target type  Partition filter  Pending (Expected)  First arrived  Completed  Enabled  Bloom Size  Est fpp  Min value  Max value     In-list size
 #------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-#  1          3             0        LOCAL             false               0 (1)            N/A        N/A     true     IN_LIST                                  PartialUpdates
+#  1          3             0        LOCAL             false               0 (1)            N/A        N/A     true     IN_LIST                                           LOCAL
 #  0          4             1       REMOTE             false               0 (1)       87.270ms   87.271ms     true     IN_LIST                                               1
 select count(*)
 from tpch_orc_def.supplier, tpch_orc_def.nation, tpch_orc_def.region