You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2023/08/28 00:05:39 UTC
[impala] branch master updated: IMPALA-12387: PartialUpdates is misleading for LOCAL filter
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 218c4c447 IMPALA-12387: PartialUpdates is misleading for LOCAL filter
218c4c447 is described below
commit 218c4c447eadb14fadb8310db4b46ab8c04cb1ba
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Mon Aug 21 11:02:21 2023 -0700
IMPALA-12387: PartialUpdates is misleading for LOCAL filter
In the filter table, PartialUpdates is intended to mark that the coordinator
received only partial updates from contributing fragments. This can be
misleading for a LOCAL filter in the columns "Min value", "Max value", and
"In-list size", because LOCAL filters are no longer aggregated in the
coordinator. Thus, the coordinator does not receive any filter updates.
This patch marks such column values as "LOCAL" if no global aggregation is
expected in the coordinator.
Testing:
- Pass core tests.
Change-Id: I56078a458799671246ff90b831e5ecebd04a78e8
Reviewed-on: http://gerrit.cloudera.org:8080/20397
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/runtime/coordinator.cc | 10 ++++++++--
.../functional-query/queries/QueryTest/in_list_filters.test | 6 +++---
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index e5f82edfe..1bce78dde 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -662,6 +662,7 @@ string Coordinator::FilterDebugString() {
// Also add the min/max value for the accumulated filter as follows.
// 'PartialUpdates' - The min and the max are partially updated;
+ // 'LOCAL' - It is a local filter that is not aggregated in coordinator;
// 'AlwaysTrue' - One received filter is AlwaysTrue;
// 'AlwaysFalse' - No filter is received or all received filters are empty;
// 'Real values' - The final accumulated min/max from all filters received.
@@ -686,9 +687,12 @@ string Coordinator::FilterDebugString() {
row.push_back(MinMaxFilter::DebugString(minmax_filterPB.max(),
ColumnType::FromThrift(state.desc().src_expr.nodes[0].type)));
}
- } else {
+ } else if (state.desc().has_remote_targets) {
row.push_back("PartialUpdates");
row.push_back("PartialUpdates");
+ } else {
+ row.push_back("LOCAL");
+ row.push_back("LOCAL");
}
}
row.push_back("");
@@ -707,8 +711,10 @@ string Coordinator::FilterDebugString() {
} else {
row.push_back(std::to_string(in_list_filterPB.value().size()));
}
- } else {
+ } else if (state.desc().has_remote_targets) {
row.push_back("PartialUpdates");
+ } else {
+ row.push_back("LOCAL");
}
}
table_printer.AddRow(row);
diff --git a/testdata/workloads/functional-query/queries/QueryTest/in_list_filters.test b/testdata/workloads/functional-query/queries/QueryTest/in_list_filters.test
index 919c2d850..aafab1756 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/in_list_filters.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/in_list_filters.test
@@ -178,7 +178,7 @@ row_regex: .*RowsRead: 2.43K \(2433\).*
#
# ID Src. Node Tgt. Node(s) Target type Partition filter Pending (Expected) First arrived Completed Enabled Bloom Size Est fpp Min value Max value In-list size
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-# 1 3 0 LOCAL false 0 (3) N/A N/A true IN_LIST PartialUpdates
+# 1 3 0 LOCAL false 0 (3) N/A N/A true IN_LIST LOCAL
# 0 4 1 REMOTE false 0 (3) 431.952ms 431.953ms true IN_LIST 1
select count(*) from alltypes t, alltypestiny a, alltypestiny b
where t.id = a.id and a.tinyint_col = b.tinyint_col and b.id = 0;
@@ -233,7 +233,7 @@ row_regex: .*Filter 0 arrival with 1 items.*
# Final filter table:
# ID Src. Node Tgt. Node(s) Target type Partition filter Pending (Expected) First arrived Completed Enabled Bloom Size Est fpp Min value Max value In-list size
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-# 1 3 0 LOCAL false 0 (3) N/A N/A true IN_LIST PartialUpdates
+# 1 3 0 LOCAL false 0 (3) N/A N/A true IN_LIST LOCAL
# 0 4 1 REMOTE false 0 (3) 427.938ms 427.947ms true IN_LIST 5
select STRAIGHT_JOIN count(*)
from date_tbl t
@@ -251,7 +251,7 @@ row_regex: .*Filter 0 arrival with 5 items.*
# Final filter table:
# ID Src. Node Tgt. Node(s) Target type Partition filter Pending (Expected) First arrived Completed Enabled Bloom Size Est fpp Min value Max value In-list size
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-# 1 3 0 LOCAL false 0 (1) N/A N/A true IN_LIST PartialUpdates
+# 1 3 0 LOCAL false 0 (1) N/A N/A true IN_LIST LOCAL
# 0 4 1 REMOTE false 0 (1) 87.270ms 87.271ms true IN_LIST 1
select count(*)
from tpch_orc_def.supplier, tpch_orc_def.nation, tpch_orc_def.region