You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2021/05/26 23:51:38 UTC

[impala] 02/03: IMPALA-10197 (Part 2): Add KUDU_REPLICA_SELECTION query option

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4c07aff664beddf0d0f8e93a45e936176700a31b
Author: wzhou-code <wz...@cloudera.com>
AuthorDate: Mon May 24 18:28:32 2021 -0700

    IMPALA-10197 (Part 2): Add KUDU_REPLICA_SELECTION query option
    
    The previous patch added a new test case in PlannerTest for Kudu,
    which checks for a specific number of hosts and instances in the
    distributed plan with the query option KUDU_REPLICA_SELECTION set to
    LEADER_ONLY.
    However, the leadership isn't deterministic since each Kudu partition
    has its own Raft group and the leaders of these groups are not exactly
    balanced across hosts in a cluster. There's no guarantee that we'll get
    a certain number of leader hosts for a query which accesses multiple Kudu
    partitions. This makes the unit test flaky.
    
    This patch removes the distributed plans from the tests to avoid nondeterminism.
    
    Testing:
     - Reran the Planner test and verified the issue did not happen.
    
    Change-Id: I3e23667c06c273a261e03de3d81fc7ee1f6b0682
    Reviewed-on: http://gerrit.cloudera.org:8080/17502
    Reviewed-by: Quanlong Huang <hu...@gmail.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../kudu-replica-selection-closest-replica.test    | 96 ++-------------------
 .../kudu-replica-selection-leader-only.test        | 97 ++--------------------
 2 files changed, 11 insertions(+), 182 deletions(-)

diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-replica-selection-closest-replica.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-replica-selection-closest-replica.test
index c15a708..7326ac9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-replica-selection-closest-replica.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-replica-selection-closest-replica.test
@@ -1,35 +1,14 @@
 select * from functional_kudu.zipcode_incomes where id = '8600000US00601'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=1.88MB mem-reservation=0B thread-reservation=2
+Per-Host Resources: mem-estimate=7.75MB mem-reservation=4.00MB thread-reservation=2
   PLAN-ROOT SINK
   |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
   |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
   |
   00:SCAN KUDU [functional_kudu.zipcode_incomes]
      kudu predicates: id = '8600000US00601'
-     mem-estimate=1.88MB mem-reservation=0B thread-reservation=1
-     tuple-ids=0 row-size=124B cardinality=1
-     in pipelines: 00(GETNEXT)
----- DISTRIBUTEDPLAN
-F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=16.00KB mem-reservation=0B thread-reservation=1
-  PLAN-ROOT SINK
-  |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
-  |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
-  |
-  01:EXCHANGE [UNPARTITIONED]
-     mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
-     tuple-ids=0 row-size=124B cardinality=1
-     in pipelines: 00(GETNEXT)
-
-F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
-Per-Host Resources: mem-estimate=1.88MB mem-reservation=0B thread-reservation=2
-  DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED]
-  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
-  00:SCAN KUDU [functional_kudu.zipcode_incomes]
-     kudu predicates: id = '8600000US00601'
-     mem-estimate=1.88MB mem-reservation=0B thread-reservation=1
+     mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
      tuple-ids=0 row-size=124B cardinality=1
      in pipelines: 00(GETNEXT)
 ====
@@ -37,7 +16,7 @@ Per-Host Resources: mem-estimate=1.88MB mem-reservation=0B thread-reservation=2
 select * from functional_kudu.zipcode_incomes where id != '1' and zip = '2'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
+Per-Host Resources: mem-estimate=7.75MB mem-reservation=4.00MB thread-reservation=2
   PLAN-ROOT SINK
   |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
   |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
@@ -48,33 +27,11 @@ Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
      mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
      tuple-ids=0 row-size=124B cardinality=1
      in pipelines: 00(GETNEXT)
----- DISTRIBUTEDPLAN
-F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=16.00KB mem-reservation=0B thread-reservation=1
-  PLAN-ROOT SINK
-  |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
-  |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
-  |
-  01:EXCHANGE [UNPARTITIONED]
-     mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
-     tuple-ids=0 row-size=124B cardinality=1
-     in pipelines: 00(GETNEXT)
-
-F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
-  DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED]
-  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
-  00:SCAN KUDU [functional_kudu.zipcode_incomes]
-     predicates: id != '1'
-     kudu predicates: zip = '2'
-     mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
-     tuple-ids=0 row-size=124B cardinality=1
-     in pipelines: 00(GETNEXT)
 ====
 select * from functional_kudu.zipcode_incomes where id > '1' and zip > '2'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
+Per-Host Resources: mem-estimate=7.75MB mem-reservation=4.00MB thread-reservation=2
   PLAN-ROOT SINK
   |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
   |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
@@ -84,32 +41,11 @@ Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
      mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
      tuple-ids=0 row-size=124B cardinality=3.32K
      in pipelines: 00(GETNEXT)
----- DISTRIBUTEDPLAN
-F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=517.93KB mem-reservation=0B thread-reservation=1
-  PLAN-ROOT SINK
-  |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
-  |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
-  |
-  01:EXCHANGE [UNPARTITIONED]
-     mem-estimate=517.93KB mem-reservation=0B thread-reservation=0
-     tuple-ids=0 row-size=124B cardinality=3.32K
-     in pipelines: 00(GETNEXT)
-
-F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
-  DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED]
-  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
-  00:SCAN KUDU [functional_kudu.zipcode_incomes]
-     kudu predicates: zip > '2', id > '1'
-     mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
-     tuple-ids=0 row-size=124B cardinality=3.32K
-     in pipelines: 00(GETNEXT)
 ====
 select * from functional_kudu.zipcode_incomes where id = '1' or id = '2' or zip = '3'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
+Per-Host Resources: mem-estimate=7.75MB mem-reservation=4.00MB thread-reservation=2
   PLAN-ROOT SINK
   |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
   |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
@@ -119,26 +55,4 @@ Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
      mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
      tuple-ids=0 row-size=124B cardinality=3
      in pipelines: 00(GETNEXT)
----- DISTRIBUTEDPLAN
-F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=16.00KB mem-reservation=0B thread-reservation=1
-  PLAN-ROOT SINK
-  |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
-  |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
-  |
-  01:EXCHANGE [UNPARTITIONED]
-     mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
-     tuple-ids=0 row-size=124B cardinality=3
-     in pipelines: 00(GETNEXT)
-
-F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
-  DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED]
-  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
-  00:SCAN KUDU [functional_kudu.zipcode_incomes]
-     predicates: id IN ('1', '2') OR zip = '3'
-     mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
-     tuple-ids=0 row-size=124B cardinality=3
-     in pipelines: 00(GETNEXT)
 ====
-
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-replica-selection-leader-only.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-replica-selection-leader-only.test
index 20d958f..44c2f0c 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-replica-selection-leader-only.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-replica-selection-leader-only.test
@@ -11,133 +11,48 @@ Per-Host Resources: mem-estimate=7.75MB mem-reservation=4.00MB thread-reservatio
      mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
      tuple-ids=0 row-size=124B cardinality=1
      in pipelines: 00(GETNEXT)
----- DISTRIBUTEDPLAN
-F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1
-  PLAN-ROOT SINK
-  |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
-  |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
-  |
-  01:EXCHANGE [UNPARTITIONED]
-     mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
-     tuple-ids=0 row-size=124B cardinality=1
-     in pipelines: 00(GETNEXT)
-
-F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
-Per-Host Resources: mem-estimate=3.75MB mem-reservation=0B thread-reservation=2
-  DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED]
-  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
-  00:SCAN KUDU [functional_kudu.zipcode_incomes, LEADER-only]
-     kudu predicates: id = '8600000US00601'
-     mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
-     tuple-ids=0 row-size=124B cardinality=1
-     in pipelines: 00(GETNEXT)
 ====
 # The cardinality from "zip = '2'" should dominate.
 select * from functional_kudu.zipcode_incomes where id != '1' and zip = '2'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=9.62MB mem-reservation=4.00MB thread-reservation=2
-  PLAN-ROOT SINK
-  |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
-  |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
-  |
-  00:SCAN KUDU [functional_kudu.zipcode_incomes, LEADER-only]
-     predicates: id != '1'
-     kudu predicates: zip = '2'
-     mem-estimate=5.62MB mem-reservation=0B thread-reservation=1
-     tuple-ids=0 row-size=124B cardinality=1
-     in pipelines: 00(GETNEXT)
----- DISTRIBUTEDPLAN
-F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1
+Per-Host Resources: mem-estimate=7.75MB mem-reservation=4.00MB thread-reservation=2
   PLAN-ROOT SINK
   |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
   |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
   |
-  01:EXCHANGE [UNPARTITIONED]
-     mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
-     tuple-ids=0 row-size=124B cardinality=1
-     in pipelines: 00(GETNEXT)
-
-F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=2
-Per-Host Resources: mem-estimate=5.62MB mem-reservation=0B thread-reservation=2
-  DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED]
-  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
   00:SCAN KUDU [functional_kudu.zipcode_incomes, LEADER-only]
      predicates: id != '1'
      kudu predicates: zip = '2'
-     mem-estimate=5.62MB mem-reservation=0B thread-reservation=1
+     mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
      tuple-ids=0 row-size=124B cardinality=1
      in pipelines: 00(GETNEXT)
 ====
 select * from functional_kudu.zipcode_incomes where id > '1' and zip > '2'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=9.62MB mem-reservation=4.00MB thread-reservation=2
-  PLAN-ROOT SINK
-  |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
-  |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
-  |
-  00:SCAN KUDU [functional_kudu.zipcode_incomes, LEADER-only]
-     kudu predicates: zip > '2', id > '1'
-     mem-estimate=5.62MB mem-reservation=0B thread-reservation=1
-     tuple-ids=0 row-size=124B cardinality=3.32K
-     in pipelines: 00(GETNEXT)
----- DISTRIBUTEDPLAN
-F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=4.45MB mem-reservation=4.00MB thread-reservation=1
+Per-Host Resources: mem-estimate=7.75MB mem-reservation=4.00MB thread-reservation=2
   PLAN-ROOT SINK
   |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
   |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
   |
-  01:EXCHANGE [UNPARTITIONED]
-     mem-estimate=456.89KB mem-reservation=0B thread-reservation=0
-     tuple-ids=0 row-size=124B cardinality=3.32K
-     in pipelines: 00(GETNEXT)
-
-F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=2
-Per-Host Resources: mem-estimate=5.62MB mem-reservation=0B thread-reservation=2
-  DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED]
-  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
   00:SCAN KUDU [functional_kudu.zipcode_incomes, LEADER-only]
      kudu predicates: zip > '2', id > '1'
-     mem-estimate=5.62MB mem-reservation=0B thread-reservation=1
+     mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
      tuple-ids=0 row-size=124B cardinality=3.32K
      in pipelines: 00(GETNEXT)
 ====
 select * from functional_kudu.zipcode_incomes where id = '1' or id = '2' or zip = '3'
 ---- PLAN
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=9.62MB mem-reservation=4.00MB thread-reservation=2
-  PLAN-ROOT SINK
-  |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
-  |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
-  |
-  00:SCAN KUDU [functional_kudu.zipcode_incomes, LEADER-only]
-     predicates: id IN ('1', '2') OR zip = '3'
-     mem-estimate=5.62MB mem-reservation=0B thread-reservation=1
-     tuple-ids=0 row-size=124B cardinality=3
-     in pipelines: 00(GETNEXT)
----- DISTRIBUTEDPLAN
-F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
-Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1
+Per-Host Resources: mem-estimate=7.75MB mem-reservation=4.00MB thread-reservation=2
   PLAN-ROOT SINK
   |  output exprs: functional_kudu.zipcode_incomes.id, functional_kudu.zipcode_incomes.zip, functional_kudu.zipcode_incomes.description1, functional_kudu.zipcode_incomes.description2, functional_kudu.zipcode_incomes.income
   |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
   |
-  01:EXCHANGE [UNPARTITIONED]
-     mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
-     tuple-ids=0 row-size=124B cardinality=3
-     in pipelines: 00(GETNEXT)
-
-F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=2
-Per-Host Resources: mem-estimate=5.62MB mem-reservation=0B thread-reservation=2
-  DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED]
-  |  mem-estimate=0B mem-reservation=0B thread-reservation=0
   00:SCAN KUDU [functional_kudu.zipcode_incomes, LEADER-only]
      predicates: id IN ('1', '2') OR zip = '3'
-     mem-estimate=5.62MB mem-reservation=0B thread-reservation=1
+     mem-estimate=3.75MB mem-reservation=0B thread-reservation=1
      tuple-ids=0 row-size=124B cardinality=3
      in pipelines: 00(GETNEXT)
 ====