You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/04/09 11:11:49 UTC

[incubator-doris] branch master updated: [fix](join) Fix error bucket num get in bucket shuffle join in dynamic partition (#8891)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ce6b5169c2 [fix](join) Fix error bucket num get in bucket shuffle join in dynamic partition (#8891)
ce6b5169c2 is described below

commit ce6b5169c2249d56e44d988d4df37141ef914f2a
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Sat Apr 9 19:11:44 2022 +0800

    [fix](join) Fix error bucket num get in bucket shuffle join in dynamic partition (#8891)
---
 be/src/service/CMakeLists.txt                            |  2 +-
 build.sh                                                 |  3 ---
 .../main/java/org/apache/doris/planner/OlapScanNode.java |  2 ++
 .../src/main/java/org/apache/doris/qe/Coordinator.java   |  4 +++-
 .../test/java/org/apache/doris/qe/CoordinatorTest.java   |  2 ++
 .../sql/agg_output_as_right_tale_left_outer_order.out    |  3 +++
 ...tale_left_outer_order.out => bucket_shuffle_join.out} |  6 +++---
 .../suites/join/ddl/test_bucket_shuffle_join.sql         | 16 ++++++++++++++++
 regression-test/suites/join/ddl/test_join.sql            |  4 ++--
 regression-test/suites/join/load.groovy                  |  9 ++++++---
 regression-test/suites/join/sql/bucket_shuffle_join.sql  |  1 +
 11 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt
index fecc5b4153..fb07d2cca7 100644
--- a/be/src/service/CMakeLists.txt
+++ b/be/src/service/CMakeLists.txt
@@ -44,7 +44,7 @@ if (${MAKE_TEST} STREQUAL "OFF")
     install(DIRECTORY DESTINATION ${OUTPUT_DIR}/lib/)
     install(TARGETS palo_be DESTINATION ${OUTPUT_DIR}/lib/)
 
-    if (${STRIP_DEBUG_INFO} STREQUAL "ON")
+    if ("${STRIP_DEBUG_INFO}" STREQUAL "ON")
         add_custom_command(TARGET palo_be POST_BUILD
             COMMAND ${CMAKE_OBJCOPY} --only-keep-debug $<TARGET_FILE:palo_be> $<TARGET_FILE:palo_be>.dbg
             COMMAND ${CMAKE_STRIP} --strip-debug --strip-unneeded $<TARGET_FILE:palo_be>
diff --git a/build.sh b/build.sh
index edc778dd19..faa2d34350 100755
--- a/build.sh
+++ b/build.sh
@@ -305,9 +305,6 @@ if [ "${BUILD_DOCS}" = "ON" ] ; then
 fi
 
 function build_ui() {
-    # check NPM env here, not in env.sh.
-    # Because UI should be considered a non-essential component at runtime.
-    # Only when the compilation is required, check the relevant compilation environment.
     NPM=npm
     if ! ${NPM} --version; then
         echo "Error: npm is not found"
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index 6310a7aaf8..0ba2e7a693 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -172,6 +172,8 @@ public class OlapScanNode extends ScanNode {
         setCanTurnOnPreAggr(false);
     }
 
+    public long getTotalTabletsNum() { return totalTabletsNum; }
+
     public boolean getForceOpenPreAgg() {
         return forceOpenPreAgg;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
index 330af06537..7d2e9fbc2a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
@@ -1787,7 +1787,9 @@ public class Coordinator {
         private void computeScanRangeAssignmentByBucket(
                 final OlapScanNode scanNode, ImmutableMap<Long, Backend> idToBackend, Map<TNetworkAddress, Long> addressToBackendID) throws Exception {
             if (!fragmentIdToSeqToAddressMap.containsKey(scanNode.getFragmentId())) {
-                fragmentIdToBucketNumMap.put(scanNode.getFragmentId(), scanNode.getOlapTable().getDefaultDistributionInfo().getBucketNum());
+                // Bucket shuffle join is only applied when exactly one partition is hit, so totalTabletsNum is the
+                // number of tablets in that single partition, which is the correct bucket num for the join.
+                fragmentIdToBucketNumMap.put(scanNode.getFragmentId(), (int)scanNode.getTotalTabletsNum());
                 fragmentIdToSeqToAddressMap.put(scanNode.getFragmentId(), new HashedMap());
                 fragmentIdBucketSeqToScanRangeMap.put(scanNode.getFragmentId(), new BucketSeqToScanRange());
                 fragmentIdToBuckendIdBucketCountMap.put(scanNode.getFragmentId(), new HashMap<>());
diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/CoordinatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/CoordinatorTest.java
index fb86df6e33..e355f6c247 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/qe/CoordinatorTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/qe/CoordinatorTest.java
@@ -235,6 +235,7 @@ public class CoordinatorTest extends Coordinator {
         }
 
         Deencapsulation.setField(olapScanNode, "bucketSeq2locations", bucketseq2localtion);
+        Deencapsulation.setField(olapScanNode, "totalTabletsNum", 66);
         olapScanNode.setFragment(new PlanFragment(planFragmentId, olapScanNode,
                 new DataPartition(TPartitionType.UNPARTITIONED)));
 
@@ -357,6 +358,7 @@ public class CoordinatorTest extends Coordinator {
         }
 
         Deencapsulation.setField(olapScanNode, "bucketSeq2locations", bucketseq2localtion);
+        Deencapsulation.setField(olapScanNode, "totalTabletsNum", 66);
         olapScanNode.setFragment(new PlanFragment(planFragmentId, olapScanNode,
                 new DataPartition(TPartitionType.UNPARTITIONED)));
 
diff --git a/regression-test/data/join/sql/agg_output_as_right_tale_left_outer_order.out b/regression-test/data/join/sql/agg_output_as_right_tale_left_outer_order.out
index e13e459980..67878579f0 100644
--- a/regression-test/data/join/sql/agg_output_as_right_tale_left_outer_order.out
+++ b/regression-test/data/join/sql/agg_output_as_right_tale_left_outer_order.out
@@ -2,4 +2,7 @@
 -- !agg_output_as_right_tale_left_outer_order --
 1	1
 2	2
+3	3
+4	4
+5	5
 
diff --git a/regression-test/data/join/sql/agg_output_as_right_tale_left_outer_order.out b/regression-test/data/join/sql/bucket_shuffle_join.out
similarity index 59%
copy from regression-test/data/join/sql/agg_output_as_right_tale_left_outer_order.out
copy to regression-test/data/join/sql/bucket_shuffle_join.out
index e13e459980..87f57761ba 100644
--- a/regression-test/data/join/sql/agg_output_as_right_tale_left_outer_order.out
+++ b/regression-test/data/join/sql/bucket_shuffle_join.out
@@ -1,5 +1,5 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
--- !agg_output_as_right_tale_left_outer_order --
-1	1
-2	2
+-- !bucket_shuffle_join --
+1	2021-12-01T00:00
+2	2021-12-01T00:00
 
diff --git a/regression-test/suites/join/ddl/test_bucket_shuffle_join.sql b/regression-test/suites/join/ddl/test_bucket_shuffle_join.sql
new file mode 100644
index 0000000000..7d461bf86a
--- /dev/null
+++ b/regression-test/suites/join/ddl/test_bucket_shuffle_join.sql
@@ -0,0 +1,16 @@
+CREATE TABLE `test_bucket_shuffle_join` (
+  `id` int(11) NOT NULL COMMENT "",
+  `rectime` datetime NOT NULL COMMENT ""
+) ENGINE=OLAP
+UNIQUE KEY(`id`, `rectime`)
+COMMENT "olap"
+PARTITION BY RANGE(`rectime`)
+(
+PARTITION p202111 VALUES [('2021-11-01 00:00:00'), ('2021-12-01 00:00:00')))
+DISTRIBUTED BY HASH(`id`) BUCKETS 10
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"in_memory" = "false",
+"storage_format" = "V2"
+)
+
diff --git a/regression-test/suites/join/ddl/test_join.sql b/regression-test/suites/join/ddl/test_join.sql
index 02c0ffe72f..fe9a2c6e90 100644
--- a/regression-test/suites/join/ddl/test_join.sql
+++ b/regression-test/suites/join/ddl/test_join.sql
@@ -3,9 +3,9 @@ CREATE TABLE test_join (
 ) ENGINE=OLAP
 UNIQUE KEY(`k1`)
 COMMENT "olap"
-DISTRIBUTED BY HASH(`k1`) BUCKETS 20
+DISTRIBUTED BY HASH(`k1`) BUCKETS 10
 PROPERTIES (
 "in_memory" = "false",
 "storage_format" = "V2",
 "replication_num" = "1"
-);
+);
\ No newline at end of file
diff --git a/regression-test/suites/join/load.groovy b/regression-test/suites/join/load.groovy
index a80a00e615..60c429b068 100644
--- a/regression-test/suites/join/load.groovy
+++ b/regression-test/suites/join/load.groovy
@@ -20,7 +20,7 @@
 // and modified by Doris.
 
 suite("load") {
-    def tables=["test_join"]
+    def tables=["test_join", "test_bucket_shuffle_join"]
 
     for (String table in tables) {
         sql """ DROP TABLE IF EXISTS $table """
@@ -30,6 +30,9 @@ suite("load") {
         sql new File("""${context.file.parent}/ddl/${table}.sql""").text
     }
 
-    sql """ insert into test_join select 1 """
-    sql """ insert into test_join select 2 """
+    sql """ insert into test_join values(1),(2),(3),(4),(5) """
+    sql """ ALTER TABLE test_bucket_shuffle_join ADD PARTITION p202112 
+            VALUES LESS THAN ("2022-01-01 00:00:00") DISTRIBUTED BY HASH(id) BUCKETS 2;"""
+    sql """ insert into test_bucket_shuffle_join values(1, "2021-12-01 00:00:00"),
+        (2, "2021-12-01 00:00:00"), (3, "2021-12-01 00:00:00")"""
 }
diff --git a/regression-test/suites/join/sql/bucket_shuffle_join.sql b/regression-test/suites/join/sql/bucket_shuffle_join.sql
new file mode 100644
index 0000000000..807613e2e4
--- /dev/null
+++ b/regression-test/suites/join/sql/bucket_shuffle_join.sql
@@ -0,0 +1 @@
+select * from test_bucket_shuffle_join where rectime="2021-12-01 00:00:00" and id in (select k1 from test_join where k1 in (1,2))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org