Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/18 08:34:35 UTC

[incubator-doris] branch dev-1.0.1 updated (c41618a0b6 -> 3c414e75b8)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


    from c41618a0b6 [fix](planner) VecNotImplException thrown when query needs rewrite and some slot cannot be changed to nullable (#9589)
     new ca952c91bb [Bug][Vectorized] Fix insert bitmap column with nullable column (#9408)
     new b20df704b1 [fix] fix bug that replica can not be repaired due to DECOMMISSION state (#9424)
     new 66b50f1106 [config] Remove some old config and session variable (#9495)
     new db2d9d160d [improvement](planner) push down predicate past two phase aggregate (#9498)
     new 9c00587a57 [Bug][Vectorized] Fix BE crash with delete condition and enable_storage_vectorization (#9547)
     new f97b9d9387 [Improvement] reduce string size in serialization (#9550)
     new 4315dc8493 [fix](storage-vectorized) fix VMergeIterator core dump (#9564)
     new 0562bab929 [fix](storage) low_cardinality_optimize core dump with "is null" predicate (#9586)
     new 3c414e75b8 [BUG] fix incorrect information_schema.columns results on vec engine (#9612)

The 9 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/olap/comparison_predicate.cpp               |   7 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  27 +-
 be/src/vec/columns/column_dictionary.h             |  30 +--
 be/src/vec/columns/column_string.cpp               |   4 +-
 be/src/vec/exec/vmysql_scan_node.cpp               |   1 +
 be/src/vec/exec/vodbc_scan_node.cpp                |   1 +
 be/src/vec/exec/vschema_scan_node.cpp              |  10 +-
 be/src/vec/olap/vgeneric_iterators.cpp             |   4 +-
 be/src/vec/sink/vtablet_sink.cpp                   | 209 +++++++-------
 be/src/vec/sink/vtablet_sink.h                     |   6 +-
 docs/en/administrator-guide/config/fe_config.md    |  26 +-
 .../load-data/binlog-load-manual.md                |   4 -
 docs/zh-CN/administrator-guide/config/fe_config.md |  26 +-
 .../load-data/binlog-load-manual.md                |   6 +-
 .../doris/analysis/CreateDataSyncJobStmt.java      |   6 -
 .../doris/analysis/CreateMaterializedViewStmt.java |   4 -
 .../org/apache/doris/analysis/LateralViewRef.java  |   4 -
 .../org/apache/doris/catalog/TableProperty.java    |  11 +-
 .../org/apache/doris/clone/TabletSchedCtx.java     |  24 ++
 .../org/apache/doris/clone/TabletScheduler.java    |  38 +--
 .../main/java/org/apache/doris/common/Config.java  |  12 -
 .../apache/doris/planner/SingleNodePlanner.java    | 299 ++++++++++++---------
 .../java/org/apache/doris/qe/SessionVariable.java  |  11 -
 .../java/org/apache/doris/alter/AlterTest.java     |   1 -
 .../org/apache/doris/alter/RollupJobV2Test.java    |   5 +-
 .../doris/analysis/CreateDataSyncJobStmtTest.java  |   6 +-
 .../analysis/CreateMaterializedViewStmtTest.java   |   5 +-
 .../planner/MaterializedViewFunctionTest.java      |   2 +-
 .../java/org/apache/doris/planner/PlannerTest.java |   1 -
 .../doris/planner/TableFunctionPlanTest.java       |   1 -
 .../java/org/apache/doris/utframe/DorisAssert.java |   7 -
 .../data/account/test_information_schema.out       |  31 +++
 .../test_delete.out}                               |  17 +-
 33 files changed, 442 insertions(+), 404 deletions(-)
 create mode 100644 regression-test/data/account/test_information_schema.out
 copy regression-test/data/{correctness/test_select_variance_agg.out => delete/test_delete.out} (58%)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[incubator-doris] 09/09: [BUG] fix incorrect information_schema.columns results on vec engine (#9612)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 3c414e75b8ef7aaba973501c9beb5415b0387540
Author: camby <10...@qq.com>
AuthorDate: Wed May 18 07:44:32 2022 +0800

    [BUG] fix incorrect information_schema.columns results on vec engine (#9612)
    
    * VSchemaScanNode get_next bugfix
    
    * add regression-test case for VSchemaScanNode
    
    Co-authored-by: cambyzju <zh...@baidu.com>
---
 be/src/vec/exec/vmysql_scan_node.cpp               |  1 +
 be/src/vec/exec/vodbc_scan_node.cpp                |  1 +
 be/src/vec/exec/vschema_scan_node.cpp              | 10 ++++---
 .../data/account/test_information_schema.out       | 31 ++++++++++++++++++++++
 4 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/exec/vmysql_scan_node.cpp b/be/src/vec/exec/vmysql_scan_node.cpp
index 980fb57eb8..d5cec3e215 100644
--- a/be/src/vec/exec/vmysql_scan_node.cpp
+++ b/be/src/vec/exec/vmysql_scan_node.cpp
@@ -46,6 +46,7 @@ Status VMysqlScanNode::get_next(RuntimeState* state, vectorized::Block* block, b
     bool mysql_eos = false;
 
     do {
+        columns.resize(_slot_num);
         for (int i = 0; i < _slot_num; ++i) {
             if (mem_reuse) {
                 columns[i] = std::move(*block->get_by_position(i).column).mutate();
diff --git a/be/src/vec/exec/vodbc_scan_node.cpp b/be/src/vec/exec/vodbc_scan_node.cpp
index dc685c57cc..2669858454 100644
--- a/be/src/vec/exec/vodbc_scan_node.cpp
+++ b/be/src/vec/exec/vodbc_scan_node.cpp
@@ -58,6 +58,7 @@ Status VOdbcScanNode::get_next(RuntimeState* state, Block* block, bool* eos) {
     do {
         RETURN_IF_CANCELLED(state);
 
+        columns.resize(column_size);
         for (auto i = 0; i < column_size; i++) {
             if (mem_reuse) {
                 columns[i] = std::move(*block->get_by_position(i).column).mutate();
diff --git a/be/src/vec/exec/vschema_scan_node.cpp b/be/src/vec/exec/vschema_scan_node.cpp
index dfd2c08dcf..5a8968ed58 100644
--- a/be/src/vec/exec/vschema_scan_node.cpp
+++ b/be/src/vec/exec/vschema_scan_node.cpp
@@ -64,12 +64,14 @@ Status VSchemaScanNode::get_next(RuntimeState* state, vectorized::Block* block,
     if (!_is_init) return Status::InternalError("used before initialize.");
     RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
     RETURN_IF_CANCELLED(state);
-    bool mem_reuse = block->mem_reuse();
-    DCHECK(block->rows() == 0);
     std::vector<vectorized::MutableColumnPtr> columns(_slot_num);
     bool schema_eos = false;
 
     do {
+        bool mem_reuse = block->mem_reuse();
+        DCHECK(block->rows() == 0);
+
+        columns.resize(_slot_num);
         for (int i = 0; i < _slot_num; ++i) {
             if (mem_reuse) {
                 columns[i] = std::move(*block->get_by_position(i).column).mutate();
@@ -81,8 +83,8 @@ Status VSchemaScanNode::get_next(RuntimeState* state, vectorized::Block* block,
             RETURN_IF_CANCELLED(state);
 
             // get all slots from schema table.
-            RETURN_IF_ERROR(_schema_scanner->get_next_row(_src_single_tuple, _tuple_pool.get(), &schema_eos));
-
+            RETURN_IF_ERROR(_schema_scanner->get_next_row(_src_single_tuple, _tuple_pool.get(),
+                                                          &schema_eos));
             if (schema_eos) {
                 *eos = true;
                 break;
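
The resize() calls added to VMysqlScanNode, VOdbcScanNode, and VSchemaScanNode all guard the same hazard: each pass of the scanner's do-loop moves its MutableColumnPtrs into the output block, so on the next pass the local vector may be empty or hold moved-from pointers. For the same reason the patch re-evaluates block->mem_reuse() (and the empty-block DCHECK) inside the loop. Below is a minimal sketch of the pattern, using stand-in types rather than the Doris column classes:

```cpp
#include <memory>
#include <vector>

// Stand-ins for vectorized::IColumn / vectorized::MutableColumnPtr.
struct Column { int rows = 0; };
using MutableColumnPtr = std::unique_ptr<Column>;

int main() {
    const int slot_num = 3;
    std::vector<MutableColumnPtr> columns(slot_num);

    for (int pass = 0; pass < 2; ++pass) {
        // The fix: guarantee slot_num valid entries on every pass. After the
        // previous pass moved the vector away, its state is unspecified, so
        // indexing it without this resize would be undefined behavior.
        columns.resize(slot_num);
        for (int i = 0; i < slot_num; ++i) {
            if (!columns[i]) {
                columns[i] = std::make_unique<Column>();  // re-create moved-from slots
            }
            columns[i]->rows += 1;  // stand-in for filling the column
        }
        // Hand the filled columns to the output block (moves them away).
        std::vector<MutableColumnPtr> block_columns = std::move(columns);
    }
    return 0;
}
```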
diff --git a/regression-test/data/account/test_information_schema.out b/regression-test/data/account/test_information_schema.out
new file mode 100644
index 0000000000..1e6a3994f3
--- /dev/null
+++ b/regression-test/data/account/test_information_schema.out
@@ -0,0 +1,31 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+612
+
+-- !sql --
+578
+
+-- !sql --
+544
+
+-- !sql --
+510
+
+-- !sql --
+476
+
+-- !sql --
+612
+
+-- !sql --
+578
+
+-- !sql --
+544
+
+-- !sql --
+510
+
+-- !sql --
+476
+




[incubator-doris] 03/09: [config] Remove some old config and session variable (#9495)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 66b50f11066ac73799651e5decc2ea354387f650
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Tue May 17 22:37:11 2022 +0800

    [config] Remove some old config and session variable (#9495)
    
    1. Remove session variable "enable_lateral_view"
    2. Remove FE config: enable_materialized_view
    3. Remove FE config: enable_create_sync_job
    4. FE config dynamic_partition_enable is now only used to disable the dynamic partition scheduler.
---
 docs/en/administrator-guide/config/fe_config.md    | 26 +---------------------
 .../load-data/binlog-load-manual.md                |  4 ----
 docs/zh-CN/administrator-guide/config/fe_config.md | 26 +---------------------
 .../load-data/binlog-load-manual.md                |  6 +----
 .../doris/analysis/CreateDataSyncJobStmt.java      |  6 -----
 .../doris/analysis/CreateMaterializedViewStmt.java |  4 ----
 .../org/apache/doris/analysis/LateralViewRef.java  |  4 ----
 .../org/apache/doris/catalog/TableProperty.java    | 11 +--------
 .../main/java/org/apache/doris/common/Config.java  | 12 ----------
 .../java/org/apache/doris/qe/SessionVariable.java  | 11 ---------
 .../java/org/apache/doris/alter/AlterTest.java     |  1 -
 .../org/apache/doris/alter/RollupJobV2Test.java    |  5 ++++-
 .../doris/analysis/CreateDataSyncJobStmtTest.java  |  6 ++---
 .../analysis/CreateMaterializedViewStmtTest.java   |  5 ++++-
 .../planner/MaterializedViewFunctionTest.java      |  2 +-
 .../doris/planner/TableFunctionPlanTest.java       |  1 -
 .../java/org/apache/doris/utframe/DorisAssert.java |  7 ------
 17 files changed, 16 insertions(+), 121 deletions(-)

diff --git a/docs/en/administrator-guide/config/fe_config.md b/docs/en/administrator-guide/config/fe_config.md
index 281d841ff5..e68e6a56ee 100644
--- a/docs/en/administrator-guide/config/fe_config.md
+++ b/docs/en/administrator-guide/config/fe_config.md
@@ -437,20 +437,6 @@ MasterOnly:true
 
 As long as one BE is down, Routine Load cannot be automatically restored 
 
-### enable_materialized_view
-
-Default:true
-
-IsMutable:true
-
-MasterOnly:true
-
-This configuration is used to turn on and off the creation of materialized views. If set to true, the function to create a materialized view is enabled. The user can create a materialized view through the `CREATE MATERIALIZED VIEW` command. If set to false, materialized views cannot be created.
-
-If you get an error `The materialized view is coming soon` or `The materialized view is disabled` when creating the materialized view, it means that the configuration is set to false and the function of creating the materialized view is turned off. You can start to create a materialized view by modifying the configuration to true.
-
-This variable is a dynamic configuration, and users can modify the configuration through commands after the FE process starts. You can also modify the FE configuration file and restart the FE to take effect
-
 ### check_java_version
 
 Default:true
@@ -475,7 +461,7 @@ IsMutable:true
 
 MasterOnly:true
 
-Whether to enable dynamic partition, enabled by default
+Whether to enable dynamic partition scheduler, enabled by default
 
 ### dynamic_partition_check_interval_seconds
 
@@ -2152,16 +2138,6 @@ Only for Master FE: false
 
 If set to true, the compaction slower replica will be skipped when select get queryable replicas
 
-### enable_create_sync_job
-
-Enable Mysql data synchronization job function. The default is false, this function is turned off
-
-Default: false
-
-Is it possible to configure dynamically: true
-
-Whether it is a configuration item unique to the Master FE node: true
-
 ### sync_commit_interval_second
 
 The maximum time interval for committing transactions. If there is still data in the channel that has not been submitted after this time, the consumer will notify the channel to submit the transaction.
diff --git a/docs/en/administrator-guide/load-data/binlog-load-manual.md b/docs/en/administrator-guide/load-data/binlog-load-manual.md
index a13e52000d..6142828408 100644
--- a/docs/en/administrator-guide/load-data/binlog-load-manual.md
+++ b/docs/en/administrator-guide/load-data/binlog-load-manual.md
@@ -478,10 +478,6 @@ You can use `HELP STOP SYNC JOB;`, `HELP PAUSE SYNC JOB`; And `HELP RESUME SYNC
 ### Fe configuration
 
 The following configuration belongs to the system level configuration of SyncJob. The configuration value can be modified in configuration file fe.conf.
-
-* `enable_create_sync_job`
-
-	Turn on the Binlog Load feature. The default value is false. This feature is turned off.
 	
 * `sync_commit_interval_second`
 
diff --git a/docs/zh-CN/administrator-guide/config/fe_config.md b/docs/zh-CN/administrator-guide/config/fe_config.md
index e16e58c365..e082155212 100644
--- a/docs/zh-CN/administrator-guide/config/fe_config.md
+++ b/docs/zh-CN/administrator-guide/config/fe_config.md
@@ -423,20 +423,6 @@ show data (其他用法:HELP SHOW DATA)
 
 只要有一个BE宕机,Routine Load 就无法自动恢复
 
-### `enable_materialized_view`
-
-默认值:true
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-该配置用于开启和关闭创建物化视图功能。如果设置为 true,则创建物化视图功能开启。用户可以通过 `CREATE MATERIALIZED VIEW` 命令创建物化视图。如果设置为 false,则无法创建物化视图。
-
-如果在创建物化视图的时候报错 `The materialized view is coming soon` 或 `The materialized view is disabled` 则说明改配置被设置为了 false,创建物化视图功能关闭了。可以通过修改配置为 true 来启动创建物化视图功能。
-
-该变量为动态配置,用户可以在 FE 进程启动后,通过命令修改配置。也可以通过修改 FE 的配置文件,重启 FE 来生效
-
 ### `check_java_version`
 
 默认值:true
@@ -461,7 +447,7 @@ Doris 将检查已编译和运行的 Java 版本是否兼容,如果不兼容
 
 是否为 Master FE 节点独有的配置项:true
 
-是否启用动态分区,默认启用
+是否启用动态分区调度,默认启用
 
 ### `dynamic_partition_check_interval_seconds`
 
@@ -2175,16 +2161,6 @@ load 标签清理器将每隔 `label_clean_interval_second` 运行一次以清
 
 如果设置为true,则在选择可查询副本时,将跳过 compaction 较慢的副本
 
-### enable_create_sync_job
-
-开启 MySQL 数据同步作业功能。默认是 false,关闭此功能
-
-默认值:false
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
 ### sync_commit_interval_second
 
 提交事务的最大时间间隔。若超过了这个时间 channel 中还有数据没有提交,consumer 会通知 channel 提交事务。
diff --git a/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md b/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md
index 8862a0a113..22ddf81775 100644
--- a/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md
+++ b/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md
@@ -458,10 +458,6 @@ ALTER TABLE canal_test.test1 ENABLE FEATURE "BATCH_DELETE";
 
 下面配置属于数据同步作业的系统级别配置,主要通过修改 fe.conf 来调整配置值。
 
-* `enable_create_sync_job`
-
-	开启数据同步作业功能。默认为 false,关闭此功能。
-
 * `sync_commit_interval_second`
 
 	提交事务的最大时间间隔。若超过了这个时间channel中还有数据没有提交,consumer会通知channel提交事务。
@@ -499,4 +495,4 @@ ALTER TABLE canal_test.test1 ENABLE FEATURE "BATCH_DELETE";
 4. 为什么数据同步时浮点类型的数据精度在Mysql端和Doris端不一样?
 
 	Doris本身浮点类型的精度与Mysql不一样。可以选择用Decimal类型代替。
-	
\ No newline at end of file
+	
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateDataSyncJobStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateDataSyncJobStmt.java
index 60f02aa44d..7c939e6a41 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateDataSyncJobStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateDataSyncJobStmt.java
@@ -23,7 +23,6 @@ import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.cluster.ClusterNamespace;
 import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.Config;
 import org.apache.doris.common.ErrorCode;
 import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.UserException;
@@ -79,11 +78,6 @@ public class CreateDataSyncJobStmt extends DdlStmt {
         }
         dbName = ClusterNamespace.getFullName(analyzer.getClusterName(), dbName);
 
-        if (!Config.enable_create_sync_job) {
-            throw new AnalysisException("Mysql sync job is disabled." +
-                    " Set config 'enable_create_sync_job' = 'true' to enable this feature. ");
-        }
-
         if (binlogDesc != null) {
             binlogDesc.analyze();
             dataSyncJobType = binlogDesc.getDataSyncJobType();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java
index 420053358f..b8125417ee 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java
@@ -24,7 +24,6 @@ import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.Config;
 import org.apache.doris.common.ErrorCode;
 import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.FeConstants;
@@ -131,9 +130,6 @@ public class CreateMaterializedViewStmt extends DdlStmt {
 
     @Override
     public void analyze(Analyzer analyzer) throws UserException {
-        if (!Config.enable_materialized_view) {
-            throw new AnalysisException("The materialized view is disabled");
-        }
         super.analyze(analyzer);
         FeNameFormat.checkTableName(mvName);
         // TODO(ml): The mv name in from clause should pass the analyze without error.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java
index 3af8b5481c..4728dc2c60 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LateralViewRef.java
@@ -62,10 +62,6 @@ public class LateralViewRef extends TableRef {
 
     @Override
     public void analyze(Analyzer analyzer) throws UserException {
-        if (!analyzer.getContext().getSessionVariable().isEnableLateralView()) {
-            throw new AnalysisException("The session variables `enable_lateral_view` is false");
-        }
-
         if (isAnalyzed) {
             return;
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java
index 38d85dc4c1..f007df52a2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java
@@ -19,8 +19,6 @@ package org.apache.doris.catalog;
 
 import org.apache.doris.analysis.DataSortInfo;
 import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.Config;
-import org.apache.doris.common.DdlException;
 import org.apache.doris.common.FeMetaVersion;
 import org.apache.doris.common.io.Text;
 import org.apache.doris.common.io.Writable;
@@ -115,14 +113,7 @@ public class TableProperty implements Writable {
         return this;
     }
 
-    public TableProperty buildDynamicProperty() throws DdlException {
-        if (properties.containsKey(DynamicPartitionProperty.ENABLE)
-                && Boolean.valueOf(properties.get(DynamicPartitionProperty.ENABLE))
-                && !Config.dynamic_partition_enable) {
-            throw new DdlException("Could not create table with dynamic partition "
-                    + "when fe config dynamic_partition_enable is false. "
-                    + "Please ADMIN SET FRONTEND CONFIG (\"dynamic_partition_enable\" = \"true\") firstly.");
-        }
+    public TableProperty buildDynamicProperty() {
         executeBuildDynamicProperty();
         return this;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
index 82552ff2b3..14e827f3ab 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
@@ -1267,18 +1267,6 @@ public class Config extends ConfigBase {
     @ConfField
     public static boolean check_java_version = true;
 
-    /**
-     * control materialized view
-     */
-    @ConfField(mutable = true, masterOnly = true)
-    public static boolean enable_materialized_view = true;
-
-    /**
-     * enable create sync job
-     */
-    @ConfField(mutable = true, masterOnly = true)
-    public static boolean enable_create_sync_job = false;
-
     /**
      * it can't auto-resume routine load job as long as one of the backends is down
      */
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 8bc47eac47..e2aa801283 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -415,9 +415,6 @@ public class SessionVariable implements Serializable, Writable {
     @VariableMgr.VarAttr(name = CPU_RESOURCE_LIMIT)
     public int cpuResourceLimit = -1;
 
-    @VariableMgr.VarAttr(name = ENABLE_LATERAL_VIEW, needForward = true)
-    public boolean enableLateralView = false;
-
     @VariableMgr.VarAttr(name = DISABLE_JOIN_REORDER)
     private boolean disableJoinReorder = false;
 
@@ -876,14 +873,6 @@ public class SessionVariable implements Serializable, Writable {
         return enableParallelOutfile;
     }
 
-    public boolean isEnableLateralView() {
-        return enableLateralView;
-    }
-
-    public void setEnableLateralView(boolean enableLateralView) {
-        this.enableLateralView = enableLateralView;
-    }
-
     public boolean isDisableJoinReorder() {
         return disableJoinReorder;
     }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java b/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java
index 08569cdc1a..77b0ca7a6f 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java
@@ -65,7 +65,6 @@ public class AlterTest {
     public static void beforeClass() throws Exception {
         FeConstants.runningUnitTest = true;
         FeConstants.default_scheduler_interval_millisecond = 100;
-        Config.dynamic_partition_enable = true;
         Config.dynamic_partition_check_interval_seconds = 1;
         Config.disable_storage_medium_check = true;
         UtFrameUtils.createDorisCluster(runningDir);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/alter/RollupJobV2Test.java b/fe/fe-core/src/test/java/org/apache/doris/alter/RollupJobV2Test.java
index 6e6b18536b..39d41054a7 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/alter/RollupJobV2Test.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/alter/RollupJobV2Test.java
@@ -79,6 +79,10 @@ import java.lang.reflect.InvocationTargetException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import mockit.Expectations;
+import mockit.Mock;
+import mockit.MockUp;
+import mockit.Mocked;
 
 import mockit.Expectations;
 import mockit.Mock;
@@ -317,7 +321,6 @@ public class RollupJobV2Test {
     @Test
     public void testSerializeOfRollupJob(@Mocked CreateMaterializedViewStmt stmt) throws IOException,
             AnalysisException {
-        Config.enable_materialized_view = true;
         // prepare file
         File file = new File(fileName);
         file.createNewFile();
diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateDataSyncJobStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateDataSyncJobStmtTest.java
index de3ef16cf6..6350942130 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateDataSyncJobStmtTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateDataSyncJobStmtTest.java
@@ -21,7 +21,6 @@ import org.apache.doris.catalog.Catalog;
 import org.apache.doris.catalog.Database;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.common.Config;
 import org.apache.doris.common.UserException;
 import org.apache.doris.load.sync.DataSyncJobType;
 import org.apache.doris.mysql.privilege.PaloAuth;
@@ -39,6 +38,9 @@ import org.junit.Test;
 
 import java.util.List;
 import java.util.Map;
+import mockit.Expectations;
+import mockit.Injectable;
+import mockit.Mocked;
 
 import mockit.Expectations;
 import mockit.Injectable;
@@ -93,8 +95,6 @@ public class CreateDataSyncJobStmtTest {
                 result = catalog;
             }
         };
-
-        Config.enable_create_sync_job = true;
     }
     @Test
     public void testNoDb() {
diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateMaterializedViewStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateMaterializedViewStmtTest.java
index 50932d5d7d..6a9e550f9c 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateMaterializedViewStmtTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateMaterializedViewStmtTest.java
@@ -39,6 +39,9 @@ import org.junit.Test;
 
 import java.util.ArrayList;
 import java.util.List;
+import mockit.Expectations;
+import mockit.Injectable;
+import mockit.Mocked;
 
 import mockit.Expectations;
 import mockit.Injectable;
@@ -57,7 +60,7 @@ public class CreateMaterializedViewStmtTest {
 
     @Before
     public void initTest() {
-        Deencapsulation.setField(Config.class, "enable_materialized_view", true);
+
     }
 
     @Test
diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/MaterializedViewFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/MaterializedViewFunctionTest.java
index 09d28a719e..a980ff9a67 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/MaterializedViewFunctionTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/MaterializedViewFunctionTest.java
@@ -58,7 +58,7 @@ public class MaterializedViewFunctionTest {
         FeConstants.runningUnitTest = true;
         UtFrameUtils.createDorisCluster(runningDir);
         dorisAssert = new DorisAssert();
-        dorisAssert.withEnableMV().withDatabase(HR_DB_NAME).useDatabase(HR_DB_NAME);
+        dorisAssert.withDatabase(HR_DB_NAME).useDatabase(HR_DB_NAME);
     }
 
     @Before
diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java
index 15d84f7f39..0480791baf 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/TableFunctionPlanTest.java
@@ -48,7 +48,6 @@ public class TableFunctionPlanTest {
     public static void setUp() throws Exception {
         UtFrameUtils.createDorisCluster(runningDir);
         ctx = UtFrameUtils.createDefaultCtx();
-        ctx.getSessionVariable().setEnableLateralView(true);
         String createDbStmtStr = "create database db1;";
         CreateDbStmt createDbStmt = (CreateDbStmt) UtFrameUtils.parseAndAnalyzeStmt(createDbStmtStr, ctx);
         Catalog.getCurrentCatalog().createDb(createDbStmt);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/utframe/DorisAssert.java b/fe/fe-core/src/test/java/org/apache/doris/utframe/DorisAssert.java
index 520352d654..d6021c1dd6 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/utframe/DorisAssert.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/utframe/DorisAssert.java
@@ -32,7 +32,6 @@ import org.apache.doris.analysis.StatementBase;
 import org.apache.doris.catalog.Catalog;
 import org.apache.doris.cluster.ClusterNamespace;
 import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.Config;
 import org.apache.doris.common.util.SqlParserUtils;
 import org.apache.doris.planner.Planner;
 import org.apache.doris.qe.ConnectContext;
@@ -62,12 +61,6 @@ public class DorisAssert {
         this.ctx = ctx;
     }
 
-    public DorisAssert withEnableMV() {
-        ctx.getSessionVariable().setTestMaterializedView(true);
-        Config.enable_materialized_view = true;
-        return this;
-    }
-
     public DorisAssert withDatabase(String dbName) throws Exception {
         CreateDbStmt createDbStmt =
                 (CreateDbStmt) UtFrameUtils.parseAndAnalyzeStmt("create database " + dbName + ";", ctx);




[incubator-doris] 07/09: [fix](storage-vectorized) fix VMergeIterator core dump (#9564)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 4315dc849356993f1468f35719ff7e6e3ce8abd9
Author: Xin Liao <li...@126.com>
AuthorDate: Tue May 17 11:58:59 2022 +0800

    [fix](storage-vectorized) fix VMergeIterator core dump (#9564)
    
    It can be reproduced on a rowset with many segments, i.e. with overlapping segments, but it may be hard to reproduce reliably.
---
 be/src/vec/olap/vgeneric_iterators.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/be/src/vec/olap/vgeneric_iterators.cpp b/be/src/vec/olap/vgeneric_iterators.cpp
index 6d18ee5dc5..e63bb8d28a 100644
--- a/be/src/vec/olap/vgeneric_iterators.cpp
+++ b/be/src/vec/olap/vgeneric_iterators.cpp
@@ -179,9 +179,7 @@ public:
         vectorized::Block& src = _block;
         vectorized::Block& dst = *block;
 
-        auto columns = _iter->schema().columns();
-
-        for (size_t i = 0; i < columns.size(); ++i) {
+        for (size_t i = 0; i < _iter->schema().num_column_ids(); ++i) {
             vectorized::ColumnWithTypeAndName s_col = src.get_by_position(i);
             vectorized::ColumnWithTypeAndName d_col = dst.get_by_position(i);
 
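
The one-line change matters because the destination block only materializes the column ids selected by the query, not every column in the tablet schema, so looping over schema().columns() could index past the end of the block. A sketch of the bounds issue, with simplified stand-in types rather than the Doris classes:

```cpp
#include <cstddef>
#include <vector>

// Stand-in for the schema seen by VMergeIterator: the full column list
// versus the subset of column ids this query actually materializes.
struct Schema {
    std::vector<int> columns;     // every column in the tablet schema
    std::vector<int> column_ids;  // only the columns present in the block
    size_t num_column_ids() const { return column_ids.size(); }
};

int main() {
    Schema schema{{0, 1, 2, 3}, {0, 2}};
    std::vector<int> block(schema.num_column_ids());  // block holds 2 columns

    // Buggy bound: schema.columns.size() == 4 would touch block[2] and
    // block[3], which do not exist -- the core dump the patch fixes.
    for (size_t i = 0; i < schema.num_column_ids(); ++i) {
        block[i] += 1;  // safe: bounded by the materialized column count
    }
    return 0;
}
```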




[incubator-doris] 04/09: [improvement](planner) push down predicate past two phase aggregate (#9498)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit db2d9d160d727775fc92692418d4ea1a17de05af
Author: morrySnow <10...@users.noreply.github.com>
AuthorDate: Wed May 18 10:09:39 2022 +0800

    [improvement](planner) push down predicate past two phase aggregate (#9498)
    
    The "push down predicate past aggregate" optimization could not push predicates past a two-phase aggregate.
    
    The original plan looks like this:
    ```
    second phase agg (conjuncts on olap scan node tuples)
    |
    first phase agg
    |
    olap scan node
    ```
    It should be optimized to:
    ```
    second phase agg
    |
    first phase agg
    |
    olap scan node (conjuncts on olap scan node tuples)
    ```
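    
    As a rough illustration only (not the actual SingleNodePlanner code, which works on analyzer slot bindings rather than strings), the rewrite amounts to relocating conjuncts that are bound solely by the scan tuple from above the second-phase aggregate down to the scan node:
    
    ```cpp
    #include <memory>
    #include <string>
    #include <vector>
    
    // Toy plan node; the real planner nodes carry tuple/slot metadata
    // instead of predicate strings.
    struct PlanNode {
        std::string name;
        std::vector<std::string> conjuncts;  // predicates evaluated at this node
        std::shared_ptr<PlanNode> child;
    };
    
    // Move every conjunct from the top aggregate down to the scan node.
    // Assumption: the caller has already verified the conjuncts reference
    // only the scan node's tuple, as in the plans above.
    void push_down_past_two_phase_agg(PlanNode& second_phase_agg) {
        PlanNode& first_phase_agg = *second_phase_agg.child;
        PlanNode& scan = *first_phase_agg.child;
        for (const auto& c : second_phase_agg.conjuncts) {
            scan.conjuncts.push_back(c);
        }
        second_phase_agg.conjuncts.clear();
    }
    
    int main() {
        auto scan = std::make_shared<PlanNode>();
        scan->name = "olap scan node";
        auto agg1 = std::make_shared<PlanNode>();
        agg1->name = "first phase agg";
        agg1->child = scan;
        PlanNode agg2{"second phase agg", {"k1 = 1"}, agg1};
    
        push_down_past_two_phase_agg(agg2);  // "k1 = 1" now evaluated at the scan
        return 0;
    }
    ```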
---
 .../apache/doris/planner/SingleNodePlanner.java    | 299 ++++++++++++---------
 .../java/org/apache/doris/planner/PlannerTest.java |   1 -
 2 files changed, 176 insertions(+), 124 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index b4ab81b17d..0f92c4d803 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -25,7 +25,6 @@ import org.apache.doris.analysis.BaseTableRef;
 import org.apache.doris.analysis.BinaryPredicate;
 import org.apache.doris.analysis.CaseExpr;
 import org.apache.doris.analysis.CastExpr;
-import org.apache.doris.analysis.DescriptorTable;
 import org.apache.doris.analysis.Expr;
 import org.apache.doris.analysis.ExprSubstitutionMap;
 import org.apache.doris.analysis.FunctionCallExpr;
@@ -68,6 +67,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 
+import org.apache.commons.collections.CollectionUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -88,18 +88,18 @@ import java.util.stream.Collectors;
  * The single-node plan needs to be wrapped in a plan fragment for it to be executable.
  */
 public class SingleNodePlanner {
-    private final static Logger LOG = LogManager.getLogger(SingleNodePlanner.class);
+    private static final Logger LOG = LogManager.getLogger(SingleNodePlanner.class);
 
-    private final PlannerContext ctx_;
+    private final PlannerContext ctx;
     private final ArrayList<ScanNode> scanNodes = Lists.newArrayList();
     private Map<Analyzer, List<ScanNode>> selectStmtToScanNodes = Maps.newHashMap();
 
     public SingleNodePlanner(PlannerContext ctx) {
-        ctx_ = ctx;
+        this.ctx = ctx;
     }
 
     public PlannerContext getPlannerContext() {
-        return ctx_;
+        return ctx;
     }
 
     public ArrayList<ScanNode> getScanNodes() {
@@ -137,7 +137,7 @@ public class SingleNodePlanner {
      * re-maps its input, set a substitution map to be applied by parents.
      */
     public PlanNode createSingleNodePlan() throws UserException, AnalysisException {
-        QueryStmt queryStmt = ctx_.getQueryStmt();
+        QueryStmt queryStmt = ctx.getQueryStmt();
         // Use the stmt's analyzer which is not necessarily the root analyzer
         // to detect empty result sets.
         Analyzer analyzer = queryStmt.getAnalyzer();
@@ -162,7 +162,7 @@ public class SingleNodePlanner {
             LOG.trace("desctbl: " + analyzer.getDescTbl().debugString());
         }
         PlanNode singleNodePlan = createQueryPlan(queryStmt, analyzer,
-                ctx_.getQueryOptions().getDefaultOrderByLimit());
+                ctx.getQueryOptions().getDefaultOrderByLimit());
         Preconditions.checkNotNull(singleNodePlan);
         return singleNodePlan;
     }
@@ -187,7 +187,7 @@ public class SingleNodePlanner {
             tupleIds.add(createResultTupleDescriptor(selectStmt, "empty", analyzer).getId());
         }
         unmarkCollectionSlots(stmt);
-        EmptySetNode node = new EmptySetNode(ctx_.getNextNodeId(), tupleIds);
+        EmptySetNode node = new EmptySetNode(ctx.getNextNodeId(), tupleIds);
         node.init(analyzer);
         // Set the output smap to resolve exprs referencing inline views within stmt.
         // Not needed for a UnionStmt because it materializes its input operands.
@@ -238,7 +238,7 @@ public class SingleNodePlanner {
             // insert possible AnalyticEvalNode before SortNode
             if (selectStmt.getAnalyticInfo() != null) {
                 AnalyticInfo analyticInfo = selectStmt.getAnalyticInfo();
-                AnalyticPlanner analyticPlanner = new AnalyticPlanner(analyticInfo, analyzer, ctx_);
+                AnalyticPlanner analyticPlanner = new AnalyticPlanner(analyticInfo, analyzer, ctx);
                 List<Expr> inputPartitionExprs = Lists.newArrayList();
                 AggregateInfo aggInfo = selectStmt.getAggInfo();
                 root = analyticPlanner.createSingleNodePlan(root,
@@ -273,7 +273,7 @@ public class SingleNodePlanner {
             if (limit == -1 && analyzer.getContext().getSessionVariable().enableSpilling) {
                 useTopN = false;
             }
-            root = new SortNode(ctx_.getNextNodeId(), root, stmt.getSortInfo(),
+            root = new SortNode(ctx.getNextNodeId(), root, stmt.getSortInfo(),
                     useTopN, limit == -1, stmt.getOffset());
             if (useTopN) {
                 root.setLimit(limit != -1 ? limit : newDefaultOrderByLimit);
@@ -325,7 +325,7 @@ public class SingleNodePlanner {
             return root;
         }
         // evaluate conjuncts in SelectNode
-        SelectNode selectNode = new SelectNode(ctx_.getNextNodeId(), root, conjuncts);
+        SelectNode selectNode = new SelectNode(ctx.getNextNodeId(), root, conjuncts);
         selectNode.init(analyzer);
         Preconditions.checkState(selectNode.hasValidStats());
         return selectNode;
@@ -341,15 +341,11 @@ public class SingleNodePlanner {
         // Gather unassigned conjuncts and generate predicates to enfore
         // slot equivalences for each tuple id.
         List<Expr> conjuncts = analyzer.getUnassignedConjuncts(root);
-        for (TupleId tid : tupleIds) {
-            // TODO(zc)
-            // analyzer.createEquivConjuncts(tid, conjuncts);
-        }
         if (conjuncts.isEmpty()) {
             return root;
         }
         // evaluate conjuncts in SelectNode
-        SelectNode selectNode = new SelectNode(ctx_.getNextNodeId(), root, conjuncts);
+        SelectNode selectNode = new SelectNode(ctx.getNextNodeId(), root, conjuncts);
         // init() marks conjuncts as assigned
         selectNode.init(analyzer);
         Preconditions.checkState(selectNode.hasValidStats());
@@ -382,8 +378,8 @@ public class SingleNodePlanner {
                     final JoinOperator joinOperator = selectStmt.getTableRefs().get(i).getJoinOp();
                     // TODO chenhao , right out join ?
                     if (joinOperator.isRightOuterJoin() || joinOperator.isFullOuterJoin()) {
-                        turnOffReason = selectStmt.getTableRefs().get(i) +
-                                " joinOp is full outer join or right outer join.";
+                        turnOffReason = selectStmt.getTableRefs().get(i)
+                                + " joinOp is full outer join or right outer join.";
                         aggTableValidate = false;
                         break;
                     }
@@ -442,8 +438,8 @@ public class SingleNodePlanner {
                 for (SlotDescriptor slot : selectStmt.getTableRefs().get(0).getDesc().getSlots()) {
                     if (!slot.getColumn().isKey()) {
                         if (conjunctSlotIds.contains(slot.getId())) {
-                            turnOffReason = "conjunct on `" + slot.getColumn().getName() +
-                                    "` which is StorageEngine value column";
+                            turnOffReason = "conjunct on `" + slot.getColumn().getName()
+                                    + "` which is StorageEngine value column";
                             valueColumnValidate = false;
                             break;
                         }
@@ -471,7 +467,9 @@ public class SingleNodePlanner {
                                 && child.getChild(0).getType().isNumericType()) {
                             returnColumns.add(((SlotRef) child.getChild(0)).getDesc().getColumn());
                         } else {
-                            turnOffReason = "aggExpr.getChild(0)[" + aggExpr.getChild(0).toSql() + "] is not Numeric CastExpr";
+                            turnOffReason = "aggExpr.getChild(0)["
+                                    + aggExpr.getChild(0).toSql()
+                                    + "] is not Numeric CastExpr";
                             aggExprValidate = false;
                             break;
                         }
@@ -484,8 +482,6 @@ public class SingleNodePlanner {
                             conditionExpr.getIds(conditionTupleIds, conditionSlotIds);
 
                             for (SlotId conditionSlotId : conditionSlotIds) {
-                                DescriptorTable descTable = analyzer.getDescTbl();
-                                SlotDescriptor slotDesc = descTable.getSlotDesc(conditionSlotId);
                                 conditionColumns.add(analyzer.getDescTbl().getSlotDesc(conditionSlotId).getColumn());
                             }
                         }
@@ -677,7 +673,8 @@ public class SingleNodePlanner {
      * required tuple ids of one or more TableRefs in subplanRefs are materialized
      * Returns null if we can't create an executable plan.
      */
-    private PlanNode createCheapestJoinPlan(Analyzer analyzer, List<Pair<TableRef, PlanNode>> refPlans) throws UserException {
+    private PlanNode createCheapestJoinPlan(Analyzer analyzer,
+            List<Pair<TableRef, PlanNode>> refPlans) throws UserException {
         if (refPlans.size() == 1) {
             return refPlans.get(0).second;
         }
@@ -729,7 +726,9 @@ public class SingleNodePlanner {
 
         for (Pair<TableRef, Long> candidate : candidates) {
             PlanNode result = createJoinPlan(analyzer, candidate.first, refPlans);
-            if (result != null) return result;
+            if (result != null) {
+                return result;
+            }
         }
         return null;
     }
@@ -780,16 +779,15 @@ public class SingleNodePlanner {
         // join in the original query still remain to the left/right after join ordering.
         // This prevents join re-ordering across outer/semi joins which is generally wrong.
 
-        /**
-         * Key of precedingRefs: the right table ref of outer or semi join
-         * Value of precedingRefs: the preceding refs of this key
-         * For example:
-         * select * from t1, t2, t3 left join t4, t5, t6 right semi join t7, t8, t9
-         * Map:
-         * { t4: [t1, t2, t3],
-         *   t7: [t1, t2, t3, t4, t5, t6]
-         * }
-         */
+
+        // Key of precedingRefs: the right table ref of outer or semi join
+        // Value of precedingRefs: the preceding refs of this key
+        // For example:
+        // select * from t1, t2, t3 left join t4, t5, t6 right semi join t7, t8, t9
+        // Map:
+        // { t4: [t1, t2, t3],
+        //   t7: [t1, t2, t3, t4, t5, t6]
+        // }
         Map<TableRef, Set<TableRef>> precedingRefs = new HashMap<>();
         List<TableRef> tmpTblRefs = new ArrayList<>();
         for (Pair<TableRef, PlanNode> entry : refPlans) {
@@ -824,21 +822,20 @@ public class SingleNodePlanner {
                 if (requiredRefs != null) {
                     Preconditions.checkState(joinOp.isOuterJoin()
                             || joinOp.isSemiJoin());
-                    /**
-                     * The semi and outer join nodes are similar to the stop nodes in each round of the algorithm.
-                     * If the stop node is encountered during the current round of optimal selection,
-                     * it means that the following nodes do not need to be referred to.
-                     * This round has been completed.
-                     * There are two situation in here.
-                     * Situation 1: required table refs have not been placed yet
-                     * t1, t2, t3 left join t4, t5
-                     *     Round 1: t3, t1(new root) meets t4(stop)
-                     *              stop this round and begin next round
-                     * Situation 2: the remaining table refs to prevent incorrect re-ordering of tables across outer/semi joins
-                     *     Round 1: t5, t1, t2, t3(root) meets t4(stop)
-                     *              stop this round while the new root is null
-                     *              planning failed and return null
-                     */
+                    // The semi and outer join nodes are similar to the stop nodes in each round of the algorithm.
+                    // If the stop node is encountered during the current round of optimal selection,
+                    // it means that the following nodes do not need to be referred to.
+                    // This round has been completed.
+                    // There are two situation in here.
+                    // Situation 1: required table refs have not been placed yet
+                    // t1, t2, t3 left join t4, t5
+                    //     Round 1: t3, t1(new root) meets t4(stop)
+                    //              stop this round and begin next round
+                    // Situation 2: the remaining table refs to prevent incorrect re-ordering
+                    //              of tables across outer/semi joins
+                    //     Round 1: t5, t1, t2, t3(root) meets t4(stop)
+                    //              stop this round while the new root is null
+                    //              planning failed and return null
                     if (!requiredRefs.equals(joinedRefs)) {
                         break;
                     }
@@ -872,14 +869,16 @@ public class SingleNodePlanner {
 
                 // Always prefer Hash Join over Nested-Loop Join due to limited costing
                 // infrastructure.
-                /**
-                 * The following three conditions are met while the candidate is better.
-                 * 1. The first candidate
-                 * 2. The candidate is better than new root: [t3, t2] pk [t3, t1] => [t3, t1]
-                 * 3. The hash join is better than cross join: [t3 cross t1] pk [t3 hash t2] => t3 hash t2
-                 */
+                //
+                // The following three conditions are met while the candidate is better.
+                // 1. The first candidate
+                // 2. The candidate is better than new root: [t3, t2] pk [t3, t1] => [t3, t1]
+                // 3. The hash join is better than cross join: [t3 cross t1] pk [t3 hash t2] => t3 hash t2
                 if (newRoot == null
-                        || ((candidate.getClass().equals(newRoot.getClass()) && candidateCardinalityIsSmaller(candidate, tblRefToPlanNodeOfCandidate.second.getCardinality(), newRoot, newRootRightChildCardinality)))
+                        || ((candidate.getClass().equals(newRoot.getClass())
+                        && candidateCardinalityIsSmaller(
+                                candidate, tblRefToPlanNodeOfCandidate.second.getCardinality(),
+                                newRoot, newRootRightChildCardinality)))
                         || (candidate instanceof HashJoinNode && newRoot instanceof CrossJoinNode)) {
                     newRoot = candidate;
                     minEntry = tblRefToPlanNodeOfCandidate;
@@ -887,19 +886,18 @@ public class SingleNodePlanner {
                 }
             }
 
-            /**
-             * The table after the outer or semi join is wrongly planned to the front,
-             * causing the first tblRefToPlanNodeOfCandidate (outer or semi tbl ref) in this round of loop to fail and exit the loop.
-             * This means that the current leftmost node must be wrong, and the correct result cannot be planned.
-             *
-             * For example:
-             * Query: t1 left join t2 inner join t3
-             * Input params: t3(left most tbl ref), [t1,t2] (remaining refs)
-             *     Round 1: t3, t1 (joined refs) t2 (remaining refs)
-             *     Round 2: requiredRefs.equals(joinedRefs) is false and break, the newRoot is null
-             * Result: null
-             * The t3 should not appear before t2 so planning is fail
-             */
+            // The table after the outer or semi join is wrongly planned to the front,
+            // causing the first tblRefToPlanNodeOfCandidate (outer or semi tbl ref)
+            // in this round of loop to fail and exit the loop.
+            // This means that the current leftmost node must be wrong, and the correct result cannot be planned.
+            //
+            // For example:
+            // Query: t1 left join t2 inner join t3
+            // Input params: t3(left most tbl ref), [t1,t2] (remaining refs)
+            //     Round 1: t3, t1 (joined refs) t2 (remaining refs)
+            //     Round 2: requiredRefs.equals(joinedRefs) is false and break, the newRoot is null
+            // Result: null
+            // The t3 should not appear before t2 so planning is fail
             if (newRoot == null) {
                 // Could not generate a valid plan.
                 // for example: the biggest table is the last table
@@ -960,7 +958,7 @@ public class SingleNodePlanner {
         }
 
         if (analyzer.hasEmptySpjResultSet() && selectStmt.getAggInfo() != null) {
-            final PlanNode emptySetNode = new EmptySetNode(ctx_.getNextNodeId(), rowTuples);
+            final PlanNode emptySetNode = new EmptySetNode(ctx.getNextNodeId(), rowTuples);
             emptySetNode.init(analyzer);
             emptySetNode.setOutputSmap(selectStmt.getBaseTblSmap());
             return createAggregationPlan(selectStmt, analyzer, emptySetNode);
@@ -1018,7 +1016,6 @@ public class SingleNodePlanner {
             }
 
             for (int i = 1; i < selectStmt.getTableRefs().size(); ++i) {
-                TableRef outerRef = selectStmt.getTableRefs().get(i - 1);
                 TableRef innerRef = selectStmt.getTableRefs().get(i);
                 root = createJoinNode(analyzer, root, innerRef, selectStmt);
                 // Have the build side of a join copy data to a compact representation
@@ -1067,7 +1064,7 @@ public class SingleNodePlanner {
         GroupingInfo groupingInfo = selectStmt.getGroupingInfo();
         Preconditions.checkState(groupByClause != null && groupByClause.isGroupByExtension()
                 && groupingInfo != null);
-        root = new RepeatNode(ctx_.getNextNodeId(), root, groupingInfo, groupByClause);
+        root = new RepeatNode(ctx.getNextNodeId(), root, groupingInfo, groupByClause);
         root.init(analyzer);
         return root;
     }
@@ -1099,7 +1096,8 @@ public class SingleNodePlanner {
                 // select index by the old Rollup selector
                 olapScanNode.selectBestRollupByRollupSelector(analyzer);
                 // select index by the new Materialized selector
-                MaterializedViewSelector.BestIndexInfo bestIndexInfo = materializedViewSelector.selectBestMV(olapScanNode);
+                MaterializedViewSelector.BestIndexInfo bestIndexInfo
+                        = materializedViewSelector.selectBestMV(olapScanNode);
                 if (bestIndexInfo == null) {
                     selectFailed |= true;
                     TupleId tupleId = olapScanNode.getTupleId();
@@ -1135,7 +1133,7 @@ public class SingleNodePlanner {
         // add aggregation, if required
         AggregateInfo aggInfo = selectStmt.getAggInfo();
         // aggInfo.substitueGroupingExpr(analyzer);
-        PlanNode newRoot = new AggregationNode(ctx_.getNextNodeId(), root, aggInfo);
+        PlanNode newRoot = new AggregationNode(ctx.getNextNodeId(), root, aggInfo);
         newRoot.init(analyzer);
         Preconditions.checkState(newRoot.hasValidStats());
         // if we're computing DISTINCT agg fns, the analyzer already created the
@@ -1144,7 +1142,7 @@ public class SingleNodePlanner {
             ((AggregationNode) newRoot).unsetNeedsFinalize();
             // The output of the 1st phase agg is the 1st phase intermediate.
             ((AggregationNode) newRoot).setIntermediateTuple();
-            newRoot = new AggregationNode(ctx_.getNextNodeId(), newRoot,
+            newRoot = new AggregationNode(ctx.getNextNodeId(), newRoot,
                     aggInfo.getSecondPhaseDistinctAggInfo());
             newRoot.init(analyzer);
             Preconditions.checkState(newRoot.hasValidStats());
@@ -1163,7 +1161,7 @@ public class SingleNodePlanner {
         ArrayList<Expr> resultExprs = selectStmt.getResultExprs();
         // Create tuple descriptor for materialized tuple.
         TupleDescriptor tupleDesc = createResultTupleDescriptor(selectStmt, "union", analyzer);
-        UnionNode unionNode = new UnionNode(ctx_.getNextNodeId(), tupleDesc.getId());
+        UnionNode unionNode = new UnionNode(ctx.getNextNodeId(), tupleDesc.getId());
 
         // Analysis guarantees that selects without a FROM clause only have constant exprs.
         if (selectStmt.getValueList() != null) {
@@ -1340,7 +1338,7 @@ public class SingleNodePlanner {
                 Preconditions.checkState(inlineViewRef.getMaterializedTupleIds().size() == 1);
                 // we need to materialize all slots of our inline view tuple
                 analyzer.getTupleDesc(inlineViewRef.getId()).materializeSlots();
-                UnionNode unionNode = new UnionNode(ctx_.getNextNodeId(),
+                UnionNode unionNode = new UnionNode(ctx.getNextNodeId(),
                         inlineViewRef.getMaterializedTupleIds().get(0));
                 if (analyzer.hasEmptyResultSet()) {
                     return unionNode;
@@ -1672,33 +1670,33 @@ public class SingleNodePlanner {
 
         switch (tblRef.getTable().getType()) {
             case OLAP:
-                OlapScanNode olapNode = new OlapScanNode(ctx_.getNextNodeId(), tblRef.getDesc(), 
+                OlapScanNode olapNode = new OlapScanNode(ctx.getNextNodeId(), tblRef.getDesc(),
                         "OlapScanNode");
                 olapNode.setForceOpenPreAgg(tblRef.isForcePreAggOpened());
                 scanNode = olapNode;
                 break;
             case ODBC:
-                scanNode = new OdbcScanNode(ctx_.getNextNodeId(), tblRef.getDesc(), (OdbcTable) tblRef.getTable());
+                scanNode = new OdbcScanNode(ctx.getNextNodeId(), tblRef.getDesc(), (OdbcTable) tblRef.getTable());
                 break;
             case MYSQL:
-                scanNode = new MysqlScanNode(ctx_.getNextNodeId(), tblRef.getDesc(), (MysqlTable) tblRef.getTable());
+                scanNode = new MysqlScanNode(ctx.getNextNodeId(), tblRef.getDesc(), (MysqlTable) tblRef.getTable());
                 break;
             case SCHEMA:
-                scanNode = new SchemaScanNode(ctx_.getNextNodeId(), tblRef.getDesc());
+                scanNode = new SchemaScanNode(ctx.getNextNodeId(), tblRef.getDesc());
                 break;
             case BROKER:
-                scanNode = new BrokerScanNode(ctx_.getNextNodeId(), tblRef.getDesc(), "BrokerScanNode",
+                scanNode = new BrokerScanNode(ctx.getNextNodeId(), tblRef.getDesc(), "BrokerScanNode",
                         null, -1);
                 break;
             case ELASTICSEARCH:
-                scanNode = new EsScanNode(ctx_.getNextNodeId(), tblRef.getDesc(), "EsScanNode");
+                scanNode = new EsScanNode(ctx.getNextNodeId(), tblRef.getDesc(), "EsScanNode");
                 break;
             case HIVE:
-                scanNode = new HiveScanNode(ctx_.getNextNodeId(), tblRef.getDesc(), "HiveScanNode",
+                scanNode = new HiveScanNode(ctx.getNextNodeId(), tblRef.getDesc(), "HiveScanNode",
                         null, -1);
                 break;
             case ICEBERG:
-                scanNode = new IcebergScanNode(ctx_.getNextNodeId(), tblRef.getDesc(), "IcebergScanNode",
+                scanNode = new IcebergScanNode(ctx.getNextNodeId(), tblRef.getDesc(), "IcebergScanNode",
                         null, -1);
                 break;
             default:
@@ -1710,7 +1708,8 @@ public class SingleNodePlanner {
 
         scanNodes.add(scanNode);
         // now we put the selectStmtToScanNodes's init before the scanNode.init
-        List<ScanNode> scanNodeList = selectStmtToScanNodes.computeIfAbsent(selectStmt.getAnalyzer(), k -> Lists.newArrayList());
+        List<ScanNode> scanNodeList = selectStmtToScanNodes.computeIfAbsent(
+                selectStmt.getAnalyzer(), k -> Lists.newArrayList());
         scanNodeList.add(scanNode);
 
         scanNode.init(analyzer);
@@ -1803,11 +1802,12 @@ public class SingleNodePlanner {
             }
 
             // construct cross join node
-            // LOG.debug("Join between {} and {} requires at least one conjunctive equality predicate between the two tables",
+            // LOG.debug("Join between {} and {} requires at least one conjunctive"
+            //        + " equality predicate between the two tables",
             //        outerRef.getAliasAsName(), innerRef.getAliasAsName());
             // TODO If there are eq join predicates then we should construct a hash join
             CrossJoinNode result =
-                    new CrossJoinNode(ctx_.getNextNodeId(), outer, inner, innerRef);
+                    new CrossJoinNode(ctx.getNextNodeId(), outer, inner, innerRef);
             result.init(analyzer);
             return result;
         }
@@ -1826,7 +1826,7 @@ public class SingleNodePlanner {
         }
 
         HashJoinNode result =
-                new HashJoinNode(ctx_.getNextNodeId(), outer, inner, innerRef, eqJoinConjuncts,
+                new HashJoinNode(ctx.getNextNodeId(), outer, inner, innerRef, eqJoinConjuncts,
                         ojConjuncts);
         result.init(analyzer);
         return result;
@@ -1889,7 +1889,7 @@ public class SingleNodePlanner {
             throws UserException {
         Preconditions.checkNotNull(lateralViewRefs);
         Preconditions.checkState(lateralViewRefs.size() > 0);
-        TableFunctionNode tableFunctionNode = new TableFunctionNode(ctx_.getNextNodeId(), inputNode,
+        TableFunctionNode tableFunctionNode = new TableFunctionNode(ctx.getNextNodeId(), inputNode,
                 lateralViewRefs);
         tableFunctionNode.init(analyzer);
         tableFunctionNode.projectSlots(analyzer, selectStmt);
@@ -1921,15 +1921,15 @@ public class SingleNodePlanner {
         }
         switch (operation) {
             case UNION:
-                setOpNode = new UnionNode(ctx_.getNextNodeId(), setOperationStmt.getTupleId(),
+                setOpNode = new UnionNode(ctx.getNextNodeId(), setOperationStmt.getTupleId(),
                         setOperationStmt.getSetOpsResultExprs(), false);
                 break;
             case INTERSECT:
-                setOpNode = new IntersectNode(ctx_.getNextNodeId(), setOperationStmt.getTupleId(),
+                setOpNode = new IntersectNode(ctx.getNextNodeId(), setOperationStmt.getTupleId(),
                         setOperationStmt.getSetOpsResultExprs(), false);
                 break;
             case EXCEPT:
-                setOpNode = new ExceptNode(ctx_.getNextNodeId(), setOperationStmt.getTupleId(),
+                setOpNode = new ExceptNode(ctx.getNextNodeId(), setOperationStmt.getTupleId(),
                         setOperationStmt.getSetOpsResultExprs(), false);
                 break;
             default:
@@ -2122,7 +2122,7 @@ public class SingleNodePlanner {
         if (hasDistinctOps) {
             result = createSetOperationPlan(
                     analyzer, setOperationStmt, distinctOps, result, defaultOrderByLimit);
-            result = new AggregationNode(ctx_.getNextNodeId(), result,
+            result = new AggregationNode(ctx.getNextNodeId(), result,
                     setOperationStmt.getDistinctAggInfo());
             result.init(analyzer);
         }
@@ -2136,7 +2136,7 @@ public class SingleNodePlanner {
 
     private PlanNode createAssertRowCountNode(PlanNode input, AssertNumRowsElement assertNumRowsElement,
                                               Analyzer analyzer) throws UserException {
-        AssertNumRowsNode root = new AssertNumRowsNode(ctx_.getNextNodeId(), input, assertNumRowsElement);
+        AssertNumRowsNode root = new AssertNumRowsNode(ctx.getNextNodeId(), input, assertNumRowsElement);
         root.init(analyzer);
         return root;
     }
@@ -2298,7 +2298,7 @@ public class SingleNodePlanner {
         if (putPredicatesOnAggregation(stmt, analyzer, pushDownPredicates)) {
             return;
         }
-        putPredicatesOnFrom(stmt, analyzer, pushDownPredicates);
+        putPredicatesOnTargetTupleIds(stmt.getTableRefIds(), analyzer, predicates);
     }
 
     private void pushDownPredicatesPastWindows(Analyzer analyzer, SelectStmt stmt) throws AnalysisException {
@@ -2319,28 +2319,58 @@ public class SingleNodePlanner {
         if (putPredicatesOnAggregation(stmt, analyzer, pushDownPredicates)) {
             return;
         }
-        putPredicatesOnFrom(stmt, analyzer, pushDownPredicates);
+        putPredicatesOnTargetTupleIds(stmt.getTableRefIds(), analyzer, predicates);
     }
 
-    private void pushDownPredicatesPastAggregation(Analyzer analyzer, SelectStmt stmt) throws AnalysisException {
-        final AggregateInfo aggregateInfo = stmt.getAggInfo();
-        if (aggregateInfo == null || aggregateInfo.getGroupingExprs().size() <= 0) {
+    /**
+     * Push down predicates past one phase aggregation.
+     *
+     * @param aggregateInfo one phase aggregate info. Either first phase or second phase
+     * @param analyzer current statement's analyzer
+     * @param stmt current stmt
+     * @param targetTupleIds target tuples to register predicates on:
+     *                      the table tuple ids when processing the first phase agg,
+     *                      the first phase aggregate's output tuple id when processing the second phase agg
+     * @throws AnalysisException if registering a predicate to a tuple fails
+     */
+    private void pushDownPredicatesPastAggregationOnePhase(AggregateInfo aggregateInfo,
+            Analyzer analyzer, SelectStmt stmt, List<TupleId> targetTupleIds) throws AnalysisException {
+        if (aggregateInfo == null || aggregateInfo.getGroupingExprs().isEmpty()) {
             return;
         }
         final List<Expr> predicates = getBoundPredicates(analyzer, aggregateInfo.getOutputTupleDesc());
-        if (predicates.size() <= 0) {
+        if (predicates.isEmpty()) {
             return;
         }
-
         // Push down predicates to aggregation's child until they are assigned successfully.
         final List<Expr> pushDownPredicates = getPredicatesBoundedByGroupbysSourceExpr(predicates, analyzer, stmt);
-        if (pushDownPredicates.size() <= 0) {
+        if (CollectionUtils.isEmpty(pushDownPredicates)) {
             return;
         }
-        putPredicatesOnFrom(stmt, analyzer, pushDownPredicates);
+        putPredicatesOnTargetTupleIds(targetTupleIds, analyzer, pushDownPredicates);
     }
 
-    private List<Expr> getPredicatesBoundedByGroupbysSourceExpr(List<Expr> predicates, Analyzer analyzer, SelectStmt stmt) {
+    /**
+     * Push down predicates past the whole aggregate stage, including both the first and second phases.
+     *
+     * @param analyzer current statement's analyzer
+     * @param stmt current stmt
+     * @throws AnalysisException if registering a predicate to a tuple fails
+     */
+    private void pushDownPredicatesPastAggregation(Analyzer analyzer, SelectStmt stmt) throws AnalysisException {
+        final AggregateInfo firstPhaseAggInfo = stmt.getAggInfo();
+        if (firstPhaseAggInfo == null) {
+            return;
+        }
+        final AggregateInfo secondPhaseAggInfo = firstPhaseAggInfo.getSecondPhaseDistinctAggInfo();
+
+        final List<TupleId> firstPhaseTupleIds = Lists.newArrayList(firstPhaseAggInfo.getOutputTupleId());
+        pushDownPredicatesPastAggregationOnePhase(secondPhaseAggInfo, analyzer, stmt, firstPhaseTupleIds);
+        pushDownPredicatesPastAggregationOnePhase(firstPhaseAggInfo, analyzer, stmt, stmt.getTableRefIds());
+    }
+
+    private List<Expr> getPredicatesBoundedByGroupbysSourceExpr(
+            List<Expr> predicates, Analyzer analyzer, SelectStmt stmt) {
         final List<Expr> predicatesCanPushDown = Lists.newArrayList();
         for (Expr predicate : predicates) {
             if (predicate.isConstant()) {
@@ -2352,10 +2382,21 @@ public class SingleNodePlanner {
             final List<SlotId> slotIds = Lists.newArrayList();
             predicate.getIds(tupleIds, slotIds);
 
-            boolean isAllSlotReferingGroupBys = true;
+            boolean isAllSlotReferToGroupBys = true;
             for (SlotId slotId : slotIds) {
-                final SlotDescriptor slotDesc = analyzer.getDescTbl().getSlotDesc(slotId);
-                Expr sourceExpr = slotDesc.getSourceExprs().get(0);
+                Expr sourceExpr = new SlotRef(analyzer.getDescTbl().getSlotDesc(slotId));
+                // Each aggregate phase wraps the expression in a SlotRef.
+                // When we process a one phase aggregate, we only need to unwrap once,
+                // but when we process a two phase aggregate, we need to unwrap twice.
+                // So use a loop here to adapt to both situations.
+                while (sourceExpr instanceof SlotRef) {
+                    SlotRef slotRef = (SlotRef) sourceExpr;
+                    SlotDescriptor slotDesc = slotRef.getDesc();
+                    if (slotDesc.getSourceExprs().isEmpty()) {
+                        break;
+                    }
+                    sourceExpr = slotDesc.getSourceExprs().get(0);
+                }
                 // if grouping set is given and column is not in all grouping set list
                 // we cannot push the predicate since the column value can be null
                 if (stmt.getGroupByClause() == null) {
@@ -2368,24 +2409,24 @@ public class SingleNodePlanner {
                     // if grouping type is CUBE or ROLLUP will definitely produce null
                     if (stmt.getGroupByClause().getGroupingType() == GroupByClause.GroupingType.CUBE
                             || stmt.getGroupByClause().getGroupingType() == GroupByClause.GroupingType.ROLLUP) {
-                        isAllSlotReferingGroupBys = false;
+                        isAllSlotReferToGroupBys = false;
                     } else {
                         // if grouping type is GROUPING_SETS and the predicate not in all grouping list,
                         // the predicate cannot be push down
                         for (List<Expr> exprs : stmt.getGroupByClause().getGroupingSetList()) {
                             if (!exprs.contains(sourceExpr)) {
-                                isAllSlotReferingGroupBys = false;
+                                isAllSlotReferToGroupBys = false;
                                 break;
                             }
                         }
                     }
                 }
                 if (sourceExpr.getFn() instanceof AggregateFunction) {
-                    isAllSlotReferingGroupBys = false;
+                    isAllSlotReferToGroupBys = false;
                 }
             }
 
-            if (isAllSlotReferingGroupBys) {
+            if (isAllSlotReferToGroupBys) {
                 predicatesCanPushDown.add(predicate);
             }
         }
@@ -2444,16 +2485,22 @@ public class SingleNodePlanner {
         return false;
     }
 
-    // Register predicates with TableRef's tuple id.
-    private void putPredicatesOnFrom(SelectStmt stmt, Analyzer analyzer, List<Expr> predicates)
+    /**
+     * Register predicates on target tuple ids.
+     *
+     * @param analyzer current stmt's analyzer
+     * @param predicates predicates to try to register
+     * @param tupleIds target tuple ids
+     * @throws AnalysisException if registration fails
+     */
+    private void putPredicatesOnTargetTupleIds(List<TupleId> tupleIds,
+            Analyzer analyzer, List<Expr> predicates)
             throws AnalysisException {
-        final List<TupleId> tableTupleIds = Lists.newArrayList();
-        for (TableRef tableRef : stmt.getTableRefs()) {
-            tableTupleIds.add(tableRef.getId());
+        if (CollectionUtils.isEmpty(tupleIds)) {
+            return;
         }
-
         for (Expr predicate : predicates) {
-            Preconditions.checkArgument(predicate.isBoundByTupleIds(tableTupleIds),
+            Preconditions.checkArgument(predicate.isBoundByTupleIds(tupleIds),
                     "Predicate:" + predicate.toSql() + " can't be assigned to some PlanNode.");
             final List<TupleId> predicateTupleIds = Lists.newArrayList();
             predicate.getIds(predicateTupleIds, null);
@@ -2486,18 +2533,24 @@ public class SingleNodePlanner {
      */
     public static BinaryPredicate getNormalizedEqPred(Expr expr, List<TupleId> lhsTids,
                                                       List<TupleId> rhsTids, Analyzer analyzer) {
-        if (!(expr instanceof BinaryPredicate)) return null;
+        if (!(expr instanceof BinaryPredicate)) {
+            return null;
+        }
         BinaryPredicate pred = (BinaryPredicate) expr;
         if (!pred.getOp().isEquivalence()) {
             return null;
         }
-        if (pred.getChild(0).isConstant() || pred.getChild(1).isConstant()) return null;
+        if (pred.getChild(0).isConstant() || pred.getChild(1).isConstant()) {
+            return null;
+        }
 
         // Use the child that contains lhsTids as lhsExpr, for example, A join B on B.k = A.k,
         // where lhsExpr=A.k, rhsExpr=B.k, changed the order, A.k = B.k
         Expr lhsExpr = Expr.getFirstBoundChild(pred, lhsTids);
         Expr rhsExpr = Expr.getFirstBoundChild(pred, rhsTids);
-        if (lhsExpr == null || rhsExpr == null || lhsExpr == rhsExpr) return null;
+        if (lhsExpr == null || rhsExpr == null || lhsExpr == rhsExpr) {
+            return null;
+        }
 
         BinaryPredicate result = new BinaryPredicate(pred.getOp(), lhsExpr, rhsExpr);
         result.analyzeNoThrow(analyzer);
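
The SlotRef unwrap loop in getPredicatesBoundedByGroupbysSourceExpr is the heart of this change: each aggregate phase re-exposes its inputs through fresh output slots, so a predicate's slot can sit one or two SlotRef hops away from the expression the GROUP BY actually names. Below is a minimal, self-contained C++ sketch of the same resolution idea; the types are hypothetical stand-ins for illustration, not the real Doris planner classes.

    #include <iostream>
    #include <vector>

    // Hypothetical stand-ins for the planner's expression tree.
    struct Expr {
        virtual ~Expr() = default;
    };

    struct SlotDescriptor {
        std::vector<Expr*> source_exprs; // empty for a base-table slot
    };

    struct SlotRef : Expr {
        SlotDescriptor* desc;
        explicit SlotRef(SlotDescriptor* d) : desc(d) {}
    };

    // Follow SlotRef indirection until the real source expression is reached.
    // A one phase aggregate adds one layer of indirection and a two phase
    // (distinct) aggregate adds two, hence a loop rather than a single lookup.
    Expr* resolve_source(Expr* e) {
        while (auto* ref = dynamic_cast<SlotRef*>(e)) {
            if (ref->desc->source_exprs.empty()) break; // base-table slot
            e = ref->desc->source_exprs.front();
        }
        return e;
    }

    int main() {
        SlotDescriptor base_slot;                  // slot of a base-table column
        SlotRef phase1_out(&base_slot);            // first phase aggregate output
        SlotDescriptor phase1_desc{{&phase1_out}};
        SlotRef phase2_out(&phase1_desc);          // second phase aggregate output

        // Starting from the second phase output walks back through both layers.
        std::cout << std::boolalpha
                  << (resolve_source(&phase2_out) == &phase1_out) << std::endl; // true
    }
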
diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java
index 8548ba689a..bb30785eef 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java
@@ -448,5 +448,4 @@ public class PlannerTest {
         expectedEx.expectMessage("String Type should not be used in key column[k1].");
         UtFrameUtils.parseAndAnalyzeStmt(createTbl1, ctx);
     }
-
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[incubator-doris] 01/09: [Bug][Vectorized] Fix insert bitmap column with nullable column (#9408)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit ca952c91bbb78b09a2a9fb51d723eff84e21a8f1
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Tue May 17 14:42:20 2022 +0800

    [Bug][Vectorized] Fix insert bitmap column with nullable column (#9408)
    
    Co-authored-by: lihaopeng <li...@baidu.com>
---
 be/src/vec/sink/vtablet_sink.cpp | 209 +++++++++++++++++++--------------------
 be/src/vec/sink/vtablet_sink.h   |   6 +-
 2 files changed, 107 insertions(+), 108 deletions(-)

diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index e9768c9460..ba3da67b99 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -96,6 +96,7 @@ Status VOlapTableSink::send(RuntimeState* state, vectorized::Block* input_block)
             // because of "data unqualified"
             return Status::EndOfFile("Encountered unqualified data, stop processing");
         }
+        _convert_to_dest_desc_block(&block);
     }
 
     BlockRow block_row;
@@ -176,132 +177,105 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
                 block->get_by_position(i).column->convert_to_full_column_if_const();
         const auto& column = block->get_by_position(i).column;
 
-        if (desc->type() == TYPE_OBJECT && column->is_nullable()) {
-            const auto& null_map =
-                    vectorized::check_and_get_column<vectorized::ColumnNullable>(*column)
-                            ->get_null_map_data();
-            fmt::format_to(error_msg, "null is not allowed for bitmap column, column_name: {}; ",
-                           desc->col_name());
+        auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
+        auto& real_column_ptr =
+                column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());
 
+        switch (desc->type().type) {
+        case TYPE_CHAR:
+        case TYPE_VARCHAR:
+        case TYPE_STRING: {
+            const auto column_string =
+                    assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
+
+            size_t limit = std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
             for (int j = 0; j < num_rows; ++j) {
                 if (!filter_bitmap->Get(j)) {
-                    if (null_map[j]) {
-                        RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
-                    }
-                }
-            }
-        } else {
-            auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
-            auto& real_column_ptr =
-                    column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());
-
-            switch (desc->type().type) {
-            case TYPE_CHAR:
-            case TYPE_VARCHAR:
-            case TYPE_STRING: {
-                const auto column_string =
-                        assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
-
-                size_t limit =
-                        std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
-                for (int j = 0; j < num_rows; ++j) {
-                    if (!filter_bitmap->Get(j)) {
-                        auto str_val = column_string->get_data_at(j);
-                        bool invalid = str_val.size > limit;
+                    auto str_val = column_string->get_data_at(j);
+                    bool invalid = str_val.size > limit;
 
-                        error_msg.clear();
-                        if (str_val.size > desc->type().len) {
-                            fmt::format_to(error_msg, "{}",
-                                           "the length of input is too long than schema. ");
-                            fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
-                            fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
-                            fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
-                            fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
-                        } else if (str_val.size > limit) {
-                            fmt::format_to(
-                                    error_msg, "{}",
-                                    "the length of input string is too long than vec schema. ");
-                            fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
-                            fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
-                            fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
-                            fmt::format_to(error_msg, "limit length: {}; ", limit);
-                            fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
-                        }
+                    error_msg.clear();
+                    if (str_val.size > desc->type().len) {
+                        fmt::format_to(error_msg, "{}",
+                                       "the length of input is too long than schema. ");
+                        fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
+                        fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
+                        fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
+                        fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
+                    } else if (str_val.size > limit) {
+                        fmt::format_to(error_msg, "{}",
+                                       "the length of input string is too long than vec schema. ");
+                        fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
+                        fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
+                        fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
+                        fmt::format_to(error_msg, "limit length: {}; ", limit);
+                        fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
+                    }
 
-                        if (invalid) {
-                            RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
-                        }
+                    if (invalid) {
+                        RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
                     }
                 }
-                break;
             }
-            case TYPE_DECIMALV2: {
-                auto column_decimal = const_cast<
-                        vectorized::ColumnDecimal<vectorized::Decimal128>*>(
-                        assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
-                                real_column_ptr.get()));
+            break;
+        }
+        case TYPE_DECIMALV2: {
+            auto column_decimal = const_cast<vectorized::ColumnDecimal<vectorized::Decimal128>*>(
+                    assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
+                            real_column_ptr.get()));
 
-                for (int j = 0; j < num_rows; ++j) {
-                    if (!filter_bitmap->Get(j)) {
-                        auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
-                                column_decimal->get_data()[j]);
-                        error_msg.clear();
-                        bool invalid = false;
+            for (int j = 0; j < num_rows; ++j) {
+                if (!filter_bitmap->Get(j)) {
+                    auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
+                            column_decimal->get_data()[j]);
+                    error_msg.clear();
+                    bool invalid = false;
 
-                        if (dec_val.greater_than_scale(desc->type().scale)) {
-                            auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
-                            column_decimal->get_data()[j] =
-                                    binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);
+                    if (dec_val.greater_than_scale(desc->type().scale)) {
+                        auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
+                        column_decimal->get_data()[j] =
+                                binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);
 
-                            if (code != E_DEC_OK) {
-                                fmt::format_to(error_msg, "round one decimal failed.value={}; ",
-                                               dec_val.to_string());
-                                invalid = true;
-                            }
-                        }
-                        if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
-                            fmt::format_to(error_msg,
-                                           "decimal value is not valid for definition, column={}",
-                                           desc->col_name());
-                            fmt::format_to(error_msg, ", value={}", dec_val.to_string());
-                            fmt::format_to(error_msg, ", precision={}, scale={}; ",
-                                           desc->type().precision, desc->type().scale);
+                        if (code != E_DEC_OK) {
+                            fmt::format_to(error_msg, "round one decimal failed.value={}; ",
+                                           dec_val.to_string());
                             invalid = true;
                         }
+                    }
+                    if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
+                        fmt::format_to(error_msg,
+                                       "decimal value is not valid for definition, column={}",
+                                       desc->col_name());
+                        fmt::format_to(error_msg, ", value={}", dec_val.to_string());
+                        fmt::format_to(error_msg, ", precision={}, scale={}; ",
+                                       desc->type().precision, desc->type().scale);
+                        invalid = true;
+                    }
 
-                        if (invalid) {
-                            RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
-                        }
+                    if (invalid) {
+                        RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
                     }
                 }
-                break;
-            }
-            default:
-                break;
-            }
-
-            // Dispose the nullable column not match problem here, convert to nullable column
-            if (desc->is_nullable() && !column_ptr) {
-                block->get_by_position(i).column = vectorized::make_nullable(column);
-                block->get_by_position(i).type =
-                        vectorized::make_nullable(block->get_by_position(i).type);
             }
+            break;
+        }
+        default:
+            break;
+        }
 
-            // Dispose the nullable column not match problem here, convert to not nullable column
-            if (!desc->is_nullable() && column_ptr) {
-                const auto& null_map = column_ptr->get_null_map_data();
-                for (int j = 0; j < null_map.size(); ++j) {
-                    fmt::format_to(error_msg, "null value for not null column, column={}; ",
-                                   desc->col_name());
-                    if (null_map[j] && !filter_bitmap->Get(j)) {
-                        RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
-                    }
+        // Handle the case where the column must not contain any NULL value.
+        // Only two cases:
+        // 1. the column is nullable but the desc is not nullable
+        // 2. desc->type is BITMAP
+        if ((!desc->is_nullable() || desc->type() == TYPE_OBJECT) && column_ptr) {
+            const auto& null_map = column_ptr->get_null_map_data();
+            for (int j = 0; j < null_map.size(); ++j) {
+                fmt::format_to(error_msg,
+                               "null value for not null column/or bitmap column, column={}; ",
+                               desc->col_name());
+                if (null_map[j] && !filter_bitmap->Get(j)) {
+                    RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
                 }
-                block->get_by_position(i).column = column_ptr->get_nested_column_ptr();
-                block->get_by_position(i).type =
-                        (reinterpret_cast<const vectorized::DataTypeNullable*>(
-                                 block->get_by_position(i).type.get()))
-                                ->get_nested_type();
             }
         }
     }
@@ -313,5 +287,26 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
     return Status::OK();
 }
 
+void VOlapTableSink::_convert_to_dest_desc_block(doris::vectorized::Block* block) {
+    for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) {
+        SlotDescriptor* desc = _output_tuple_desc->slots()[i];
+        if (desc->is_nullable() != block->get_by_position(i).type->is_nullable()) {
+            if (desc->is_nullable()) {
+                block->get_by_position(i).type =
+                        vectorized::make_nullable(block->get_by_position(i).type);
+                block->get_by_position(i).column =
+                        vectorized::make_nullable(block->get_by_position(i).column);
+            } else {
+                block->get_by_position(i).type = assert_cast<const vectorized::DataTypeNullable&>(
+                                                         *block->get_by_position(i).type)
+                                                         .get_nested_type();
+                block->get_by_position(i).column = assert_cast<const vectorized::ColumnNullable&>(
+                                                           *block->get_by_position(i).column)
+                                                           .get_nested_column_ptr();
+            }
+        }
+    }
+}
+
 } // namespace stream_load
 } // namespace doris
diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h
index 5514ff1909..d703334523 100644
--- a/be/src/vec/sink/vtablet_sink.h
+++ b/be/src/vec/sink/vtablet_sink.h
@@ -58,9 +58,13 @@ private:
     Status _validate_data(RuntimeState* state, vectorized::Block* block, Bitmap* filter_bitmap,
                           int* filtered_rows, bool* stop_processing);
 
+    // some output columns of the output exprs may have a nullable property that
+    // differs from the dest slot desc, so we need to do the conversion here
+    void _convert_to_dest_desc_block(vectorized::Block* block);
+
     VOlapTablePartitionParam* _vpartition = nullptr;
     std::vector<vectorized::VExprContext*> _output_vexpr_ctxs;
 };
 
 } // namespace stream_load
-} // namespace doris
\ No newline at end of file
+} // namespace doris

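The new _convert_to_dest_desc_block captures the whole point of the patch: once validation has rejected any rows that would leave a NULL in a non-nullable (or bitmap) destination, each column only needs its nullability wrapper aligned with the destination slot. A self-contained C++ sketch of that alignment step, using toy column types rather than the real doris::vectorized ones:

    #include <cassert>
    #include <optional>
    #include <utility>
    #include <variant>
    #include <vector>

    // Toy stand-ins for a plain and a nullable int column.
    using PlainCol = std::vector<int>;
    using NullableCol = std::vector<std::optional<int>>;
    using Column = std::variant<PlainCol, NullableCol>;

    // Align a column's nullability with the destination slot descriptor.
    // Wrapping is always safe; unwrapping assumes validation has already
    // rejected any remaining nulls, as _validate_data does above.
    Column convert_to_dest(Column col, bool dest_nullable) {
        if (dest_nullable && std::holds_alternative<PlainCol>(col)) {
            NullableCol out;
            for (int v : std::get<PlainCol>(col)) out.emplace_back(v);
            return out;
        }
        if (!dest_nullable && std::holds_alternative<NullableCol>(col)) {
            PlainCol out;
            for (const auto& v : std::get<NullableCol>(col)) out.push_back(*v);
            return out;
        }
        return col; // nullability already matches
    }

    int main() {
        Column c = PlainCol{1, 2, 3};
        c = convert_to_dest(std::move(c), /*dest_nullable=*/true);
        assert(std::holds_alternative<NullableCol>(c));
    }
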



[incubator-doris] 02/09: [fix] fix bug that replica cannot be repaired due to DECOMMISSION state (#9424)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit b20df704b18b9b6741401e9211d180d9b19c8532
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Tue May 17 22:36:30 2022 +0800

    [fix] fix bug that replica cannot be repaired due to DECOMMISSION state (#9424)
    
    Reset replicas whose state is DECOMMISSION after scheduling finishes.
---
 .../org/apache/doris/clone/TabletSchedCtx.java     | 24 ++++++++++++++
 .../org/apache/doris/clone/TabletScheduler.java    | 38 +++++++++++++---------
 2 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 6610b484ae..ac7a96efc2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -1098,6 +1098,8 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
             sb.append(". to backend: ").append(destBackendId);
             sb.append(", dest path hash: ").append(destPathHash);
         }
+        sb.append(", visible version: ").append(visibleVersion);
+        sb.append(", committed version: ").append(committedVersion);
         if (errMsg != null) {
             sb.append(". err: ").append(errMsg);
         }
@@ -1119,4 +1121,26 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
             }
         }
     }
+
+    /**
+     * call this when releaseTabletCtx()
+     */
+    public void resetReplicaState() {
+        if (tablet != null) {
+            for (Replica replica : tablet.getReplicas()) {
+                // To address issue: https://github.com/apache/incubator-doris/issues/9422
+                // the DECOMMISSION state is set in TabletScheduler and not persisted to meta.
+                // So it is reasonable to reset this state if we failed to schedule this tablet.
+                // That is, if the TabletScheduler cannot process the tablet, then it should reset
+                // any intermediate state it set during the scheduling process.
+                if (replica.getState() == ReplicaState.DECOMMISSION) {
+                    replica.setState(ReplicaState.NORMAL);
+                    replica.setWatermarkTxnId(-1);
+                    LOG.debug("reset replica {} on backend {} of tablet {} state from DECOMMISSION to NORMAL",
+                            replica.getId(), replica.getBackendId(), tabletId);
+                }
+            }
+        }
+    }
+
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 1f80cada9e..8231269816 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -385,12 +385,12 @@ public class TabletScheduler extends MasterDaemon {
                     if (tabletCtx.getType() == Type.BALANCE) {
                         // if balance is disabled, remove this tablet
                         if (Config.disable_balance) {
-                            finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED,
+                            finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(),
                                     "disable balance and " + e.getMessage());
                         } else {
                             // remove the balance task if it fails to be scheduled many times
                             if (tabletCtx.getFailedSchedCounter() > 10) {
-                                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED,
+                                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(),
                                         "schedule failed too many times and " + e.getMessage());
                             } else {
                                 // we must release resource it current hold, and be scheduled again
@@ -410,19 +410,19 @@ public class TabletScheduler extends MasterDaemon {
                 } else if (e.getStatus() == Status.FINISHED) {
                     // schedule redundant tablet or scheduler disabled will throw this exception
                     stat.counterTabletScheduledSucceeded.incrementAndGet();
-                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getMessage());
+                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getStatus(), e.getMessage());
                 } else {
                     Preconditions.checkState(e.getStatus() == Status.UNRECOVERABLE, e.getStatus());
                     // discard
                     stat.counterTabletScheduledDiscard.incrementAndGet();
-                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage());
+                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
                 }
                 continue;
             } catch (Exception e) {
                 LOG.warn("got unexpected exception, discard this schedule. tablet: {}",
                         tabletCtx.getTabletId(), e);
                 stat.counterTabletScheduledFailed.incrementAndGet();
-                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage());
+                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage());
                 continue;
             }
 
@@ -532,7 +532,8 @@ public class TabletScheduler extends MasterDaemon {
                     for (TransactionState transactionState : dbTransactionMgr.getPreCommittedTxnList()) {
                         if(transactionState.getTableIdList().contains(tbl.getId())) {
                             // If table releate to transaction with precommitted status, do not allow to do balance.
-                            throw new SchedException(Status.UNRECOVERABLE, "There exists PRECOMMITTED transaction releated to table");
+                            throw new SchedException(Status.UNRECOVERABLE,
+                                    "There exists PRECOMMITTED transaction related to table");
                         }
                     }
                 } catch (AnalysisException e) {
@@ -1053,7 +1054,6 @@ public class TabletScheduler extends MasterDaemon {
     }
 
     private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, String reason, boolean force) throws SchedException {
-
         /*
          * Before deleting a replica, we should make sure that there is no running txn on it and no more txns will be on it.
          * So we do followings:
@@ -1069,6 +1069,8 @@ public class TabletScheduler extends MasterDaemon {
             replica.setState(ReplicaState.DECOMMISSION);
             // set priority to normal because it may wait for a long time. Remain it as VERY_HIGH may block other task.
             tabletCtx.setOrigPriority(Priority.NORMAL);
+            LOG.debug("set replica {} on backend {} of tablet {} state to DECOMMISSION",
+                    replica.getId(), replica.getBackendId(), tabletCtx.getTabletId());
             throw new SchedException(Status.SCHEDULE_FAILED, "set watermark txn " + nextTxnId);
         } else if (replica.getState() == ReplicaState.DECOMMISSION && replica.getWatermarkTxnId() != -1) {
             long watermarkTxnId = replica.getWatermarkTxnId();
@@ -1323,17 +1325,20 @@ public class TabletScheduler extends MasterDaemon {
         addTablet(tabletCtx, true /* force */);
     }
 
-    private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, String reason) {
+    private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, Status status, String reason) {
         // use 2 steps to avoid nested database lock and synchronized.(releaseTabletCtx() may hold db lock)
         // remove the tablet ctx, so that no other process can see it
         removeTabletCtx(tabletCtx, reason);
         // release resources taken by tablet ctx
-        releaseTabletCtx(tabletCtx, state);
+        releaseTabletCtx(tabletCtx, state, status == Status.UNRECOVERABLE);
     }
 
-    private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state) {
+    private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, boolean resetReplicaState) {
         tabletCtx.setState(state);
         tabletCtx.releaseResource(this);
+        if (resetReplicaState) {
+            tabletCtx.resetReplicaState();
+        }
         tabletCtx.setFinishedTime(System.currentTimeMillis());
     }
 
@@ -1393,25 +1398,25 @@ public class TabletScheduler extends MasterDaemon {
             } else if (e.getStatus() == Status.UNRECOVERABLE) {
                 // unrecoverable
                 stat.counterTabletScheduledDiscard.incrementAndGet();
-                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage());
+                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
                 return true;
             } else if (e.getStatus() == Status.FINISHED) {
                 // tablet is already healthy, just remove
-                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage());
+                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
                 return true;
             }
         } catch (Exception e) {
             LOG.warn("got unexpected exception when finish clone task. tablet: {}",
                     tabletCtx.getTabletId(), e);
             stat.counterTabletScheduledDiscard.incrementAndGet();
-            finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage());
+            finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage());
             return true;
         }
 
         Preconditions.checkState(tabletCtx.getState() == TabletSchedCtx.State.FINISHED);
         stat.counterCloneTaskSucceeded.incrementAndGet();
         gatherStatistics(tabletCtx);
-        finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, "finished");
+        finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, Status.FINISHED, "finished");
         return true;
     }
 
@@ -1475,7 +1480,10 @@ public class TabletScheduler extends MasterDaemon {
 
         // 2. release ctx
         timeoutTablets.stream().forEach(t -> {
-            releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED);
+            // Set "resetReplicaState" to true because
+            // the timeout task should also be considered as UNRECOVERABLE,
+            // so we need to reset the replica state.
+            releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED, true);
             stat.counterCloneTaskTimeout.incrementAndGet();
         });
     }

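Seen end to end, the fix is a small piece of control flow: DECOMMISSION is a transient, in-memory marker, so every terminal path that abandons a tablet (an UNRECOVERABLE status, an unexpected exception, or a timed-out clone task) must roll its replicas back to NORMAL. A condensed, self-contained C++ sketch of that flow, with hypothetical types mirroring the Java classes above:

    #include <iostream>
    #include <vector>

    enum class ReplicaState { NORMAL, DECOMMISSION };
    enum class Status { FINISHED, UNRECOVERABLE };

    struct Replica {
        ReplicaState state = ReplicaState::NORMAL;
    };

    struct TabletCtx {
        std::vector<Replica> replicas;
        // DECOMMISSION is never persisted, so it is safe to roll it back here.
        void reset_replica_state() {
            for (auto& r : replicas)
                if (r.state == ReplicaState::DECOMMISSION) r.state = ReplicaState::NORMAL;
        }
    };

    void release_tablet_ctx(TabletCtx& ctx, bool reset_replica_state) {
        if (reset_replica_state) ctx.reset_replica_state();
        // ... release other resources held by the context ...
    }

    void finalize_tablet_ctx(TabletCtx& ctx, Status status) {
        // Only schedules that are abandoned for good reset the transient state.
        release_tablet_ctx(ctx, status == Status::UNRECOVERABLE);
    }

    int main() {
        TabletCtx ctx{{{ReplicaState::DECOMMISSION}}};
        finalize_tablet_ctx(ctx, Status::UNRECOVERABLE);
        std::cout << (ctx.replicas[0].state == ReplicaState::NORMAL) << std::endl; // 1
    }
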



[incubator-doris] 06/09: [Improvement] reduce string size in serialization (#9550)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit f97b9d93871e26280138667dde29f1085730e836
Author: Gabriel <ga...@gmail.com>
AuthorDate: Tue May 17 22:38:34 2022 +0800

    [Improvement] reduce string size in serialization (#9550)
---
 be/src/vec/columns/column_string.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp
index 98ebf673b4..59f521b50c 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -163,7 +163,7 @@ ColumnPtr ColumnString::permute(const Permutation& perm, size_t limit) const {
 
 StringRef ColumnString::serialize_value_into_arena(size_t n, Arena& arena,
                                                    char const*& begin) const {
-    size_t string_size = size_at(n);
+    UInt32 string_size = size_at(n);
     size_t offset = offset_at(n);
 
     StringRef res;
@@ -177,7 +177,7 @@ StringRef ColumnString::serialize_value_into_arena(size_t n, Arena& arena,
 }
 
 const char* ColumnString::deserialize_and_insert_from_arena(const char* pos) {
-    const size_t string_size = unaligned_load<size_t>(pos);
+    const UInt32 string_size = unaligned_load<UInt32>(pos);
     pos += sizeof(string_size);
 
     const size_t old_size = chars.size();

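The saving here is purely in the length prefix: on a 64-bit build size_t is 8 bytes, so storing the length as a UInt32 trims 4 bytes from every serialized string. A stand-alone C++ sketch of the patched layout, with a hypothetical arena modeled as a byte vector:

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <string>
    #include <vector>

    // Append one string to an arena-like buffer, prefixed with a 4-byte length
    // (the patched layout). The pre-patch layout spent sizeof(size_t) == 8 here.
    void serialize(std::vector<char>& buf, const std::string& s) {
        uint32_t size = static_cast<uint32_t>(s.size());
        const char* p = reinterpret_cast<const char*>(&size);
        buf.insert(buf.end(), p, p + sizeof(size));
        buf.insert(buf.end(), s.begin(), s.end());
    }

    const char* deserialize(const char* pos, std::string& out) {
        uint32_t size;
        std::memcpy(&size, pos, sizeof(size)); // unaligned-safe load
        pos += sizeof(size);
        out.assign(pos, size);
        return pos + size;
    }

    int main() {
        std::vector<char> buf;
        serialize(buf, "doris");
        std::string s;
        deserialize(buf.data(), s);
        std::cout << s << " (" << buf.size() << " bytes)" << std::endl; // 9, not 13
    }
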



[incubator-doris] 05/09: [Bug][Vectorized] Fix BE crash with delete condition and enable_storage_vectorization (#9547)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 9c00587a57dee6b5880848862ccccd8282745f22
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Tue May 17 14:01:22 2022 +0800

    [Bug][Vectorized] Fix BE crash with delete condition and enable_storage_vectorization (#9547)
    
    Co-authored-by: lihaopeng <li...@baidu.com>
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 27 ++++++++++++++++++----
 regression-test/data/delete/test_delete.out        | 16 +++++++++++++
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index a5767dcb08..3533d77dab 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -761,7 +761,9 @@ void SegmentIterator::_init_current_block(
         auto cid = _schema.column_id(i);
         auto column_desc = _schema.column(cid);
 
-        if (_is_pred_column[cid]) { //todo(wb) maybe we can release it after output block
+        // the column in the block must be clear()-ed here before inserting new data
+        if (_is_pred_column[cid] ||
+            i >= block->columns()) { //todo(wb) maybe we can release it after output block
             current_columns[cid]->clear();
         } else { // non-predicate column
             current_columns[cid] = std::move(*block->get_by_position(i).column).mutate();
@@ -778,8 +780,12 @@ void SegmentIterator::_init_current_block(
 
 void SegmentIterator::_output_non_pred_columns(vectorized::Block* block) {
     for (auto cid : _non_predicate_columns) {
-        block->replace_by_position(_schema_block_id_map[cid],
-                                   std::move(_current_return_columns[cid]));
+        auto loc = _schema_block_id_map[cid];
+        // if loc >= block->columns(), the column is a delete-condition column and should
+        // not be output by the block, so just skip the column.
+        if (loc < block->columns()) {
+            block->replace_by_position(loc, std::move(_current_return_columns[cid]));
+        }
     }
 }
 
@@ -911,11 +917,22 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
         _current_return_columns.resize(_schema.columns().size());
         for (size_t i = 0; i < _schema.num_column_ids(); i++) {
             auto cid = _schema.column_id(i);
+            auto column_desc = _schema.column(cid);
             if (_is_pred_column[cid]) {
-                auto column_desc = _schema.column(cid);
                 _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(
                         column_desc->type(), column_desc->is_nullable());
                 _current_return_columns[cid]->reserve(_opts.block_row_max);
+            } else if (i >= block->columns()) {
+                // if i >= block->columns and column i is not a pred column, then it is
+                // a delete-condition column that does not take effect in this segment,
+                // so we just create a column to hold the data.
+                // a. origin data -> b. delete condition -> c. new load data
+                // segments from c are not affected by the delete condition, but the
+                // column still needs to be read to match the schema.
+                // TODO: skip reading ineffective delete columns to speed up segment read.
+                _current_return_columns[cid] =
+                        Schema::get_data_type_ptr(column_desc->type())->create_column();
+                _current_return_columns[cid]->reserve(_opts.block_row_max);
             }
         }
     }
@@ -930,7 +947,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
     _opts.stats->raw_rows_read += nrows_read;
 
     if (nrows_read == 0) {
-        for (int i = 0; i < _schema.num_column_ids(); i++) {
+        for (int i = 0; i < block->columns(); i++) {
             auto cid = _schema.column_id(i);
             // todo(wb) abstract make column where
             if (!_is_pred_column[cid]) { // non-predicate
diff --git a/regression-test/data/delete/test_delete.out b/regression-test/data/delete/test_delete.out
new file mode 100644
index 0000000000..5dbc5223f6
--- /dev/null
+++ b/regression-test/data/delete/test_delete.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+8
+

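The invariant both hunks rely on is that delete-condition columns live at the tail of the read schema, past the columns the output block actually has. A toy, self-contained C++ sketch of the output step, with hypothetical stand-in types:

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Toy model of the output step: the read schema may carry extra
    // delete-condition columns at its tail, so a read column is copied into
    // the output block only when its mapped position fits.
    using Column = std::vector<std::string>;

    struct Block {
        std::vector<Column> columns;
        size_t num_columns() const { return columns.size(); }
    };

    void output_non_pred_columns(Block& block, const std::vector<size_t>& schema_to_block,
                                 std::vector<Column>& read_columns) {
        for (size_t cid = 0; cid < read_columns.size(); ++cid) {
            size_t loc = schema_to_block[cid];
            if (loc < block.num_columns()) { // delete-condition columns are skipped here
                block.columns[loc] = std::move(read_columns[cid]);
            }
        }
    }

    int main() {
        Block block{{{}, {}}};                            // output block expects 2 columns
        std::vector<Column> read = {{"a"}, {"b"}, {"x"}}; // 3rd is a delete column
        output_non_pred_columns(block, {0, 1, 2}, read);
        std::cout << block.columns[0][0] << block.columns[1][0] << std::endl; // ab
    }
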



[incubator-doris] 08/09: [fix](storage) low_cardinality_optimize core dump with is null predicate (#9586)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 0562bab929f2d2cb2a4dbec44f8e89560867bbaa
Author: ZenoYang <co...@qq.com>
AuthorDate: Wed May 18 14:57:13 2022 +0800

    [fix](storage) low_cardinality_optimize core dump with is null predicate (#9586)
    
    Issue Number: close #9555
    Make the last value of the dictionary the null value; when ColumnDict inserts a null,
    append the code corresponding to that last dictionary value.
---
 be/src/olap/comparison_predicate.cpp   |  7 +++++--
 be/src/vec/columns/column_dictionary.h | 30 ++++++++++++++----------------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp
index 45a89f92ad..363c75e5b8 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -147,6 +147,9 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
 
 // todo(zeno) define interface in IColumn to simplify code
+// If 1 OP 0 returns true, it means the predicate is > or >=
+// If 1 OP 1 returns true, it means the predicate is >= or <=
+// in this way, redundant code is avoided
 #define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE)                                       \
     template <class T>                                                                             \
     void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const {    \
@@ -164,7 +167,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                             vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);          \
                     auto& data_array = nested_col_ptr->get_data();                                 \
                     auto dict_code =                                                               \
-                            IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1)  \
+                            IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1)  \
                                      : nested_col_ptr->find_code(_value);                          \
                     for (uint16_t i = 0; i < *size; i++) {                                         \
                         uint16_t idx = sel[i];                                                     \
@@ -192,7 +195,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                 auto& dict_col =                                                                   \
                         reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\
                 auto& data_array = dict_col.get_data();                                            \
-                auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1)    \
+                auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1)    \
                                           : dict_col.find_code(_value);                            \
                 for (uint16_t i = 0; i < *size; ++i) {                                             \
                     uint16_t idx = sel[i];                                                         \
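
The switch from `0 OP 1` to `1 OP 0` is easiest to verify by evaluating both probe expressions for each operator the macro is instantiated with: `1 OP 0` is true exactly for the greater-than family (so the search looks for a greater bound), while `1 OP 1` is true exactly for the inclusive operators. A tiny stand-alone check of that truth table:

    #include <iostream>

    int main() {
        // columns: greater = (1 OP 0), inclusive = (1 OP 1)
        std::cout << "<  : greater=" << (1 < 0)  << " inclusive=" << (1 < 1)  << "\n"; // 0 0
        std::cout << "<= : greater=" << (1 <= 0) << " inclusive=" << (1 <= 1) << "\n"; // 0 1
        std::cout << ">  : greater=" << (1 > 0)  << " inclusive=" << (1 > 1)  << "\n"; // 1 0
        std::cout << ">= : greater=" << (1 >= 0) << " inclusive=" << (1 >= 1) << "\n"; // 1 1
    }
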
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 7d7117aee9..cc27ca1cdb 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -97,12 +97,10 @@ public:
     }
 
     void insert_data(const char* pos, size_t /*length*/) override {
-        _codes.push_back(unaligned_load<T>(pos));
+        LOG(FATAL) << "insert_data not supported in ColumnDictionary";
     }
 
-    void insert_data(const T value) { _codes.push_back(value); }
-
-    void insert_default() override { _codes.push_back(T()); }
+    void insert_default() override { _codes.push_back(_dict.get_null_code()); }
 
     void clear() override {
         _codes.clear();
@@ -218,13 +216,12 @@ public:
     void insert_many_dict_data(const int32_t* data_array, size_t start_index,
                                const StringRef* dict_array, size_t data_num,
                                uint32_t dict_num) override {
-        if (!is_dict_inited()) {
+        if (_dict.empty()) {
             _dict.reserve(dict_num);
             for (uint32_t i = 0; i < dict_num; ++i) {
                 auto value = StringValue(dict_array[i].data, dict_array[i].size);
                 _dict.insert_value(value);
             }
-            _dict_inited = true;
         }
 
         char* end_ptr = (char*)_codes.get_end_ptr();
@@ -266,8 +263,6 @@ public:
         return _dict.find_codes(values);
     }
 
-    bool is_dict_inited() const { return _dict_inited; }
-
     bool is_dict_sorted() const { return _dict_sorted; }
 
     bool is_dict_code_converted() const { return _dict_code_converted; }
@@ -304,13 +299,17 @@ public:
             if (it != _inverted_index.end()) {
                 return it->second;
             }
-            return -1;
+            return -2; // -1 is null code
+        }
+
+        T get_null_code() { return -1; }
+
+        inline StringValue& get_value(T code) {
+            return code >= _dict_data.size() ? _null_value : _dict_data[code];
         }
 
-        inline StringValue& get_value(T code) { return _dict_data[code]; }
-        
         inline void generate_hash_values() {
-            if (_hash_values.size() == 0) {
+            if (_hash_values.empty()) {
                 _hash_values.resize(_dict_data.size());
                 for (size_t i = 0; i < _dict_data.size(); i++) {
                     auto& sv = _dict_data[i];
@@ -387,7 +386,10 @@ public:
 
         size_t byte_size() { return _dict_data.size() * sizeof(_dict_data[0]); }
 
+        bool empty() { return _dict_data.empty(); }
+
     private:
+        StringValue _null_value = StringValue();
         StringValue::Comparator _comparator;
         // dict code -> dict value
         DictContainer _dict_data;
@@ -405,16 +407,12 @@ public:
 
 private:
     size_t _reserve_size;
-    bool _dict_inited = false;
     bool _dict_sorted = false;
     bool _dict_code_converted = false;
     Dictionary _dict;
     Container _codes;
 };
 
-template class ColumnDictionary<uint8_t>;
-template class ColumnDictionary<uint16_t>;
-template class ColumnDictionary<uint32_t>;
 template class ColumnDictionary<int32_t>;
 
 using ColumnDictI32 = vectorized::ColumnDictionary<doris::vectorized::Int32>;

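Put together, the new scheme is: real dictionary values take codes 0..N-1, -1 is the null code, and get_value resolves any out-of-range code to a sentinel value rather than indexing past the end. A self-contained sketch of just that lookup rule, with toy types rather than the real ColumnDictionary:

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    struct Dict {
        std::vector<std::string> data;
        std::string null_value; // empty sentinel, like StringValue()

        static constexpr int32_t null_code = -1;

        const std::string& get_value(int32_t code) const {
            // code == -1 (or any out-of-range code) falls back to the sentinel
            return static_cast<size_t>(code) >= data.size() ? null_value : data[code];
        }
    };

    int main() {
        Dict d{{"ab", "cd"}, ""};
        std::cout << d.get_value(1) << std::endl;                      // cd
        std::cout << d.get_value(Dict::null_code).size() << std::endl; // 0 (null sentinel)
    }
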
