You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2023/07/11 01:39:02 UTC
[doris] branch master updated: [feature](partial update)support insert new rows in non-strict mode partial update with nullable unmentioned columns (#21623)

This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7b403bff62 [feature](partial update)support insert new rows in non-strict mode partial update with nullable unmentioned columns (#21623)
7b403bff62 is described below

commit 7b403bff6257b7b1dc7136f3696611e19b314f26
Author: bobhan1 <bh...@outlook.com>
AuthorDate: Tue Jul 11 09:38:56 2023 +0800

    [feature](partial update)support insert new rows in non-strict mode partial update with nullable unmentioned columns (#21623)
    
    1. expand the semantics of variable strict_mode to control the behavior for stream load: if strict_mode is true, the stream load can only update existing rows; if strict_mode is false, the stream load can insert new rows if the key is not present in the table
    2. when inserting a new row in non-strict mode stream load, the unmentioned columns should have default value or be nullable
---
 be/src/exec/tablet_info.cpp                        |   5 +
 be/src/exec/tablet_info.h                          |   3 +-
 be/src/olap/delta_writer.cpp                       |   1 +
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |  71 +++++++++-----
 be/src/olap/rowset/segment_v2/segment_writer.h     |   3 +-
 be/src/olap/tablet_schema.cpp                      |   8 +-
 be/src/olap/tablet_schema.h                        |  12 ++-
 .../import/import-scenes/load-strict-mode.md       |  50 +++++++++-
 .../Load/STREAM-LOAD.md                            |   2 +-
 .../import/import-scenes/load-strict-mode.md       |  49 +++++++++-
 .../Load/STREAM-LOAD.md                            |   2 +-
 .../apache/doris/analysis/NativeInsertStmt.java    |   2 +-
 .../doris/load/loadv2/LoadingTaskPlanner.java      |   2 +-
 .../plans/commands/InsertIntoTableCommand.java     |   2 +-
 .../org/apache/doris/planner/OlapTableSink.java    |   6 +-
 .../apache/doris/planner/StreamLoadPlanner.java    |   4 +-
 .../apache/doris/planner/OlapTableSinkTest.java    |   8 +-
 gensrc/proto/descriptors.proto                     |   1 +
 gensrc/thrift/Descriptors.thrift                   |   1 +
 .../partial_update/test_partial_update_upsert.out  |  12 +++
 .../unique_with_mow_p0/partial_update/upsert.csv   |   3 +
 .../test_partial_update_upsert.groovy              | 103 +++++++++++++++++++++
 22 files changed, 301 insertions(+), 49 deletions(-)

diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp
index a4829a0d05..3413436c72 100644
--- a/be/src/exec/tablet_info.cpp
+++ b/be/src/exec/tablet_info.cpp
@@ -61,6 +61,7 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) {
     _table_id = pschema.table_id();
     _version = pschema.version();
     _is_partial_update = pschema.partial_update();
+    _is_strict_mode = pschema.is_strict_mode();
 
     for (auto& col : pschema.partial_update_input_columns()) {
         _partial_update_input_columns.insert(col);
@@ -114,6 +115,9 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) {
     _version = tschema.version;
     _is_dynamic_schema = tschema.is_dynamic_schema;
     _is_partial_update = tschema.is_partial_update;
+    if (tschema.__isset.is_strict_mode) {
+        _is_strict_mode = tschema.is_strict_mode;
+    }
 
     for (auto& tcolumn : tschema.partial_update_input_columns) {
         _partial_update_input_columns.insert(tcolumn);
@@ -180,6 +184,7 @@ void OlapTableSchemaParam::to_protobuf(POlapTableSchemaParam* pschema) const {
     pschema->set_table_id(_table_id);
     pschema->set_version(_version);
     pschema->set_partial_update(_is_partial_update);
+    pschema->set_is_strict_mode(_is_strict_mode);
     for (auto col : _partial_update_input_columns) {
         *pschema->add_partial_update_input_columns() = col;
     }
diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h
index 4b79ef6482..90cc713f95 100644
--- a/be/src/exec/tablet_info.h
+++ b/be/src/exec/tablet_info.h
@@ -88,7 +88,7 @@ public:
     std::set<std::string> partial_update_input_columns() const {
         return _partial_update_input_columns;
     }
-
+    bool is_strict_mode() const { return _is_strict_mode; }
     std::string debug_string() const;
 
 private:
@@ -103,6 +103,7 @@ private:
     bool _is_dynamic_schema = false;
     bool _is_partial_update = false;
     std::set<std::string> _partial_update_input_columns;
+    bool _is_strict_mode = false;
 };
 
 using OlapTableIndexTablets = TOlapTableIndexTablets;
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 0fc42cc219..8aa53d397e 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -601,6 +601,7 @@ void DeltaWriter::_build_current_tablet_schema(int64_t index_id,
     // set partial update columns info
     _tablet_schema->set_partial_update_info(table_schema_param->is_partial_update(),
                                             table_schema_param->partial_update_input_columns());
+    _tablet_schema->set_is_strict_mode(table_schema_param->is_strict_mode());
 }
 
 void DeltaWriter::_request_slave_tablet_pull_rowset(PNodeInfo node_info) {
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 372cd66352..fb466a7b90 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -50,6 +50,7 @@
 #include "util/crc32c.h"
 #include "util/faststring.h"
 #include "util/key_util.h"
+#include "vec/columns/column_nullable.h"
 #include "vec/common/schema_util.h"
 #include "vec/core/block.h"
 #include "vec/core/column_with_type_and_name.h"
@@ -353,9 +354,9 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
                                                      num_rows));
     }
 
-    bool has_default = false;
-    std::vector<bool> use_default_flag;
-    use_default_flag.reserve(num_rows);
+    bool has_default_or_nullable = false;
+    std::vector<bool> use_default_or_null_flag;
+    use_default_or_null_flag.reserve(num_rows);
     std::vector<RowsetSharedPtr> specified_rowsets;
     {
         std::shared_lock rlock(_tablet->get_header_lock());
@@ -375,11 +376,19 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
             auto st = _tablet->lookup_row_key(key, false, specified_rowsets, &loc,
                                               _mow_context->max_version, segment_caches, &rowset);
             if (st.is<NOT_FOUND>()) {
-                if (!_tablet_schema->allow_key_not_exist_in_partial_update()) {
-                    return Status::InternalError("partial update key not exist before");
+                if (_tablet_schema->is_strict_mode()) {
+                    return Status::InternalError(
+                            "partial update in strict mode only support updating rows with an "
+                            "existing key!");
                 }
-                has_default = true;
-                use_default_flag.emplace_back(true);
+
+                if (!_tablet_schema->can_insert_new_rows_in_partial_update()) {
+                    return Status::InternalError(
+                            "the unmentioned columns should have default value or be nullable for "
+                            "newly inserted rows in non-strict mode partial update");
+                }
+                has_default_or_nullable = true;
+                use_default_or_null_flag.emplace_back(true);
                 continue;
             }
             if (!st.ok()) {
@@ -387,17 +396,18 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
                 return st;
             }
             // partial update should not contain invisible columns
-            use_default_flag.emplace_back(false);
+            use_default_or_null_flag.emplace_back(false);
             _rsid_to_rowset.emplace(rowset->rowset_id(), rowset);
             _tablet->prepare_to_read(loc, pos, &_rssid_to_rid);
             _mow_context->delete_bitmap->add({loc.rowset_id, loc.segment_id, 0}, loc.row_id);
         }
     }
-    CHECK(use_default_flag.size() == num_rows);
+    CHECK(use_default_or_null_flag.size() == num_rows);
 
     // read and fill block
     auto mutable_full_columns = full_block.mutate_columns();
-    RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns, use_default_flag, has_default));
+    RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns, use_default_or_null_flag,
+                                         has_default_or_nullable));
     // row column should be filled here
     if (_tablet_schema->store_row_column()) {
         // convert block to row store format
@@ -424,8 +434,8 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
 }
 
 Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_full_columns,
-                                           const std::vector<bool>& use_default_flag,
-                                           bool has_default) {
+                                           const std::vector<bool>& use_default_or_null_flag,
+                                           bool has_default_or_nullable) {
     // create old value columns
     auto old_value_block = _tablet_schema->create_missing_columns_block();
     std::vector<uint32_t> cids_missing = _tablet_schema->get_missing_cids();
@@ -468,24 +478,35 @@ Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f
     // build default value columns
     auto default_value_block = old_value_block.clone_empty();
     auto mutable_default_value_columns = default_value_block.mutate_columns();
-    if (has_default) {
+    if (has_default_or_nullable) {
         for (auto i = 0; i < cids_missing.size(); ++i) {
-            auto default_value = _tablet_schema->column(cids_missing[i]).default_value();
-            vectorized::ReadBuffer rb(const_cast<char*>(default_value.c_str()),
-                                      default_value.size());
-            old_value_block.get_by_position(i).type->from_string(
-                    rb, mutable_default_value_columns[i].get());
+            const auto& column = _tablet_schema->column(cids_missing[i]);
+            if (column.has_default_value()) {
+                auto default_value = _tablet_schema->column(cids_missing[i]).default_value();
+                vectorized::ReadBuffer rb(const_cast<char*>(default_value.c_str()),
+                                          default_value.size());
+                old_value_block.get_by_position(i).type->from_string(
+                        rb, mutable_default_value_columns[i].get());
+            }
         }
     }
 
-    // fill all missing value from mutable_old_columns, need consider default value
-    for (auto idx = 0; idx < use_default_flag.size(); idx++) {
-        if (use_default_flag[idx]) {
-            // use default value
+    // fill all missing value from mutable_old_columns, need to consider default value and null value
+    for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) {
+        if (use_default_or_null_flag[idx]) {
             for (auto i = 0; i < cids_missing.size(); ++i) {
-                CHECK(_tablet_schema->column(cids_missing[i]).has_default_value());
-                mutable_full_columns[cids_missing[i]]->insert_from(
-                        *mutable_default_value_columns[i].get(), 0);
+                // if the column has default value, fill it with default value
+                // otherwise, if the column is nullable, fill it with null value
+                const auto& tablet_column = _tablet_schema->column(cids_missing[i]);
+                CHECK(tablet_column.has_default_value() || tablet_column.is_nullable());
+                if (tablet_column.has_default_value()) {
+                    mutable_full_columns[cids_missing[i]]->insert_from(
+                            *mutable_default_value_columns[i].get(), 0);
+                } else {
+                    auto nullable_column = assert_cast<vectorized::ColumnNullable*>(
+                            mutable_full_columns[cids_missing[i]].get());
+                    nullable_column->insert_null_elements(1);
+                }
             }
             continue;
         }
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h
index 222977c28a..6f2c46d128 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -129,7 +129,8 @@ public:
 
     void set_mow_context(std::shared_ptr<MowContext> mow_context);
     Status fill_missing_columns(vectorized::MutableColumns& mutable_full_columns,
-                                const std::vector<bool>& use_default_flag, bool has_default);
+                                const std::vector<bool>& use_default_or_null_flag,
+                                bool has_default_or_nullable);
 
 private:
     DISALLOW_COPY_AND_ASSIGN(SegmentWriter);
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index c02e876cd7..9fc1c2eb0f 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -727,8 +727,8 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema) {
             if (_partial_update_input_columns.count(_cols[i].name()) == 0) {
                 _missing_cids.emplace_back(i);
                 auto tablet_column = column(i);
-                if (!tablet_column.has_default_value()) {
-                    _allow_key_not_exist_in_partial_update = false;
+                if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) {
+                    _can_insert_new_rows_in_partial_update = false;
                 }
             } else {
                 _update_cids.emplace_back(i);
@@ -1081,8 +1081,8 @@ void TabletSchema::set_partial_update_info(bool is_partial_update,
         if (_partial_update_input_columns.count(_cols[i].name()) == 0) {
             _missing_cids.emplace_back(i);
             auto tablet_column = column(i);
-            if (!tablet_column.has_default_value()) {
-                _allow_key_not_exist_in_partial_update = false;
+            if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) {
+                _can_insert_new_rows_in_partial_update = false;
             }
         } else {
             _update_cids.emplace_back(i);
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 5a521f3610..06ef1a8344 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -313,9 +313,11 @@ public:
     bool is_partial_update() const { return _is_partial_update; }
     size_t partial_input_column_size() const { return _partial_update_input_columns.size(); }
     bool is_column_missing(size_t cid) const;
-    bool allow_key_not_exist_in_partial_update() const {
-        return _allow_key_not_exist_in_partial_update;
+    bool can_insert_new_rows_in_partial_update() const {
+        return _can_insert_new_rows_in_partial_update;
     }
+    void set_is_strict_mode(bool is_strict_mode) { _is_strict_mode = is_strict_mode; }
+    bool is_strict_mode() const { return _is_strict_mode; }
     std::vector<uint32_t> get_missing_cids() { return _missing_cids; }
     std::vector<uint32_t> get_update_cids() { return _update_cids; }
 
@@ -358,8 +360,10 @@ private:
     std::set<std::string> _partial_update_input_columns;
     std::vector<uint32_t> _missing_cids;
     std::vector<uint32_t> _update_cids;
-    // if key not exist in old rowset, use default value or null
-    bool _allow_key_not_exist_in_partial_update = true;
+    // if key not exist in old rowset, use default value or null value for the unmentioned cols
+    // to generate a new row, only available in non-strict mode
+    bool _can_insert_new_rows_in_partial_update = true;
+    bool _is_strict_mode = false;
 };
 
 bool operator==(const TabletSchema& a, const TabletSchema& b);
diff --git a/docs/en/docs/data-operate/import/import-scenes/load-strict-mode.md b/docs/en/docs/data-operate/import/import-scenes/load-strict-mode.md
index 421a39e5f3..318bc44468 100644
--- a/docs/en/docs/data-operate/import/import-scenes/load-strict-mode.md
+++ b/docs/en/docs/data-operate/import/import-scenes/load-strict-mode.md
@@ -92,7 +92,7 @@ Different import methods set strict mode in different ways.
 
 ## The role of strict mode
 
-Strict mode means strict filtering of column type conversions during import.
+- Strictly filtering column type conversions during import.
 
 The strict filtering strategy is as follows:
 
@@ -130,3 +130,51 @@ For an imported column type that contains range restrictions, if the original da
    > 1. Columns in the table allow to import null values
    > 2. After `abc` is converted to Decimal, it will become NULL due to type problem. When strict mode is on, this data will be filtered. And if it is closed, `null` will be imported.
    > 3. Although `10` is an out-of-range value, because its type conforms to the requirements of decimal, strict mode does not affect it. `10` will eventually be filtered in other import processing flows. But not filtered by strict mode.
+
+- Restricting partial column updates to rows whose keys already exist
+
+In strict mode, when performing partial column updates, each row of data inserted must have a key that already exists in the table. In non-strict mode, partial column updates can update existing rows with existing keys or insert new rows with non-existing keys.
+
+For example, consider the following table structure:
+
+```
+mysql> desc user_profile;
++------------------+-----------------+------+-------+---------+-------+
+| Field            | Type            | Null | Key   | Default | Extra |
++------------------+-----------------+------+-------+---------+-------+
+| id               | INT             | Yes  | true  | NULL    |       |
+| name             | VARCHAR(10)     | Yes  | false | NULL    | NONE  |
+| age              | INT             | Yes  | false | NULL    | NONE  |
+| city             | VARCHAR(10)     | Yes  | false | NULL    | NONE  |
+| balance          | DECIMALV3(9, 0) | Yes  | false | NULL    | NONE  |
+| last_access_time | DATETIME        | Yes  | false | NULL    | NONE  |
++------------------+-----------------+------+-------+---------+-------+
+```
+
+The table contains the following data:
+
+```
+1,"kevin",18,"shenzhen",400,"2023-07-01 12:00:00"
+```
+
+When a user uses non-strict mode stream load for partial column updates and inserts the following data into the table:
+
+```
+1,500,2023-07-03 12:00:01
+3,23,2023-07-03 12:00:02
+18,9999999,2023-07-03 12:00:03
+```
+
+```
+curl  --location-trusted -u root -H "partial_columns:true" -H "strict_mode:false" -H "column_separator:," -H "columns:id,balance,last_access_time" -T /tmp/test.csv http://host:port/api/db1/user_profile/_stream_load
+```
+
+The existing row in the table will be updated, and two new rows will be inserted. For columns in the inserted data that are not specified by the user, if the column has a default value, it will be filled with the default value. Otherwise, if the column allows NULL, it will be filled with a NULL value. If neither condition is met, the insertion will not succeed.
+
+However, when a user uses strict mode stream load for partial column updates and inserts the above data into the table:
+
+```
+curl  --location-trusted -u root -H "partial_columns:true" -H "strict_mode:true" -H "column_separator:," -H "columns:id,balance,last_access_time" -T /tmp/test.csv http://host:port/api/db1/user_profile/_stream_load
+```
+
+In this case, since strict mode is enabled and the keys `(3)`, `(18)` in the second and third rows, respectively, do not exist in the original table, the import will fail.
\ No newline at end of file
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md
index 5087ec02ee..c63caf789d 100644
--- a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md
+++ b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md
@@ -446,7 +446,7 @@ separated by commas.
 
 7. Strict Mode
 
-   The `strict_mode` attribute is used to set whether the import task runs in strict mode. The format affects the results of column mapping, transformation, and filtering. For a detailed description of strict mode, see the [strict mode](../../../../data-operate/import/import-scenes/load-strict-mode.md) documentation.
+   The `strict_mode` attribute is used to set whether the import task runs in strict mode. This attribute affects the results of column mapping, transformation, and filtering, and it also controls the behavior of partial updates. For a detailed description of strict mode, see the [strict mode](../../../../data-operate/import/import-scenes/load-strict-mode.md) documentation.
 
 8. Timeout
 
diff --git a/docs/zh-CN/docs/data-operate/import/import-scenes/load-strict-mode.md b/docs/zh-CN/docs/data-operate/import/import-scenes/load-strict-mode.md
index 8dcc2fd043..7708a461fc 100644
--- a/docs/zh-CN/docs/data-operate/import/import-scenes/load-strict-mode.md
+++ b/docs/zh-CN/docs/data-operate/import/import-scenes/load-strict-mode.md
@@ -93,7 +93,7 @@ under the License.
 
 ## 严格模式的作用
 
-严格模式的意思是，对于导入过程中的列类型转换进行严格过滤。
+- 对于导入过程中的列类型转换进行严格过滤。
 
 严格过滤的策略如下：
 
@@ -131,3 +131,50 @@ under the License.
    > 1. 表中的列允许导入空值
    > 2. `abc` 在转换为 Decimal 后，会因类型问题变为 NULL。在严格模式开启的情况下，这类数据将会被过滤。而如果是关闭状态，则会导入 `null`。
    > 3. `10` 虽然是一个超过范围的值，但是因为其类型符合 decimal 的要求，所以严格模式对其不产生影响。`10` 最后会在其他导入处理流程中被过滤。但不会被严格模式过滤。
+
+- 限定部分列更新只能更新已有的行
+
+在严格模式下，部分列更新插入的每一行数据必须满足该行数据的key在表中已经存在。而在非严格模式下，进行部分列更新时可以更新key已经存在的行，也可以插入key不存在的新行。
+
+例如有表结构如下：
+```
+mysql> desc user_profile;
++------------------+-----------------+------+-------+---------+-------+
+| Field            | Type            | Null | Key   | Default | Extra |
++------------------+-----------------+------+-------+---------+-------+
+| id               | INT             | Yes  | true  | NULL    |       |
+| name             | VARCHAR(10)     | Yes  | false | NULL    | NONE  |
+| age              | INT             | Yes  | false | NULL    | NONE  |
+| city             | VARCHAR(10)     | Yes  | false | NULL    | NONE  |
+| balance          | DECIMALV3(9, 0) | Yes  | false | NULL    | NONE  |
+| last_access_time | DATETIME        | Yes  | false | NULL    | NONE  |
++------------------+-----------------+------+-------+---------+-------+
+```
+
+表中有一条数据如下：
+
+```
+1,"kevin",18,"shenzhen",400,"2023-07-01 12:00:00"
+```
+
+当用户使用非严格模式的stream load部分列更新向表中插入如下数据时
+
+```
+1,500,2023-07-03 12:00:01
+3,23,2023-07-03 12:00:02
+18,9999999,2023-07-03 12:00:03
+```
+
+```
+curl  --location-trusted -u root -H "partial_columns:true" -H "strict_mode:false" -H "column_separator:," -H "columns:id,balance,last_access_time" -T /tmp/test.csv http://host:port/api/db1/user_profile/_stream_load
+```
+
+表中原有的一条数据将会被更新，此外还向表中插入了两条新数据。对于插入的数据中用户没有指定的列，如果该列有默认值，则会以默认值填充；否则，如果该列可以为NULL，则将以NULL值填充；否则本次插入不成功。
+
+而当用户使用严格模式的stream load部分列更新向表中插入上述数据时
+
+```
+curl  --location-trusted -u root -H "partial_columns:true" -H "strict_mode:true" -H "column_separator:," -H "columns:id,balance,last_access_time" -T /tmp/test.csv http://host:port/api/db1/user_profile/_stream_load
+```
+
+此时，由于开启了严格模式且第二、三行的数据的key(`(3)`, `(18)`)不在原表中，所以本次导入会失败。
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md
index 223bb8b462..0cdedb51cf 100644
--- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md
@@ -431,7 +431,7 @@ curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_h
 
 7. 严格模式
 
-   `strict_mode` 属性用于设置导入任务是否运行在严格模式下。该格式会对列映射、转换和过滤的结果产生影响。关于严格模式的具体说明，可参阅 [严格模式](../../../../data-operate/import/import-scenes/load-strict-mode.md) 文档。
+   `strict_mode` 属性用于设置导入任务是否运行在严格模式下。该属性会对列映射、转换和过滤的结果产生影响，它同时也将控制部分列更新的行为。关于严格模式的具体说明，可参阅 [严格模式](../../../../data-operate/import/import-scenes/load-strict-mode.md) 文档。
 
 8. 超时时间
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java
index 591f8191f5..2a05ee9e01 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java
@@ -330,7 +330,7 @@ public class NativeInsertStmt extends InsertStmt {
             OlapTableSink sink = (OlapTableSink) dataSink;
             TUniqueId loadId = analyzer.getContext().queryId();
             int sendBatchParallelism = analyzer.getContext().getSessionVariable().getSendBatchParallelism();
-            sink.init(loadId, transactionId, db.getId(), timeoutSecond, sendBatchParallelism, false);
+            sink.init(loadId, transactionId, db.getId(), timeoutSecond, sendBatchParallelism, false, false);
         }
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java
index 7b8ca0371e..982eb813e6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadingTaskPlanner.java
@@ -170,7 +170,7 @@ public class LoadingTaskPlanner {
         List<Long> partitionIds = getAllPartitionIds();
         OlapTableSink olapTableSink = new OlapTableSink(table, destTupleDesc, partitionIds,
                 Config.enable_single_replica_load);
-        olapTableSink.init(loadId, txnId, dbId, timeoutS, sendBatchParallelism, false);
+        olapTableSink.init(loadId, txnId, dbId, timeoutS, sendBatchParallelism, false, strictMode);
         olapTableSink.complete();
 
         // 3. Plan fragment
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java
index 94e39de297..ab373c7886 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java
@@ -121,7 +121,7 @@ public class InsertIntoTableCommand extends Command implements ForwardWithSync,
         sink.init(ctx.queryId(), txn.getTxnId(),
                 physicalOlapTableSink.getDatabase().getId(),
                 ctx.getExecTimeout(),
-                ctx.getSessionVariable().getSendBatchParallelism(), false);
+                ctx.getSessionVariable().getSendBatchParallelism(), false, false);
 
         sink.complete();
         TransactionState state = Env.getCurrentGlobalTransactionMgr().getTransactionState(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
index ea6f92616c..411fb548fa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
@@ -101,6 +101,8 @@ public class OlapTableSink extends DataSink {
 
     private boolean singleReplicaLoad;
 
+    private boolean isStrictMode = false;
+
     public OlapTableSink(OlapTable dstTable, TupleDescriptor tupleDescriptor, List<Long> partitionIds,
             boolean singleReplicaLoad) {
         this.dstTable = dstTable;
@@ -110,13 +112,14 @@ public class OlapTableSink extends DataSink {
     }
 
     public void init(TUniqueId loadId, long txnId, long dbId, long loadChannelTimeoutS, int sendBatchParallelism,
-            boolean loadToSingleTablet) throws AnalysisException {
+            boolean loadToSingleTablet, boolean isStrictMode) throws AnalysisException {
         TOlapTableSink tSink = new TOlapTableSink();
         tSink.setLoadId(loadId);
         tSink.setTxnId(txnId);
         tSink.setDbId(dbId);
         tSink.setLoadChannelTimeoutS(loadChannelTimeoutS);
         tSink.setSendBatchParallelism(sendBatchParallelism);
+        this.isStrictMode = isStrictMode;
         if (loadToSingleTablet && !(dstTable.getDefaultDistributionInfo() instanceof RandomDistributionInfo)) {
             throw new AnalysisException(
                     "if load_to_single_tablet set to true," + " the olap table must be with random distribution");
@@ -211,6 +214,7 @@ public class OlapTableSink extends DataSink {
         schemaParam.setTableId(table.getId());
         schemaParam.setVersion(table.getIndexMetaByIndexId(table.getBaseIndexId()).getSchemaVersion());
         schemaParam.setIsDynamicSchema(table.isDynamicSchema());
+        schemaParam.setIsStrictMode(isStrictMode);
 
         schemaParam.tuple_desc = tupleDescriptor.toThrift();
         for (SlotDescriptor slotDesc : tupleDescriptor.getSlots()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java
index 31d7963e81..2ae793e389 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadPlanner.java
@@ -251,7 +251,7 @@ public class StreamLoadPlanner {
         OlapTableSink olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds,
                 Config.enable_single_replica_load);
         olapTableSink.init(loadId, taskInfo.getTxnId(), db.getId(), timeout,
-                taskInfo.getSendBatchParallelism(), taskInfo.isLoadToSingleTablet());
+                taskInfo.getSendBatchParallelism(), taskInfo.isLoadToSingleTablet(), taskInfo.isStrictMode());
         olapTableSink.setPartialUpdateInputColumns(isPartialUpdate, partialUpdateInputColumns);
         olapTableSink.complete();
 
@@ -447,7 +447,7 @@ public class StreamLoadPlanner {
         OlapTableSink olapTableSink = new OlapTableSink(destTable, tupleDesc, partitionIds,
                 Config.enable_single_replica_load);
         olapTableSink.init(loadId, taskInfo.getTxnId(), db.getId(), timeout,
-                taskInfo.getSendBatchParallelism(), taskInfo.isLoadToSingleTablet());
+                taskInfo.getSendBatchParallelism(), taskInfo.isLoadToSingleTablet(), taskInfo.isStrictMode());
         olapTableSink.setPartialUpdateInputColumns(isPartialUpdate, partialUpdateInputColumns);
         olapTableSink.complete();
 
diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java
index fd37ca436c..c6da7304d5 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java
@@ -107,7 +107,7 @@ public class OlapTableSinkTest {
                 new DataProperty(DataProperty.DEFAULT_STORAGE_MEDIUM));
         dstTable.getPartitionInfo().setIsMutable(partition.getId(), true);
         OlapTableSink sink = new OlapTableSink(dstTable, tuple, Lists.newArrayList(2L), false);
-        sink.init(new TUniqueId(1, 2), 3, 4, 1000, 1, false);
+        sink.init(new TUniqueId(1, 2), 3, 4, 1000, 1, false, false);
         sink.complete();
         LOG.info("sink is {}", sink.toThrift());
         LOG.info("{}", sink.getExplainString("", TExplainLevel.NORMAL));
@@ -144,7 +144,7 @@ public class OlapTableSinkTest {
         };
 
         OlapTableSink sink = new OlapTableSink(dstTable, tuple, Lists.newArrayList(p1.getId()), false);
-        sink.init(new TUniqueId(1, 2), 3, 4, 1000, 1, false);
+        sink.init(new TUniqueId(1, 2), 3, 4, 1000, 1, false, false);
         try {
             sink.complete();
         } catch (UserException e) {
@@ -169,7 +169,7 @@ public class OlapTableSinkTest {
         };
 
         OlapTableSink sink = new OlapTableSink(dstTable, tuple, Lists.newArrayList(unknownPartId), false);
-        sink.init(new TUniqueId(1, 2), 3, 4, 1000, 1, false);
+        sink.init(new TUniqueId(1, 2), 3, 4, 1000, 1, false, false);
         sink.complete();
         LOG.info("sink is {}", sink.toThrift());
         LOG.info("{}", sink.getExplainString("", TExplainLevel.NORMAL));
@@ -206,7 +206,7 @@ public class OlapTableSinkTest {
         };
 
         OlapTableSink sink = new OlapTableSink(dstTable, tuple, Lists.newArrayList(p1.getId()), false);
-        sink.init(new TUniqueId(1, 2), 3, 4, 1000, 1, false);
+        sink.init(new TUniqueId(1, 2), 3, 4, 1000, 1, false, false);
         try {
             sink.complete();
         } catch (UserException e) {
diff --git a/gensrc/proto/descriptors.proto b/gensrc/proto/descriptors.proto
index 7866c3d88f..97c2e6d993 100644
--- a/gensrc/proto/descriptors.proto
+++ b/gensrc/proto/descriptors.proto
@@ -65,5 +65,6 @@ message POlapTableSchemaParam {
     repeated POlapTableIndexSchema indexes = 6;
     optional bool partial_update = 7;
     repeated string partial_update_input_columns = 8;
+    optional bool is_strict_mode = 9 [default = false];
 };
 
diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift
index d8fd59c101..40c89c6748 100644
--- a/gensrc/thrift/Descriptors.thrift
+++ b/gensrc/thrift/Descriptors.thrift
@@ -219,6 +219,7 @@ struct TOlapTableSchemaParam {
     7: optional bool is_dynamic_schema
     8: optional bool is_partial_update
     9: optional list<string> partial_update_input_columns
+    10: optional bool is_strict_mode = false;
 }
 
 struct TTabletLocation {
diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_upsert.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_upsert.out
new file mode 100644
index 0000000000..e4333002db
--- /dev/null
+++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_upsert.out
@@ -0,0 +1,12 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+1	kevin	18	shenzhen	400	2023-07-01T12:00
+
+-- !sql --
+1	kevin	18	shenzhen	500	2023-07-03T12:00:01
+3	\N	20	beijing	23	2023-07-03T12:00:02
+18	\N	20	beijing	9999999	2023-07-03T12:00:03
+
+-- !sql --
+1	kevin	18	shenzhen	400	2023-07-01T12:00
+
diff --git a/regression-test/data/unique_with_mow_p0/partial_update/upsert.csv b/regression-test/data/unique_with_mow_p0/partial_update/upsert.csv
new file mode 100644
index 0000000000..7c47b90ce4
--- /dev/null
+++ b/regression-test/data/unique_with_mow_p0/partial_update/upsert.csv
@@ -0,0 +1,3 @@
+1,500,2023-07-03 12:00:01 
+3,23,2023-07-03 12:00:02 
+18,9999999,2023-07-03 12:00:03
\ No newline at end of file
diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_upsert.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_upsert.groovy
new file mode 100644
index 0000000000..4da4159790
--- /dev/null
+++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_upsert.groovy
@@ -0,0 +1,103 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_partial_update_upsert", "p0") {
+    // Case 1: partial update with strict_mode=false; the expected output shows ids 3 and 18 added as new rows.
+    def tableName = "test_partial_update_upsert1"
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+            CREATE TABLE ${tableName} ( 
+                `id` int(11) NULL, 
+                `name` varchar(10) NULL,
+                `age` int(11) NULL DEFAULT "20", 
+                `city` varchar(10) NOT NULL DEFAULT "beijing", 
+                `balance` decimalv3(9, 0) NULL, 
+                `last_access_time` datetime NULL 
+            ) ENGINE = OLAP UNIQUE KEY(`id`) 
+            COMMENT 'OLAP' DISTRIBUTED BY HASH(`id`) 
+            BUCKETS AUTO PROPERTIES ( 
+                "replication_allocation" = "tag.location.default: 1", 
+                "storage_format" = "V2", 
+                "enable_unique_key_merge_on_write" = "true", 
+                "light_schema_change" = "true", 
+                "disable_auto_compaction" = "false", 
+                "enable_single_replica_compaction" = "false" 
+            );
+    """
+    sql """insert into ${tableName} values(1,"kevin",18,"shenzhen",400,"2023-07-01 12:00:00");"""
+    qt_sql """select * from ${tableName} order by id;"""
+    streamLoad {
+        table "${tableName}"
+
+        set 'column_separator', ','
+        set 'format', 'csv'
+        set 'partial_columns', 'true'
+        set 'columns', 'id,balance,last_access_time'
+        set 'strict_mode', 'false'
+
+        file 'upsert.csv'
+        time 10000 // limit inflight 10s
+    }
+    sql "sync"
+    qt_sql """select * from ${tableName} order by id;"""
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+
+    // Case 2: same table layout and input file, but strict_mode=true; the load is expected to report "fail".
+    def tableName2 = "test_partial_update_upsert2"
+    sql """ DROP TABLE IF EXISTS ${tableName2} """
+    sql """
+            CREATE TABLE ${tableName2} ( 
+                `id` int(11) NULL, 
+                `name` varchar(10) NULL,
+                `age` int(11) NULL DEFAULT "20", 
+                `city` varchar(10) NOT NULL DEFAULT "beijing", 
+                `balance` decimalv3(9, 0) NULL, 
+                `last_access_time` datetime NULL 
+            ) ENGINE = OLAP UNIQUE KEY(`id`) 
+            COMMENT 'OLAP' DISTRIBUTED BY HASH(`id`) 
+            BUCKETS AUTO PROPERTIES ( 
+                "replication_allocation" = "tag.location.default: 1", 
+                "storage_format" = "V2", 
+                "enable_unique_key_merge_on_write" = "true", 
+                "light_schema_change" = "true", 
+                "disable_auto_compaction" = "false", 
+                "enable_single_replica_compaction" = "false" 
+            );
+    """
+    sql """insert into ${tableName2} values(1,"kevin",18,"shenzhen",400,"2023-07-01 12:00:00");"""
+    qt_sql """select * from ${tableName2} order by id;"""
+    streamLoad {
+        table "${tableName2}"
+
+        set 'column_separator', ','
+        set 'format', 'csv'
+        set 'partial_columns', 'true'
+        set 'columns', 'id,balance,last_access_time'
+        set 'strict_mode', 'true'
+
+        file 'upsert.csv'
+        time 10000 // limit inflight 10s
+
+        check {result, exception, startTime, endTime ->
+            assertTrue(exception == null)
+            def json = parseJson(result)
+            assertEquals("fail", json.Status.toLowerCase())
+        }
+    }
+    sql """ DROP TABLE IF EXISTS ${tableName2} """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org