You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ji...@apache.org on 2023/04/21 03:19:07 UTC

[doris] branch master updated: [Fix](dynamic table) fix dynamic table with insert into and column al… (#18808)

This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8cc0af150a [Fix](dynamic table) fix dynamic table with insert into and column al… (#18808)
8cc0af150a is described below

commit 8cc0af150a7f8f82ccca4ec88a8c0608a2d98b1e
Author: lihangyu <15...@163.com>
AuthorDate: Fri Apr 21 11:19:00 2023 +0800

    [Fix](dynamic table) fix dynamic table with insert into and column al… (#18808)
    
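    1. Set num_rows of the destination ColumnObject correctly when aligning variant columns.
    2. Skip unfolding the variant column when the block has no dynamic column (e.g. INSERT INTO).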
---
 be/src/olap/memtable.cpp                            | 11 +++++++----
 be/src/vec/columns/column_object.cpp                | 15 +++++++++------
 be/src/vec/columns/column_object.h                  | 13 +++++++++++++
 regression-test/suites/dynamic_table_p0/load.groovy |  4 ++--
 4 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 9084cc617a..1b582b2064 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -416,12 +416,14 @@ void MemTable::unfold_variant_column(vectorized::Block& block) {
     if (block.rows() == 0) {
         return;
     }
-    vectorized::ColumnWithTypeAndName variant_column =
-            block.get_by_name(BeConsts::DYNAMIC_COLUMN_NAME);
+    vectorized::ColumnWithTypeAndName* variant_column =
+            block.try_get_by_name(BeConsts::DYNAMIC_COLUMN_NAME);
+    if (!variant_column) {
+        return;
+    }
     // remove it
-    block.erase(BeConsts::DYNAMIC_COLUMN_NAME);
     vectorized::ColumnObject& object_column =
-            assert_cast<vectorized::ColumnObject&>(variant_column.column->assume_mutable_ref());
+            assert_cast<vectorized::ColumnObject&>(variant_column->column->assume_mutable_ref());
     // extend
     for (auto& entry : object_column.get_subcolumns()) {
         if (entry->path.get_path() == vectorized::ColumnObject::COLUMN_NAME_DUMMY) {
@@ -430,6 +432,7 @@ void MemTable::unfold_variant_column(vectorized::Block& block) {
         block.insert({entry->data.get_finalized_column().get_ptr(),
                       entry->data.get_least_common_type(), entry->path.get_path()});
     }
+    block.erase(BeConsts::DYNAMIC_COLUMN_NAME);
 }
 
 void MemTable::serialize_block_to_row_column(vectorized::Block& block) {
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 7da8e82f39..89aabbab94 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -534,6 +534,12 @@ const ColumnPtr& ColumnObject::Subcolumn::get_finalized_column_ptr() const {
     return data[0];
 }
 
+void ColumnObject::Subcolumn::remove_nullable() {
+    assert(is_finalized());
+    data[0] = doris::vectorized::remove_nullable(data[0]);
+    least_common_type.remove_nullable();
+}
+
 ColumnObject::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_)
         : type(std::move(type_)),
           base_type(getBaseTypeOfArray(type)),
@@ -921,7 +927,6 @@ void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src,
     // if src and dst is empty, we just increase the num_rows of dst and fill
     // num_rows of default values when meet new data
     size_t num_rows = dst.rows();
-    bool need_inc_row_num = true;
     for (auto& entry : dst.get_subcolumns()) {
         const auto* src_subcol = src.get_subcolumn(entry->path);
         if (src_subcol == nullptr) {
@@ -933,6 +938,7 @@ void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src,
             const auto& src_column = src_subcol->get_finalized_column();
             inserter(src_column, &entry->data.get_finalized_column());
         }
+        dst.set_num_rows(entry->data.get_finalized_column().size());
     }
     for (const auto& entry : src.get_subcolumns()) {
         // encounter a new column
@@ -942,15 +948,12 @@ void align_variant_by_name_and_type(ColumnObject& dst, const ColumnObject& src,
             auto new_column = type->create_column();
             new_column->insert_many_defaults(num_rows);
             inserter(entry->data.get_finalized_column(), new_column.get());
-            if (dst.empty()) {
-                // add_sub_column updated num_rows of dst object
-                need_inc_row_num = false;
-            }
+            dst.set_num_rows(new_column->size());
             dst.add_sub_column(entry->path, std::move(new_column));
         }
     }
     num_rows += row_cnt;
-    if (need_inc_row_num) {
+    if (dst.empty()) {
         dst.incr_num_rows(row_cnt);
     }
 #ifndef NDEBUG
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index b4b868c798..7fec48a0c8 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -39,6 +39,7 @@
 #include "vec/core/field.h"
 #include "vec/core/types.h"
 #include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
 #include "vec/json/path_in_data.h"
 
 class SipHash;
@@ -140,6 +141,8 @@ public:
 
         const ColumnPtr& get_finalized_column_ptr() const;
 
+        void remove_nullable();
+
         friend class ColumnObject;
 
     private:
@@ -155,6 +158,8 @@ public:
 
             size_t get_dimensions() const { return num_dimensions; }
 
+            void remove_nullable() { type = doris::vectorized::remove_nullable(type); }
+
         private:
             DataTypePtr type;
             DataTypePtr base_type;
@@ -203,6 +208,12 @@ public:
     // return null if not found
     const Subcolumn* get_subcolumn(const PathInData& key) const;
 
+    /** More efficient methods of manipulation */
+    [[noreturn]] IColumn& get_data() { LOG(FATAL) << "Not implemented method get_data()"; }
+    [[noreturn]] const IColumn& get_data() const {
+        LOG(FATAL) << "Not implemented method get_data()";
+    }
+
     // return null if not found
     Subcolumn* get_subcolumn(const PathInData& key);
 
@@ -210,6 +221,8 @@ public:
 
     void incr_num_rows(size_t n) { num_rows += n; }
 
+    void set_num_rows(size_t n) { num_rows = n; }
+
     size_t rows() const { return num_rows; }
 
     /// Adds a subcolumn from existing IColumn.
diff --git a/regression-test/suites/dynamic_table_p0/load.groovy b/regression-test/suites/dynamic_table_p0/load.groovy
index f4bd89aba6..4837ca3cd1 100644
--- a/regression-test/suites/dynamic_table_p0/load.groovy
+++ b/regression-test/suites/dynamic_table_p0/load.groovy
@@ -123,8 +123,8 @@ suite("regression_test_dynamic_table", "dynamic_table"){
     json_load_unique("btc_transactions.json", "test_btc_json")
     json_load_unique("ghdata_sample.json", "test_ghdata_json")
     json_load_unique("nbagames_sample.json", "test_nbagames_json")
-    // sql """insert into test_ghdata_json_unique select * from test_ghdata_json_unique"""
-    // sql """insert into test_btc_json_unique select * from test_btc_json_unique"""
+    sql """insert into test_ghdata_json_unique select * from test_ghdata_json"""
+    sql """insert into test_btc_json_unique select * from test_btc_json"""
 
     // load more
     table_name = "gharchive";


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org