Posted to commits@doris.apache.org by ji...@apache.org on 2023/06/01 02:25:13 UTC
[doris] branch master updated: [refactor](dynamic table) Make segment_writer unaware of dynamic schema, and ensure parsing is exception-safe. (#19594)
This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9e21318834 [refactor](dynamic table) Make segment_writer unaware of dynamic schema, and ensure parsing is exception-safe. (#19594)
9e21318834 is described below
commit 9e21318834d5fd886def3cf4a81641f5d55ff8fd
Author: lihangyu <15...@163.com>
AuthorDate: Thu Jun 1 10:25:04 2023 +0800
[refactor](dynamic table) Make segment_writer unaware of dynamic schema, and ensure parsing is exception-safe. (#19594)
1. make ColumnObject exception safe
2. introduce FlushContext and construct schema at memtable flush stage to make segment independent from dynamic schema (a simplified sketch follows below)
3. add more test cases
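To illustrate item 2, here is a minimal, self-contained sketch of the flush-context idea; the names (SimpleSchema, SimpleFlushContext, SimpleSegmentWriter) are hypothetical stand-ins rather than the real Doris types. The point is that the flush path builds the schema once and hands it to the segment writer through a context object, so the writer never has to inspect dynamic columns itself.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Hypothetical, simplified stand-ins for TabletSchema / FlushContext.
struct SimpleSchema {
    std::vector<std::string> columns;
};

struct SimpleFlushContext {
    // Schema constructed at memtable-flush time; null means "use the tablet schema".
    std::shared_ptr<SimpleSchema> flush_schema;
};

// The writer only ever sees a schema; it has no notion of "dynamic" columns.
class SimpleSegmentWriter {
public:
    explicit SimpleSegmentWriter(std::shared_ptr<SimpleSchema> tablet_schema)
            : _tablet_schema(std::move(tablet_schema)) {}

    void init(const SimpleFlushContext* ctx = nullptr) {
        const SimpleSchema& schema =
                (ctx != nullptr && ctx->flush_schema) ? *ctx->flush_schema : *_tablet_schema;
        for (const auto& col : schema.columns) {
            std::cout << "create column writer for " << col << "\n";
        }
    }

private:
    std::shared_ptr<SimpleSchema> _tablet_schema;
};

int main() {
    auto tablet_schema = std::make_shared<SimpleSchema>(SimpleSchema {{"a", "b", "c"}});

    // Flush path: the memtable extends the schema with dynamically generated columns
    // and passes it down; the writer stays schema-agnostic.
    SimpleFlushContext ctx;
    ctx.flush_schema = std::make_shared<SimpleSchema>(SimpleSchema {{"a", "b", "c", "d", "e"}});

    SimpleSegmentWriter writer(tablet_schema);
    writer.init(&ctx); // uses the flush-time schema
    writer.init();     // compaction / non-dynamic path: falls back to the tablet schema
    return 0;
}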
---
be/src/olap/memtable.cpp | 80 +-
be/src/olap/memtable.h | 4 +-
be/src/olap/rowset/beta_rowset_writer.cpp | 23 +-
be/src/olap/rowset/beta_rowset_writer.h | 12 +-
be/src/olap/rowset/rowset_writer.h | 15 +-
be/src/olap/rowset/segment_v2/segment_writer.cpp | 72 +-
be/src/olap/rowset/segment_v2/segment_writer.h | 12 +-
be/src/vec/columns/column_object.cpp | 125 +-
be/src/vec/columns/column_object.h | 26 +-
be/src/vec/common/schema_util.cpp | 18 +-
be/src/vec/common/schema_util.h | 2 +-
be/src/vec/data_types/convert_field_to_type.cpp | 50 +-
be/src/vec/data_types/convert_field_to_type.h | 4 +-
be/src/vec/data_types/get_least_supertype.cpp | 210 ++-
be/src/vec/data_types/get_least_supertype.h | 8 +-
be/src/vec/exec/format/json/new_json_reader.cpp | 35 +-
be/src/vec/json/json_parser.cpp | 5 +-
be/src/vec/json/parse2column.cpp | 40 +-
be/src/vec/json/parse2column.h | 10 +-
be/src/vec/olap/olap_data_convertor.cpp | 8 +-
lowercase.json | 5 +
.../data/dynamic_table_p0/floating_point.json | 4 +
.../data/dynamic_table_p0/floating_point2.json | 4 +
.../data/dynamic_table_p0/floating_point3.json | 4 +
.../data/dynamic_table_p0/invalid_dimension.json | 4 +
.../data/dynamic_table_p0/invalid_format.json | 3 +
regression-test/data/dynamic_table_p0/load.out | 1939 ++++++++++++++++++++
.../data/dynamic_table_p0/lowercase.json | 4 +
regression-test/data/dynamic_table_p0/sql/q05.out | 14 +-
.../data/dynamic_table_p0/uppercase.json | 4 +
.../suites/dynamic_table_p0/load.groovy | 77 +
31 files changed, 2481 insertions(+), 340 deletions(-)
diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 6bc82b052c..c398a22b15 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -45,15 +45,17 @@
#include "util/doris_metrics.h"
#include "util/runtime_profile.h"
#include "util/stopwatch.hpp"
+#include "util/string_util.h"
#include "vec/aggregate_functions/aggregate_function_reader.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
#include "vec/columns/column.h"
#include "vec/columns/column_object.h"
#include "vec/columns/column_string.h"
#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/data_types/data_type.h"
-#include "vec/data_types/serde/data_type_serde.h"
+#include "vec/data_types/data_type_factory.hpp"
#include "vec/json/path_in_data.h"
#include "vec/jsonb/serialize.h"
@@ -475,12 +477,14 @@ Status MemTable::_do_flush() {
_aggregate<true>();
}
vectorized::Block block = _output_mutable_block.to_block();
+ FlushContext ctx;
+ ctx.block = █
if (_tablet_schema->is_dynamic_schema()) {
// Unfold variant column
- unfold_variant_column(block);
+ RETURN_IF_ERROR(unfold_variant_column(block, &ctx));
}
SCOPED_RAW_TIMER(&_stat.segment_writer_ns);
- RETURN_IF_ERROR(_rowset_writer->flush_single_memtable(&block, &_flush_size));
+ RETURN_IF_ERROR(_rowset_writer->flush_single_memtable(&block, &_flush_size, &ctx));
return Status::OK();
}
@@ -488,27 +492,77 @@ Status MemTable::close() {
return flush();
}
-void MemTable::unfold_variant_column(vectorized::Block& block) {
+Status MemTable::unfold_variant_column(vectorized::Block& block, FlushContext* ctx) {
if (block.rows() == 0) {
- return;
+ return Status::OK();
}
+
+ // Sanitize the block so its columns exactly match the column types in the frontend meta
+ vectorized::schema_util::FullBaseSchemaView schema_view;
+ schema_view.table_id = _tablet_schema->table_id();
vectorized::ColumnWithTypeAndName* variant_column =
block.try_get_by_name(BeConsts::DYNAMIC_COLUMN_NAME);
if (!variant_column) {
- return;
+ return Status::OK();
}
- // remove it
+ auto base_column = variant_column->column;
vectorized::ColumnObject& object_column =
- assert_cast<vectorized::ColumnObject&>(variant_column->column->assume_mutable_ref());
- // extend
+ assert_cast<vectorized::ColumnObject&>(base_column->assume_mutable_ref());
+ if (object_column.empty()) {
+ block.erase(BeConsts::DYNAMIC_COLUMN_NAME);
+ return Status::OK();
+ }
+ object_column.finalize();
+ // Has extended columns
+ RETURN_IF_ERROR(vectorized::schema_util::send_fetch_full_base_schema_view_rpc(&schema_view));
+ // A dynamic Block consists of two parts: a static part of columns and a dynamic part of columns
+ // static dynamic
+ // | ----- | ------- |
+ // The static ones are the original _tablet_schema columns
+ TabletSchemaSPtr flush_schema = std::make_shared<TabletSchema>(*_tablet_schema);
+ vectorized::Block flush_block(std::move(block));
+ // The dynamic ones are auto-generated and extended; append them to the orig_block
for (auto& entry : object_column.get_subcolumns()) {
- if (entry->path.get_path() == vectorized::ColumnObject::COLUMN_NAME_DUMMY) {
+ const std::string& column_name = entry->path.get_path();
+ auto column_iter = schema_view.column_name_to_column.find(column_name);
+ if (UNLIKELY(column_iter == schema_view.column_name_to_column.end())) {
+ // Column may be dropped by lightweight schema change DDL
continue;
}
- block.insert({entry->data.get_finalized_column().get_ptr(),
- entry->data.get_least_common_type(), entry->path.get_path()});
+ TabletColumn column(column_iter->second);
+ auto data_type = vectorized::DataTypeFactory::instance().create_data_type(
+ column, column.is_nullable());
+ // Dynamically generated columns do not appear in the original tablet schema
+ if (_tablet_schema->field_index(column.name()) < 0) {
+ flush_schema->append_column(column);
+ flush_block.insert({data_type->create_column(), data_type, column.name()});
+ }
+ }
+
+ // Ensure all columns are present at this schema version. Otherwise the following scenario can occur:
+ // Load1 -> version(10) with schema [a, b, c, d, e]; d & e are newly added columns and the schema version became 10
+ // Load2 -> version(10) with schema [a, b, c], has no extended columns, and fetched the schema at version 10
+ // Load2 will persist meta with [a, b, c] but Load1 will persist meta with [a, b, c, d, e]
+ // So we should make sure that rowsets at the same schema version always contain the same set of columns,
+ // i.e. every column at schema_version is in either _tablet_schema or the schema_change_recorder
+ for (const auto& [name, column] : schema_view.column_name_to_column) {
+ if (_tablet_schema->field_index(name) == -1) {
+ const auto& tcolumn = schema_view.column_name_to_column[name];
+ TabletColumn new_column(tcolumn);
+ _rowset_writer->mutable_schema_change_recorder()->add_extended_columns(
+ column, schema_view.schema_version);
+ }
}
- block.erase(BeConsts::DYNAMIC_COLUMN_NAME);
+
+ // Last schema alignment before flushing to disk, since the schema may have varied before this procedure.
+ // E.g. add columnA(INT) -> drop columnA -> add columnA(DOUBLE): columnA could then be of type `Double`,
+ // and unfold will cast it to Double
+ RETURN_IF_ERROR(vectorized::schema_util::unfold_object(
+ flush_block.get_position_by_name(BeConsts::DYNAMIC_COLUMN_NAME), flush_block, true));
+ flush_block.erase(BeConsts::DYNAMIC_COLUMN_NAME);
+ ctx->flush_schema = flush_schema;
+ block.swap(flush_block);
+ return Status::OK();
}
void MemTable::serialize_block_to_row_column(vectorized::Block& block) {
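A rough sketch of the unfold step above, with toy types standing in for the real ones (a std::map plays the role of FullBaseSchemaView, plain strings play the role of TabletColumn); it only shows the control flow of skipping dropped columns and extending a copied flush schema.

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
    // Columns already in the tablet schema (the "static" part).
    std::vector<std::string> tablet_schema = {"a", "b", "c"};

    // Subcolumn paths extracted from the variant/object column in this memtable.
    std::vector<std::string> variant_paths = {"b", "d", "dropped_col"};

    // Pretend this was fetched from the frontend (FullBaseSchemaView in the real code).
    std::map<std::string, std::string> fe_columns = {
            {"a", "INT"}, {"b", "INT"}, {"c", "STRING"}, {"d", "DOUBLE"}, {"e", "BIGINT"}};

    std::set<std::string> in_tablet(tablet_schema.begin(), tablet_schema.end());
    std::vector<std::string> flush_schema = tablet_schema; // copy, then extend

    for (const auto& path : variant_paths) {
        auto it = fe_columns.find(path);
        if (it == fe_columns.end()) {
            // Column may have been dropped by a lightweight schema change; skip it.
            continue;
        }
        if (in_tablet.count(path) == 0) {
            flush_schema.push_back(path); // dynamically generated column
        }
    }

    for (const auto& col : flush_schema) {
        std::cout << col << " ";
    }
    std::cout << "\n"; // prints: a b c d
    return 0;
}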
diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h
index b99127bbf6..95d41df62d 100644
--- a/be/src/olap/memtable.h
+++ b/be/src/olap/memtable.h
@@ -44,6 +44,7 @@ class SlotDescriptor;
class TabletSchema;
class TupleDescriptor;
enum KeysType : int;
+struct FlushContext;
// row pos in _input_mutable_block
struct RowInBlock {
@@ -163,7 +164,8 @@ private:
// Eg. [A | B | C | (D, E, F)]
// After unfold block structure changed to -> [A | B | C | D | E | F]
// The expanded D, E, F is dynamic part of the block
- void unfold_variant_column(vectorized::Block& block);
+ // The flushed Block's columns should exactly match the column types in the frontend meta
+ Status unfold_variant_column(vectorized::Block& block, FlushContext* ctx);
private:
TabletSharedPtr _tablet;
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 76a2a2d4a6..5358732daa 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -41,6 +41,7 @@
#include "olap/olap_define.h"
#include "olap/rowset/beta_rowset.h"
#include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/rowset_writer.h"
#include "olap/rowset/segment_v2/inverted_index_cache.h"
#include "olap/rowset/segment_v2/inverted_index_desc.h"
#include "olap/rowset/segment_v2/segment.h"
@@ -132,7 +133,9 @@ Status BetaRowsetWriter::add_block(const vectorized::Block* block) {
return Status::OK();
}
if (UNLIKELY(_segment_writer == nullptr)) {
- RETURN_IF_ERROR(_create_segment_writer(&_segment_writer, block));
+ FlushContext ctx;
+ ctx.block = block;
+ RETURN_IF_ERROR(_create_segment_writer(&_segment_writer, &ctx));
}
return _add_block(block, &_segment_writer);
}
@@ -406,7 +409,8 @@ Status BetaRowsetWriter::_segcompaction_ramaining_if_necessary() {
}
Status BetaRowsetWriter::_add_block(const vectorized::Block* block,
- std::unique_ptr<segment_v2::SegmentWriter>* segment_writer) {
+ std::unique_ptr<segment_v2::SegmentWriter>* segment_writer,
+ const FlushContext* flush_ctx) {
size_t block_size_in_bytes = block->bytes();
size_t block_row_num = block->rows();
size_t row_avg_size_in_bytes = std::max((size_t)1, block_size_in_bytes / block_row_num);
@@ -417,7 +421,7 @@ Status BetaRowsetWriter::_add_block(const vectorized::Block* block,
if (UNLIKELY(max_row_add < 1)) {
// no space for another single row, need flush now
RETURN_IF_ERROR(_flush_segment_writer(segment_writer));
- RETURN_IF_ERROR(_create_segment_writer(segment_writer, block));
+ RETURN_IF_ERROR(_create_segment_writer(segment_writer, flush_ctx));
max_row_add = (*segment_writer)->max_row_to_add(row_avg_size_in_bytes);
DCHECK(max_row_add > 0);
}
@@ -462,13 +466,14 @@ Status BetaRowsetWriter::flush() {
return Status::OK();
}
-Status BetaRowsetWriter::flush_single_memtable(const vectorized::Block* block, int64* flush_size) {
+Status BetaRowsetWriter::flush_single_memtable(const vectorized::Block* block, int64* flush_size,
+ const FlushContext* ctx) {
if (block->rows() == 0) {
return Status::OK();
}
std::unique_ptr<segment_v2::SegmentWriter> writer;
- RETURN_IF_ERROR(_create_segment_writer(&writer, block));
+ RETURN_IF_ERROR(_create_segment_writer(&writer, ctx));
RETURN_IF_ERROR(_add_block(block, &writer));
RETURN_IF_ERROR(_flush_segment_writer(&writer, flush_size));
RETURN_IF_ERROR(_segcompaction_if_necessary());
@@ -666,7 +671,7 @@ RowsetSharedPtr BetaRowsetWriter::build_tmp() {
Status BetaRowsetWriter::_do_create_segment_writer(
std::unique_ptr<segment_v2::SegmentWriter>* writer, bool is_segcompaction, int64_t begin,
- int64_t end, const vectorized::Block* block) {
+ int64_t end, const FlushContext* flush_ctx) {
std::string path;
int32_t segment_id = 0;
if (is_segcompaction) {
@@ -715,7 +720,7 @@ Status BetaRowsetWriter::_do_create_segment_writer(
std::lock_guard<SpinLock> l(_lock);
_file_writers.push_back(std::move(file_writer));
}
- auto s = (*writer)->init(block);
+ auto s = (*writer)->init(flush_ctx);
if (!s.ok()) {
LOG(WARNING) << "failed to init segment writer: " << s.to_string();
writer->reset(nullptr);
@@ -727,7 +732,7 @@ Status BetaRowsetWriter::_do_create_segment_writer(
}
Status BetaRowsetWriter::_create_segment_writer(std::unique_ptr<segment_v2::SegmentWriter>* writer,
- const vectorized::Block* block) {
+ const FlushContext* flush_ctx) {
size_t total_segment_num = _num_segment - _segcompacted_point + 1 + _num_segcompacted;
if (UNLIKELY(total_segment_num > config::max_segment_num_per_rowset)) {
LOG(WARNING) << "too many segments in rowset."
@@ -738,7 +743,7 @@ Status BetaRowsetWriter::_create_segment_writer(std::unique_ptr<segment_v2::Segm
<< " _num_segcompacted:" << _num_segcompacted;
return Status::Error<TOO_MANY_SEGMENTS>();
} else {
- return _do_create_segment_writer(writer, false, -1, -1, block);
+ return _do_create_segment_writer(writer, false, -1, -1, flush_ctx);
}
}
diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h
index b7d5b10ee6..389c1bbede 100644
--- a/be/src/olap/rowset/beta_rowset_writer.h
+++ b/be/src/olap/rowset/beta_rowset_writer.h
@@ -79,7 +79,8 @@ public:
// Return the file size flushed to disk in "flush_size"
// This method is thread-safe.
- Status flush_single_memtable(const vectorized::Block* block, int64_t* flush_size) override;
+ Status flush_single_memtable(const vectorized::Block* block, int64_t* flush_size,
+ const FlushContext* ctx = nullptr) override;
RowsetSharedPtr build() override;
@@ -106,7 +107,7 @@ public:
int32_t get_atomic_num_segment() const override { return _num_segment.load(); }
// Maybe modified by local schema change
- vectorized::schema_util::LocalSchemaChangeRecorder* mutable_schema_change_recorder() {
+ vectorized::schema_util::LocalSchemaChangeRecorder* mutable_schema_change_recorder() override {
return _context.schema_change_recorder.get();
}
@@ -122,13 +123,14 @@ public:
private:
Status _add_block(const vectorized::Block* block,
- std::unique_ptr<segment_v2::SegmentWriter>* writer);
+ std::unique_ptr<segment_v2::SegmentWriter>* writer,
+ const FlushContext* flush_ctx = nullptr);
Status _do_create_segment_writer(std::unique_ptr<segment_v2::SegmentWriter>* writer,
bool is_segcompaction, int64_t begin, int64_t end,
- const vectorized::Block* block = nullptr);
+ const FlushContext* ctx = nullptr);
Status _create_segment_writer(std::unique_ptr<segment_v2::SegmentWriter>* writer,
- const vectorized::Block* block = nullptr);
+ const FlushContext* ctx = nullptr);
Status _flush_segment_writer(std::unique_ptr<segment_v2::SegmentWriter>* writer,
int64_t* flush_size = nullptr);
void _build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_meta);
diff --git a/be/src/olap/rowset/rowset_writer.h b/be/src/olap/rowset/rowset_writer.h
index 1bdc4daa2d..6bfa9f6e9a 100644
--- a/be/src/olap/rowset/rowset_writer.h
+++ b/be/src/olap/rowset/rowset_writer.h
@@ -20,16 +20,25 @@
#include <gen_cpp/olap_file.pb.h>
#include <gen_cpp/types.pb.h>
+#include "common/factory_creator.h"
#include "gutil/macros.h"
#include "olap/column_mapping.h"
#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_writer_context.h"
+#include "olap/tablet_schema.h"
#include "vec/core/block.h"
namespace doris {
class MemTable;
+// Context for single memtable flush
+struct FlushContext {
+ ENABLE_FACTORY_CREATOR(FlushContext);
+ TabletSchemaSPtr flush_schema = nullptr;
+ const vectorized::Block* block = nullptr;
+};
+
class RowsetWriter {
public:
RowsetWriter() = default;
@@ -59,7 +68,8 @@ public:
}
virtual Status final_flush() { return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(); }
- virtual Status flush_single_memtable(const vectorized::Block* block, int64_t* flush_size) {
+ virtual Status flush_single_memtable(const vectorized::Block* block, int64_t* flush_size,
+ const FlushContext* ctx = nullptr) {
return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
}
@@ -95,6 +105,9 @@ public:
virtual void set_segment_start_id(int num_segment) { LOG(FATAL) << "not supported!"; }
+ virtual vectorized::schema_util::LocalSchemaChangeRecorder*
+ mutable_schema_change_recorder() = 0;
+
private:
DISALLOW_COPY_AND_ASSIGN(RowsetWriter);
};
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 3feb778e4a..f2e52736db 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -35,12 +35,15 @@
#include "olap/key_coder.h"
#include "olap/olap_common.h"
#include "olap/primary_key_index.h"
-#include "olap/row_cursor.h" // IWYU pragma: keep
+#include "olap/row_cursor.h" // IWYU pragma: keep
+#include "olap/row_cursor.h" // RowCursor
+#include "olap/rowset/rowset_writer.h"
#include "olap/rowset/rowset_writer_context.h" // RowsetWriterContext
#include "olap/rowset/segment_v2/column_writer.h" // ColumnWriter
#include "olap/rowset/segment_v2/page_io.h"
#include "olap/rowset/segment_v2/page_pointer.h"
#include "olap/short_key_index.h"
+#include "olap/tablet_schema.h"
#include "runtime/memory/mem_tracker.h"
#include "service/point_query_executor.h"
#include "util/coding.h"
@@ -113,30 +116,20 @@ void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t column_id,
}
}
-Status SegmentWriter::init(const vectorized::Block* block) {
+Status SegmentWriter::init(const FlushContext* flush_ctx) {
std::vector<uint32_t> column_ids;
int column_cnt = _tablet_schema->num_columns();
- if (block && !_tablet_schema->is_partial_update()) {
- // partial update only contain several columns
- column_cnt = block->columns();
+ if (flush_ctx && flush_ctx->flush_schema) {
+ column_cnt = flush_ctx->flush_schema->num_columns();
}
for (uint32_t i = 0; i < column_cnt; ++i) {
column_ids.emplace_back(i);
}
- return init(column_ids, true, block);
-}
-
-// Dynamic table with extended columns and directly write from delta writer
-// Compaction/SchemaChange path will use the latest schema version of rowset
-// as it's shcema, so it's block is not from dynamic table load procedure.
-// If it is a dynamic table load procedure we should handle auto generated columns.
-bool SegmentWriter::_should_create_writers_with_dynamic_block(size_t num_columns_in_block) {
- return _tablet_schema->is_dynamic_schema() && _opts.is_direct_write &&
- num_columns_in_block > _tablet_schema->columns().size();
+ return init(column_ids, true, flush_ctx);
}
Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key,
- const vectorized::Block* block) {
+ const FlushContext* flush_ctx) {
DCHECK(_column_writers.empty());
DCHECK(_column_ids.empty());
_has_key = has_key;
@@ -144,7 +137,8 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key,
_column_ids.insert(_column_ids.end(), col_ids.begin(), col_ids.end());
_olap_data_convertor = std::make_unique<vectorized::OlapBlockDataConvertor>();
_opts.compression_type =
- (block == nullptr || block->bytes() > config::segment_compression_threshold_kb * 1024)
+ (flush_ctx == nullptr || flush_ctx->block == nullptr ||
+ flush_ctx->block->bytes() > config::segment_compression_threshold_kb * 1024)
? _tablet_schema->compression_type()
: NO_COMPRESSION;
auto create_column_writer = [&](uint32_t cid, const auto& column) -> auto {
@@ -240,10 +234,10 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key,
return Status::OK();
};
- if (block && _should_create_writers_with_dynamic_block(block->columns())) {
- RETURN_IF_ERROR(_create_writers_with_dynamic_block(block, create_column_writer));
+ if (flush_ctx && flush_ctx->flush_schema) {
+ RETURN_IF_ERROR(_create_writers(*flush_ctx->flush_schema, col_ids, create_column_writer));
} else {
- RETURN_IF_ERROR(_create_writers(create_column_writer));
+ RETURN_IF_ERROR(_create_writers(*_tablet_schema, col_ids, create_column_writer));
}
// we don't need the short key index for unique key merge on write table.
@@ -266,41 +260,11 @@ Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key,
}
Status SegmentWriter::_create_writers(
+ const TabletSchema& tablet_schema, const std::vector<uint32_t>& col_ids,
std::function<Status(uint32_t, const TabletColumn&)> create_column_writer) {
- _olap_data_convertor->reserve(_column_ids.size());
- for (auto& cid : _column_ids) {
- RETURN_IF_ERROR(create_column_writer(cid, _tablet_schema->column(cid)));
- }
- return Status::OK();
-}
-
-// Dynamic Block consists of two parts, dynamic part of columns and static part of columns
-// static dynamic
-// | ----- | ------- |
-// the static ones are original _tablet_schame columns
-// the dynamic ones are auto generated and extended from file scan
-Status SegmentWriter::_create_writers_with_dynamic_block(
- const vectorized::Block* block,
- std::function<Status(uint32_t, const TabletColumn&)> create_column_writer) {
- // generate writers from schema and extended schema info
- _olap_data_convertor->reserve(block->columns());
- // new columns added, query column info from Master
- vectorized::schema_util::FullBaseSchemaView schema_view;
- CHECK(block->columns() > _tablet_schema->num_columns());
- schema_view.table_id = _tablet_schema->table_id();
- RETURN_IF_ERROR(vectorized::schema_util::send_fetch_full_base_schema_view_rpc(&schema_view));
- // create writers with static columns
- for (size_t i = 0; i < _tablet_schema->columns().size(); ++i) {
- create_column_writer(i, _tablet_schema->column(i));
- }
- // create writers with auto generated columns
- for (size_t i = _tablet_schema->columns().size(); i < block->columns(); ++i) {
- const auto& column_type_name = block->get_by_position(i);
- const auto& tcolumn = schema_view.column_name_to_column[column_type_name.name];
- TabletColumn new_column(tcolumn);
- RETURN_IF_ERROR(create_column_writer(i, new_column));
- _opts.rowset_ctx->schema_change_recorder->add_extended_columns(new_column,
- schema_view.schema_version);
+ _olap_data_convertor->reserve(col_ids.size());
+ for (auto& cid : col_ids) {
+ RETURN_IF_ERROR(create_column_writer(cid, tablet_schema.column(cid)));
}
return Status::OK();
}
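The reworked SegmentWriter::init() above also changes how the compression type is picked: compression is skipped only when the flush context carries a block and that block is below config::segment_compression_threshold_kb. A tiny sketch of that decision, with made-up names and a hard-coded threshold standing in for the config knob:

#include <cstddef>
#include <iostream>

// Hypothetical stand-in for config::segment_compression_threshold_kb.
constexpr std::size_t kCompressionThresholdKb = 256;

enum class Compression { TABLET_DEFAULT, NO_COMPRESSION };

// Mirrors the condition in SegmentWriter::init(): only skip compression when we
// actually know the block and it is small; otherwise fall back to the tablet default.
Compression pick_compression(const std::size_t* block_bytes) {
    if (block_bytes == nullptr || *block_bytes > kCompressionThresholdKb * 1024) {
        return Compression::TABLET_DEFAULT;
    }
    return Compression::NO_COMPRESSION;
}

int main() {
    std::size_t small = 10 * 1024, large = 1024 * 1024;
    std::cout << (pick_compression(&small) == Compression::NO_COMPRESSION) << "\n";  // 1
    std::cout << (pick_compression(&large) == Compression::TABLET_DEFAULT) << "\n";  // 1
    std::cout << (pick_compression(nullptr) == Compression::TABLET_DEFAULT) << "\n"; // 1
    return 0;
}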
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h
index ea97958825..09a3576cf5 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -35,6 +35,7 @@
#include "gutil/macros.h"
#include "gutil/strings/substitute.h"
#include "olap/olap_define.h"
+#include "olap/rowset/rowset_writer.h"
#include "olap/rowset/segment_v2/column_writer.h"
#include "olap/tablet.h"
#include "olap/tablet_schema.h"
@@ -57,6 +58,7 @@ class ShortKeyIndexBuilder;
class PrimaryKeyIndexBuilder;
class KeyCoder;
struct RowsetWriterContext;
+struct FlushContext;
namespace io {
class FileWriter;
@@ -89,11 +91,11 @@ public:
std::shared_ptr<MowContext> mow_context);
~SegmentWriter();
- Status init(const vectorized::Block* block = nullptr);
+ Status init(const FlushContext* flush_ctx = nullptr);
// for vertical compaction
Status init(const std::vector<uint32_t>& col_ids, bool has_key,
- const vectorized::Block* block = nullptr);
+ const FlushContext* flush_ctx = nullptr);
template <typename RowType>
Status append_row(const RowType& row);
@@ -133,11 +135,9 @@ public:
const std::vector<bool>& use_default_flag, bool has_default);
private:
- Status _create_writers_with_dynamic_block(
- const vectorized::Block* block,
- std::function<Status(uint32_t, const TabletColumn&)> writer_creator);
- Status _create_writers(std::function<Status(uint32_t, const TabletColumn&)> writer_creator);
DISALLOW_COPY_AND_ASSIGN(SegmentWriter);
+ Status _create_writers(const TabletSchema& tablet_schema, const std::vector<uint32_t>& col_ids,
+ std::function<Status(uint32_t, const TabletColumn&)> writer_creator);
Status _write_data();
Status _write_ordinal_index();
Status _write_zone_map();
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 89aabbab94..7830e3fe8f 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -29,6 +29,8 @@
#include <map>
#include <optional>
+#include "common/exception.h"
+#include "common/status.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/columns_number.h"
@@ -158,7 +160,6 @@ private:
/// Returns 0 for scalar fields.
class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
public:
- explicit FieldVisitorToNumberOfDimensions(Status* st) : _st(st) {}
size_t operator()(const Array& x) const {
const size_t size = x.size();
std::optional<size_t> dimensions;
@@ -172,8 +173,8 @@ public:
if (!dimensions) {
dimensions = current_dimensions;
} else if (current_dimensions != *dimensions) {
- *_st = Status::InvalidArgument(
- "Number of dimensions mismatched among array elements");
+ throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+ "Number of dimensions mismatched among array elements");
return 0;
}
}
@@ -183,9 +184,6 @@ public:
size_t operator()(const T&) const {
return 0;
}
-
-private:
- mutable Status* _st;
};
/// Visitor that allows to get type of scalar field
@@ -217,9 +215,16 @@ public:
return 0;
}
size_t operator()(const Int64& x) {
- // Only Int64 at present
+ // // Only Int64 | Int32 at present
+ // field_types.insert(FieldType::Int64);
+ // type_indexes.insert(TypeIndex::Int64);
+ // return 0;
field_types.insert(FieldType::Int64);
- type_indexes.insert(TypeIndex::Int64);
+ if (x <= std::numeric_limits<Int32>::max() && x >= std::numeric_limits<Int32>::min()) {
+ type_indexes.insert(TypeIndex::Int32);
+ } else {
+ type_indexes.insert(TypeIndex::Int64);
+ }
return 0;
}
size_t operator()(const Null&) {
@@ -233,8 +238,8 @@ public:
type_indexes.insert(TypeId<NearestFieldType<T>>::value);
return 0;
}
- Status get_scalar_type(DataTypePtr* type) const {
- return get_least_supertype(type_indexes, type, true /*compatible with string type*/);
+ void get_scalar_type(DataTypePtr* type) const {
+ get_least_supertype(type_indexes, type, true /*compatible with string type*/);
}
bool contain_nulls() const { return have_nulls; }
bool need_convert_field() const { return field_types.size() > 1; }
@@ -246,21 +251,18 @@ private:
};
} // namespace
-Status get_field_info(const Field& field, FieldInfo* info) {
+void get_field_info(const Field& field, FieldInfo* info) {
FieldVisitorToScalarType to_scalar_type_visitor;
apply_visitor(to_scalar_type_visitor, field);
DataTypePtr type = nullptr;
- RETURN_IF_ERROR(to_scalar_type_visitor.get_scalar_type(&type));
+ to_scalar_type_visitor.get_scalar_type(&type);
// array item's dimension may missmatch, eg. [1, 2, [1, 2, 3]]
- Status num_to_dimensions_status;
*info = {
type,
to_scalar_type_visitor.contain_nulls(),
to_scalar_type_visitor.need_convert_field(),
- apply_visitor(FieldVisitorToNumberOfDimensions(&num_to_dimensions_status), field),
+ apply_visitor(FieldVisitorToNumberOfDimensions(), field),
};
- RETURN_IF_ERROR(num_to_dimensions_status);
- return Status::OK();
}
ColumnObject::Subcolumn::Subcolumn(MutableColumnPtr&& data_, bool is_nullable_)
@@ -297,10 +299,10 @@ size_t ColumnObject::Subcolumn::Subcolumn::allocatedBytes() const {
return res;
}
-Status ColumnObject::Subcolumn::insert(Field field) {
+void ColumnObject::Subcolumn::insert(Field field) {
FieldInfo info;
- RETURN_IF_ERROR(get_field_info(field, &info));
- return insert(std::move(field), std::move(info));
+ get_field_info(field, &info);
+ insert(std::move(field), std::move(info));
}
void ColumnObject::Subcolumn::add_new_column_part(DataTypePtr type) {
@@ -308,11 +310,11 @@ void ColumnObject::Subcolumn::add_new_column_part(DataTypePtr type) {
least_common_type = LeastCommonType {std::move(type)};
}
-Status ColumnObject::Subcolumn::insert(Field field, FieldInfo info) {
+void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) {
auto base_type = std::move(info.scalar_type);
if (is_nothing(base_type)) {
insertDefault();
- return Status::OK();
+ return;
}
auto column_dim = least_common_type.get_dimensions();
auto value_dim = info.num_dimensions;
@@ -323,8 +325,10 @@ Status ColumnObject::Subcolumn::insert(Field field, FieldInfo info) {
value_dim = column_dim;
}
if (value_dim != column_dim) {
- return Status::InvalidArgument(
- "Dimension of types mismatched between inserted value and column.");
+ throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+ "Dimension of types mismatched between inserted value and column, "
+ "expected:{}, but meet:{} for type:{}",
+ column_dim, value_dim, least_common_type.get()->get_name());
}
if (is_nullable && !is_nothing(base_type)) {
base_type = make_nullable(base_type);
@@ -348,9 +352,8 @@ Status ColumnObject::Subcolumn::insert(Field field, FieldInfo info) {
} else if (!least_common_base_type->equals(*base_type) && !is_nothing(base_type)) {
if (!schema_util::is_conversion_required_between_integers(*base_type,
*least_common_base_type)) {
- RETURN_IF_ERROR(
- get_least_supertype(DataTypes {std::move(base_type), least_common_base_type},
- &base_type, true /*compatible with string type*/));
+ get_least_supertype(DataTypes {std::move(base_type), least_common_base_type},
+ &base_type, true /*compatible with string type*/);
type_changed = true;
if (!least_common_base_type->equals(*base_type)) {
add_new_column_part(create_array_of_type(std::move(base_type), value_dim));
@@ -360,15 +363,14 @@ Status ColumnObject::Subcolumn::insert(Field field, FieldInfo info) {
if (type_changed || info.need_convert) {
Field new_field;
- RETURN_IF_ERROR(convert_field_to_type(field, *least_common_type.get(), &new_field));
+ convert_field_to_type(field, *least_common_type.get(), &new_field);
field = new_field;
}
data.back()->insert(field);
- return Status::OK();
}
-Status ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn& src, size_t start, size_t length) {
+void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn& src, size_t start, size_t length) {
assert(src.is_finalized());
const auto& src_column = src.data.back();
const auto& src_type = src.least_common_type.get();
@@ -379,18 +381,20 @@ Status ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn& src, size_t sta
data.back()->insert_range_from(*src_column, start, length);
} else {
DataTypePtr new_least_common_type = nullptr;
- RETURN_IF_ERROR(get_least_supertype(DataTypes {least_common_type.get(), src_type},
- &new_least_common_type,
- true /*compatible with string type*/));
+ get_least_supertype(DataTypes {least_common_type.get(), src_type}, &new_least_common_type,
+ true /*compatible with string type*/);
ColumnPtr casted_column;
- RETURN_IF_ERROR(schema_util::cast_column({src_column, src_type, ""}, new_least_common_type,
- &casted_column));
+ Status st = schema_util::cast_column({src_column, src_type, ""}, new_least_common_type,
+ &casted_column);
+ if (!st.ok()) {
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT, st.to_string() + ", real_code:{}",
+ st.code());
+ }
if (!least_common_type.get()->equals(*new_least_common_type)) {
add_new_column_part(std::move(new_least_common_type));
}
data.back()->insert_range_from(*casted_column, start, length);
}
- return Status::OK();
}
bool ColumnObject::Subcolumn::is_finalized() const {
@@ -400,7 +404,9 @@ bool ColumnObject::Subcolumn::is_finalized() const {
template <typename Func>
ColumnPtr ColumnObject::apply_for_subcolumns(Func&& func, std::string_view func_name) const {
if (!is_finalized()) {
- LOG(FATAL) << "Cannot " << func_name << " non-finalized ColumnObject";
+ // LOG(FATAL) << "Cannot " << func_name << " non-finalized ColumnObject";
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "Cannot {} non-finalized ColumnObject", func_name);
}
auto res = ColumnObject::create(is_nullable);
for (const auto& subcolumn : subcolumns) {
@@ -560,7 +566,11 @@ void ColumnObject::check_consistency() const {
}
for (const auto& leaf : subcolumns) {
if (num_rows != leaf->data.size()) {
- assert(false);
+ // LOG(FATAL) << "unmatched column:" << leaf->path.get_path()
+ // << ", expeted rows:" << num_rows << ", but meet:" << leaf->data.size();
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "unmatched column: {}, expeted rows: {}, but meet: {}",
+ leaf->path.get_path(), num_rows, leaf->data.size());
}
}
}
@@ -575,7 +585,8 @@ size_t ColumnObject::size() const {
MutableColumnPtr ColumnObject::clone_resized(size_t new_size) const {
/// cloneResized with new_size == 0 is used for cloneEmpty().
if (new_size != 0) {
- LOG(FATAL) << "ColumnObject doesn't support resize to non-zero length";
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "ColumnObject doesn't support resize to non-zero length");
}
return ColumnObject::create(is_nullable);
}
@@ -605,11 +616,11 @@ void ColumnObject::for_each_subcolumn(ColumnCallback callback) {
}
}
-Status ColumnObject::try_insert_from(const IColumn& src, size_t n) {
+void ColumnObject::try_insert_from(const IColumn& src, size_t n) {
return try_insert(src[n]);
}
-Status ColumnObject::try_insert(const Field& field) {
+void ColumnObject::try_insert(const Field& field) {
const auto& object = field.get<const VariantMap&>();
phmap::flat_hash_set<StringRef, StringRefHash> inserted;
size_t old_size = size();
@@ -619,16 +630,16 @@ Status ColumnObject::try_insert(const Field& field) {
if (!has_subcolumn(key)) {
bool succ = add_sub_column(key, old_size);
if (!succ) {
- return Status::InvalidArgument(
- fmt::format("Failed to add sub column {}", key.get_path()));
+ throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+ "Failed to add sub column {}", key.get_path());
}
}
auto* subcolumn = get_subcolumn(key);
if (!subcolumn) {
- return Status::InvalidArgument(
- fmt::format("Failed to find sub column {}", key.get_path()));
+ doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+ fmt::format("Failed to find sub column {}", key.get_path()));
}
- RETURN_IF_ERROR(subcolumn->insert(value));
+ subcolumn->insert(value);
}
for (auto& entry : subcolumns) {
if (!inserted.contains(entry->path.get_path())) {
@@ -636,7 +647,6 @@ Status ColumnObject::try_insert(const Field& field) {
}
}
++num_rows;
- return Status::OK();
}
void ColumnObject::insert_default() {
@@ -674,26 +684,26 @@ Status ColumnObject::try_insert_indices_from(const IColumn& src, const int* indi
if (*x == -1) {
ColumnObject::insert_default();
} else {
- RETURN_IF_ERROR(ColumnObject::try_insert_from(src, *x));
+ ColumnObject::try_insert_from(src, *x);
}
}
finalize();
return Status::OK();
}
-Status ColumnObject::try_insert_range_from(const IColumn& src, size_t start, size_t length) {
+void ColumnObject::try_insert_range_from(const IColumn& src, size_t start, size_t length) {
const auto& src_object = assert_cast<const ColumnObject&>(src);
if (UNLIKELY(src_object.empty())) {
- return Status::OK();
+ return;
}
for (auto& entry : subcolumns) {
if (src_object.has_subcolumn(entry->path)) {
auto* subcolumn = src_object.get_subcolumn(entry->path);
if (!subcolumn) {
- return Status::InvalidArgument(
- fmt::format("Failed to find sub column {}", entry->path.get_path()));
+ throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+ "Failed to find sub column {}", entry->path.get_path());
}
- RETURN_IF_ERROR(entry->data.insertRangeFrom(*subcolumn, start, length));
+ entry->data.insertRangeFrom(*subcolumn, start, length);
} else {
entry->data.insertManyDefaults(length);
}
@@ -714,20 +724,19 @@ Status ColumnObject::try_insert_range_from(const IColumn& src, size_t start, siz
succ = add_sub_column(entry->path, num_rows);
}
if (!succ) {
- return Status::InvalidArgument(
- fmt::format("Failed to add column {}", entry->path.get_path()));
+ throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+ "Failed to add column {}", entry->path.get_path());
}
auto* subcolumn = get_subcolumn(entry->path);
if (!subcolumn) {
- return Status::InvalidArgument(
- fmt::format("Failed to find sub column {}", entry->path.get_path()));
+ throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
+ "Failed to find sub column {}", entry->path.get_path());
}
- RETURN_IF_ERROR(subcolumn->insertRangeFrom(entry->data, start, length));
+ subcolumn->insertRangeFrom(entry->data, start, length);
}
}
num_rows += length;
finalize();
- return Status::OK();
}
void ColumnObject::pop_back(size_t length) {
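The ColumnObject changes above replace Status plumbing with exceptions inside the column, leaving callers to convert back to a Status at the API boundary (as try_insert_indices_from still does). A generic sketch of that boundary pattern, with a toy Status and std::exception standing in for the doris::Status / doris::Exception types:

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct Status {
    bool ok = true;
    std::string msg;
    static Status OK() { return {}; }
    static Status InvalidArgument(std::string m) { return {false, std::move(m)}; }
};

// Internal path: throws instead of returning Status, so deeply nested inserts can
// unwind without every call site checking a return code.
void insert_value(std::vector<int>& column, int value) {
    if (value < 0) {
        throw std::invalid_argument("negative values are not supported");
    }
    column.push_back(value);
}

// Boundary: the public entry point converts exceptions back into a Status.
Status try_insert_all(std::vector<int>& column, const std::vector<int>& values) {
    try {
        for (int v : values) {
            insert_value(column, v);
        }
    } catch (const std::exception& e) {
        return Status::InvalidArgument(e.what());
    }
    return Status::OK();
}

int main() {
    std::vector<int> col;
    Status st = try_insert_all(col, {1, 2, -3});
    std::cout << (st.ok ? "OK" : st.msg) << "\n"; // prints the error message
    return 0;
}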
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index 7fec48a0c8..339db8c6b9 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -66,7 +66,7 @@ struct FieldInfo {
/// Number of dimension in array. 0 if field is scalar.
size_t num_dimensions;
};
-Status get_field_info(const Field& field, FieldInfo* info);
+void get_field_info(const Field& field, FieldInfo* info);
/** A column that represents object with dynamic set of subcolumns.
* Subcolumns are identified by paths in document and are stored in
* a trie-like structure. ColumnObject is not suitable for writing into tables
@@ -109,16 +109,16 @@ public:
/// Inserts a field, which scalars can be arbitrary, but number of
/// dimensions should be consistent with current common type.
- /// return Status::InvalidArgument when meet conflict types
- Status insert(Field field);
+ /// throws InvalidArgument when conflicting types are met
+ void insert(Field field);
- Status insert(Field field, FieldInfo info);
+ void insert(Field field, FieldInfo info);
void insertDefault();
void insertManyDefaults(size_t length);
- Status insertRangeFrom(const Subcolumn& src, size_t start, size_t length);
+ void insertRangeFrom(const Subcolumn& src, size_t start, size_t length);
void pop_back(size_t n);
@@ -279,12 +279,7 @@ public:
void for_each_subcolumn(ColumnCallback callback) override;
// Do nothing, call try_insert instead
- void insert(const Field& field) override {
- Status st = try_insert(field);
- if (!st.ok()) {
- LOG(FATAL) << "insert return ERROR status: " << st;
- }
- }
+ void insert(const Field& field) override { try_insert(field); }
void insert_range_from(const IColumn& src, size_t start, size_t length) override;
@@ -294,13 +289,12 @@ public:
void insert_indices_from(const IColumn& src, const int* indices_begin,
const int* indices_end) override;
- // Only called in Block::add_row
- Status try_insert(const Field& field);
+ // May throw an exception
+ void try_insert(const Field& field);
- Status try_insert_from(const IColumn& src, size_t n);
+ void try_insert_from(const IColumn& src, size_t n);
- // Only called in Block::add_row
- Status try_insert_range_from(const IColumn& src, size_t start, size_t length);
+ void try_insert_range_from(const IColumn& src, size_t start, size_t length);
void insert_default() override;
diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp
index 7bcfacb1a0..798722d1b0 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -35,6 +35,7 @@
#include <vector>
#include "common/config.h"
+#include "common/status.h"
#include "olap/olap_common.h"
#include "runtime/client_cache.h"
#include "runtime/exec_env.h"
@@ -273,11 +274,11 @@ Status send_add_columns_rpc(ColumnsWithTypeAndName column_type_names,
return Status::OK();
}
-void unfold_object(size_t dynamic_col_position, Block& block, bool cast_to_original_type) {
+Status unfold_object(size_t dynamic_col_position, Block& block, bool cast_to_original_type) {
auto dynamic_col = block.get_by_position(dynamic_col_position).column->assume_mutable();
auto* column_object_ptr = assert_cast<ColumnObject*>(dynamic_col.get());
if (column_object_ptr->empty()) {
- return;
+ return Status::OK();
}
size_t num_rows = column_object_ptr->size();
CHECK(block.rows() <= num_rows);
@@ -308,7 +309,8 @@ void unfold_object(size_t dynamic_col_position, Block& block, bool cast_to_origi
}
if (cast_to_original_type && !dst_type->equals(*types[i])) {
// Cast static columns to original slot type
- schema_util::cast_column({subcolumns[i], types[i], ""}, dst_type, &column);
+ RETURN_IF_ERROR(
+ schema_util::cast_column({subcolumns[i], types[i], ""}, dst_type, &column));
}
// replace original column
column_type_name->column = column;
@@ -326,6 +328,16 @@ void unfold_object(size_t dynamic_col_position, Block& block, bool cast_to_origi
entry.column->assume_mutable()->insert_many_defaults(num_rows - entry.column->size());
}
}
+#ifndef NDEBUG
+ for (const auto& column_type_name : block) {
+ if (column_type_name.column->size() != num_rows) {
+ LOG(FATAL) << "unmatched column:" << column_type_name.name
+ << ", expeted rows:" << num_rows
+ << ", but meet:" << column_type_name.column->size();
+ }
+ }
+#endif
+ return Status::OK();
}
void LocalSchemaChangeRecorder::add_extended_columns(const TabletColumn& new_column,
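The new #ifndef NDEBUG block above adds a debug-only invariant check that every column in the block has the expected row count. The same pattern in miniature (assert-style, compiled out in release builds, toy types):

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

struct NamedColumn {
    std::string name;
    std::vector<int> data;
};

void check_block_consistency(const std::vector<NamedColumn>& block, std::size_t num_rows) {
#ifndef NDEBUG
    for (const auto& col : block) {
        // In debug builds, fail loudly if any column has an unexpected row count.
        assert(col.data.size() == num_rows && "unmatched column row count");
    }
#endif
    (void)block;
    (void)num_rows;
}

int main() {
    std::vector<NamedColumn> block = {{"a", {1, 2, 3}}, {"b", {4, 5, 6}}};
    check_block_consistency(block, 3);
    return 0;
}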
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index 978eaa19c7..fda6a69cf4 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -65,7 +65,7 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co
// from object column and casted to the new type from slot_descs.
// Also if column in block is empty, it will be filled
// with num_rows of default values
-void unfold_object(size_t dynamic_col_position, Block& block, bool cast_to_original_type);
+Status unfold_object(size_t dynamic_col_position, Block& block, bool cast_to_original_type);
/// If both of types are signed/unsigned integers and size of left field type
/// is less than right type, we don't need to convert field,
diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp
index bc7a821e64..a2a1377050 100644
--- a/be/src/vec/data_types/convert_field_to_type.cpp
+++ b/be/src/vec/data_types/convert_field_to_type.cpp
@@ -30,6 +30,8 @@
#include <type_traits>
#include <vector>
+#include "common/exception.h"
+#include "common/status.h"
#include "vec/common/field_visitors.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/accurate_comparison.h"
@@ -94,8 +96,9 @@ Field convert_numeric_type_impl(const Field& from) {
}
return result;
}
+
template <typename To>
-Status convert_numric_type(const Field& from, const IDataType& type, Field* to) {
+void convert_numric_type(const Field& from, const IDataType& type, Field* to) {
if (from.get_type() == Field::Types::UInt64) {
*to = convert_numeric_type_impl<UInt64, To>(from);
} else if (from.get_type() == Field::Types::Int64) {
@@ -107,18 +110,17 @@ Status convert_numric_type(const Field& from, const IDataType& type, Field* to)
} else if (from.get_type() == Field::Types::Int128) {
*to = convert_numeric_type_impl<Int128, To>(from);
} else {
- return Status::InvalidArgument(
- fmt::format("Type mismatch in IN or VALUES section. Expected: {}. Got: {}",
- type.get_name(), from.get_type()));
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+ "Type mismatch in IN or VALUES section. Expected: {}. Got: {}",
+ type.get_name(), from.get_type());
}
- return Status::OK();
}
-Status convert_field_to_typeImpl(const Field& src, const IDataType& type,
- const IDataType* from_type_hint, Field* to) {
+void convert_field_to_typeImpl(const Field& src, const IDataType& type,
+ const IDataType* from_type_hint, Field* to) {
if (from_type_hint && from_type_hint->equals(type)) {
*to = src;
- return Status::OK();
+ return;
}
WhichDataType which_type(type);
// TODO add more types
@@ -163,16 +165,16 @@ Status convert_field_to_typeImpl(const Field& src, const IDataType& type,
src.get_type() == Field::Types::UInt64) {
/// We don't need any conversion UInt64 is under type of Date and DateTime
*to = src;
- return Status::OK();
+ return;
}
} else if (which_type.is_string_or_fixed_string()) {
if (src.get_type() == Field::Types::String) {
*to = src;
- return Status::OK();
+ return;
}
// TODO this is a very simple translator, support more complex types
*to = apply_visitor(FieldVisitorToStringSimple(), src);
- return Status::OK();
+ return;
} else if (const DataTypeArray* type_array = typeid_cast<const DataTypeArray*>(&type)) {
if (src.get_type() == Field::Types::Array) {
const Array& src_arr = src.get<Array>();
@@ -180,14 +182,14 @@ Status convert_field_to_typeImpl(const Field& src, const IDataType& type,
const auto& element_type = *(type_array->get_nested_type());
Array res(src_arr_size);
for (size_t i = 0; i < src_arr_size; ++i) {
- RETURN_IF_ERROR(convert_field_to_type(src_arr[i], element_type, &res[i]));
+ convert_field_to_type(src_arr[i], element_type, &res[i]);
if (res[i].is_null() && !element_type.is_nullable()) {
- return Status::InvalidArgument(
- fmt::format("Cannot convert NULL to {}", element_type.get_name()));
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Cannot convert NULL to {}",
+ element_type.get_name());
}
}
*to = Field(res);
- return Status::OK();
+ return;
}
}
// else if (const DataTypeTuple* type_tuple = typeid_cast<const DataTypeTuple*>(&type)) {
@@ -229,31 +231,31 @@ Status convert_field_to_typeImpl(const Field& src, const IDataType& type,
// return Status::OK();
// }
// }
- return Status::InvalidArgument(
- fmt::format("Type mismatch in IN or VALUES section. Expected: {}. Got: {}",
- type.get_name(), src.get_type()));
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+ "Type mismatch in IN or VALUES section. Expected: {}. Got: {}",
+ type.get_name(), src.get_type());
}
} // namespace
-Status convert_field_to_type(const Field& from_value, const IDataType& to_type, Field* to,
- const IDataType* from_type_hint) {
+void convert_field_to_type(const Field& from_value, const IDataType& to_type, Field* to,
+ const IDataType* from_type_hint) {
if (from_value.is_null()) {
*to = from_value;
- return Status::OK();
+ return;
}
if (from_type_hint && from_type_hint->equals(to_type)) {
*to = from_value;
- return Status::OK();
+ return;
}
if (const auto* nullable_type = typeid_cast<const DataTypeNullable*>(&to_type)) {
const IDataType& nested_type = *nullable_type->get_nested_type();
/// NULL remains NULL after any conversion.
if (WhichDataType(nested_type).is_nothing()) {
*to = {};
- return Status::OK();
+ return;
}
if (from_type_hint && from_type_hint->equals(nested_type)) {
*to = from_value;
- return Status::OK();
+ return;
}
return convert_field_to_typeImpl(from_value, nested_type, from_type_hint, to);
} else {
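Per the comment in convert_field_to_type.h below ("If the value does not fall into the range - returns Null"), numeric conversion is range-checked and yields a null Field when the value does not fit the target type. A simplified sketch of such a range-checked conversion, with std::optional standing in for a nullable Field and a hypothetical helper name (signed-integer narrowing only):

#include <cstdint>
#include <iostream>
#include <limits>
#include <optional>
#include <string>

// Returns std::nullopt (the "Null field") when the value does not fit the target type.
// Signed-integer narrowing only, for illustration.
template <typename To, typename From>
std::optional<To> convert_numeric(From from) {
    if (from < static_cast<From>(std::numeric_limits<To>::min()) ||
        from > static_cast<From>(std::numeric_limits<To>::max())) {
        return std::nullopt;
    }
    return static_cast<To>(from);
}

int main() {
    auto a = convert_numeric<int32_t>(int64_t{42});      // fits -> 42
    auto b = convert_numeric<int32_t>(int64_t{1} << 40); // too large -> null
    std::cout << (a ? std::to_string(*a) : "null") << "\n";
    std::cout << (b ? std::to_string(*b) : "null") << "\n";
    return 0;
}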
diff --git a/be/src/vec/data_types/convert_field_to_type.h b/be/src/vec/data_types/convert_field_to_type.h
index 8d57994c31..07d48452a4 100644
--- a/be/src/vec/data_types/convert_field_to_type.h
+++ b/be/src/vec/data_types/convert_field_to_type.h
@@ -33,7 +33,7 @@ class IDataType;
* If the value does not fall into the range - returns Null.
*/
-Status convert_field_to_type(const Field& from_value, const IDataType& to_type, Field* field,
- const IDataType* from_type_hint = nullptr);
+void convert_field_to_type(const Field& from_value, const IDataType& to_type, Field* field,
+ const IDataType* from_type_hint = nullptr);
} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/get_least_supertype.cpp b/be/src/vec/data_types/get_least_supertype.cpp
index 3d08a77443..be9dd5c05c 100644
--- a/be/src/vec/data_types/get_least_supertype.cpp
+++ b/be/src/vec/data_types/get_least_supertype.cpp
@@ -20,6 +20,7 @@
#include "vec/data_types/get_least_supertype.h"
+#include <fmt/core.h>
#include <fmt/format.h>
#include <glog/logging.h>
#include <stddef.h>
@@ -31,6 +32,7 @@
#include <string>
#include <vector>
+#include "common/status.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
@@ -45,23 +47,36 @@ namespace doris::vectorized {
namespace {
+String type_to_string(const DataTypePtr& type) {
+ return type->get_name();
+}
+String type_to_string(const TypeIndex& type) {
+ return getTypeName(type);
+}
+
+template <typename DataTypes>
String get_exception_message_prefix(const DataTypes& types) {
std::stringstream res;
res << "There is no supertype for types ";
-
bool first = true;
for (const auto& type : types) {
if (!first) res << ", ";
first = false;
-
- res << type->get_name();
+ res << type_to_string(type);
}
-
return res.str();
}
+
} // namespace
-Status get_numeric_type(const TypeIndexSet& types, DataTypePtr* type) {
+void get_numeric_type(const TypeIndexSet& types, DataTypePtr* type, bool compatible_with_string) {
+ auto throw_or_return = [&](std::string_view message, int error_code) {
+ if (compatible_with_string) {
+ *type = std::make_shared<DataTypeString>();
+ return;
+ }
+ throw doris::Exception(error_code, message);
+ };
bool all_numbers = true;
size_t max_bits_of_signed_integer = 0;
@@ -104,9 +119,11 @@ Status get_numeric_type(const TypeIndexSet& types, DataTypePtr* type) {
if (max_bits_of_signed_integer || max_bits_of_unsigned_integer ||
max_mantissa_bits_of_floating) {
if (!all_numbers) {
- LOG(INFO) << " because some of them are numbers and some of them are not";
*type = nullptr;
- return Status::InvalidArgument("some of them are numbers and some of them are not");
+ return throw_or_return(
+ get_exception_message_prefix(types) +
+ " because some of them are numbers and some of them are not",
+ doris::ErrorCode::INVALID_ARGUMENT);
}
/// If there are signed and unsigned types of same bit-width, the result must be signed number with at least one more bit.
@@ -117,8 +134,9 @@ Status get_numeric_type(const TypeIndexSet& types, DataTypePtr* type) {
/// If unsigned is not covered by signed.
if (max_bits_of_signed_integer &&
- max_bits_of_unsigned_integer >= max_bits_of_signed_integer)
+ max_bits_of_unsigned_integer >= max_bits_of_signed_integer) {
++min_bit_width_of_integer;
+ }
/// If the result must be floating.
if (max_mantissa_bits_of_floating) {
@@ -126,18 +144,21 @@ Status get_numeric_type(const TypeIndexSet& types, DataTypePtr* type) {
std::max(min_bit_width_of_integer, max_mantissa_bits_of_floating);
if (min_mantissa_bits <= 24) {
*type = std::make_shared<DataTypeFloat32>();
- return Status::OK();
+ return;
} else if (min_mantissa_bits <= 53) {
*type = std::make_shared<DataTypeFloat64>();
- return Status::OK();
+ return;
} else {
LOG(INFO) << " because some of them are integers and some are floating point "
"but there is no floating point type, that can exactly represent "
"all required integers";
*type = nullptr;
- return Status::InvalidArgument(
- "there is no floating point type, that can exactly represent "
- "all required integers");
+ return throw_or_return(
+ get_exception_message_prefix(types) +
+ " because some of them are integers and some are floating point "
+ "but there is no floating point type, that can exactly represent "
+ "all required integers",
+ doris::ErrorCode::INVALID_ARGUMENT);
}
}
@@ -145,23 +166,25 @@ Status get_numeric_type(const TypeIndexSet& types, DataTypePtr* type) {
if (max_bits_of_signed_integer) {
if (min_bit_width_of_integer <= 8) {
*type = std::make_shared<DataTypeInt8>();
- return Status::OK();
+ return;
} else if (min_bit_width_of_integer <= 16) {
*type = std::make_shared<DataTypeInt16>();
- return Status::OK();
+ return;
} else if (min_bit_width_of_integer <= 32) {
*type = std::make_shared<DataTypeInt32>();
- return Status::OK();
+ return;
} else if (min_bit_width_of_integer <= 64) {
*type = std::make_shared<DataTypeInt64>();
- return Status::OK();
+ return;
} else {
LOG(INFO) << " because some of them are signed integers and some are unsigned "
"integers, but there is no signed integer type, that can exactly "
"represent all required unsigned integer values";
- return Status::InvalidArgument(
- "there is no signed integer type, that can exactly "
- "represent all required unsigned integer values");
+ return throw_or_return(
+ " because some of them are signed integers and some are unsigned "
+ "integers, but there is no signed integer type, that can exactly "
+ "represent all required unsigned integer values",
+ doris::ErrorCode::INVALID_ARGUMENT);
}
}
@@ -169,43 +192,50 @@ Status get_numeric_type(const TypeIndexSet& types, DataTypePtr* type) {
{
if (min_bit_width_of_integer <= 8) {
*type = std::make_shared<DataTypeUInt8>();
- return Status::OK();
+ return;
} else if (min_bit_width_of_integer <= 16) {
*type = std::make_shared<DataTypeUInt16>();
- return Status::OK();
+ return;
} else if (min_bit_width_of_integer <= 32) {
*type = std::make_shared<DataTypeUInt32>();
- return Status::OK();
+ return;
} else if (min_bit_width_of_integer <= 64) {
*type = std::make_shared<DataTypeUInt64>();
- return Status::OK();
+ return;
} else {
- LOG(FATAL) << "Logical error: "
- << "but as all data types are unsigned integers, we must have found "
- "maximum unsigned integer type";
+ LOG(WARNING) << "Logical error: "
+ << "but as all data types are unsigned integers, we must have found "
+ "maximum unsigned integer type";
*type = nullptr;
- return Status::InvalidArgument(
- "all data types are unsigned integers, we must have found "
- "maximum unsigned integer type");
+ return throw_or_return(
+ "Logical error: " + get_exception_message_prefix(types) +
+ " but as all data types are unsigned integers, we must have found "
+ "maximum unsigned integer type",
+ doris::ErrorCode::INVALID_ARGUMENT);
}
}
}
*type = nullptr;
- return Status::OK();
}
// TODO conflict type resolve
-Status get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compatible_with_string) {
+void get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compatible_with_string) {
/// Trivial cases
-
+ auto throw_or_return = [&](std::string_view message, int error_code) {
+ if (compatible_with_string) {
+ *type = std::make_shared<DataTypeString>();
+ return;
+ }
+ throw doris::Exception(error_code, String(message));
+ };
if (types.empty()) {
*type = std::make_shared<DataTypeNothing>();
- return Status::OK();
+ return;
}
if (types.size() == 1) {
*type = types[0];
- return Status::OK();
+ return;
}
/// All types are equal
@@ -220,7 +250,7 @@ Status get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compa
if (all_equal) {
*type = types[0];
- return Status::OK();
+ return;
}
}
@@ -231,12 +261,16 @@ Status get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compa
DataTypes non_nothing_types;
non_nothing_types.reserve(types.size());
- for (const auto& type : types)
- if (!typeid_cast<const DataTypeNothing*>(type.get()))
+ for (const auto& type : types) {
+ if (!typeid_cast<const DataTypeNothing*>(type.get())) {
non_nothing_types.emplace_back(type);
+ }
+ }
- if (non_nothing_types.size() < types.size())
- return get_least_supertype(non_nothing_types, type, compatible_with_string);
+ if (non_nothing_types.size() < types.size()) {
+ get_least_supertype(non_nothing_types, type, compatible_with_string);
+ return;
+ }
}
/// For Nullable
@@ -251,27 +285,28 @@ Status get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compa
typeid_cast<const DataTypeNullable*>(type.get())) {
have_nullable = true;
- if (!type_nullable->only_null())
+ if (!type_nullable->only_null()) {
nested_types.emplace_back(type_nullable->get_nested_type());
- } else
+ }
+ } else {
nested_types.emplace_back(type);
+ }
}
if (have_nullable) {
DataTypePtr nested_type;
- Status st = get_least_supertype(nested_types, &nested_type, compatible_with_string);
- if (!st.ok()) {
- return st;
- }
+ get_least_supertype(nested_types, &nested_type, compatible_with_string);
*type = std::make_shared<DataTypeNullable>(nested_type);
- return st;
+ return;
}
}
/// Non-recursive rules
phmap::flat_hash_set<TypeIndex> type_ids;
- for (const auto& type : types) type_ids.insert(type->get_type_id());
+ for (const auto& type : types) {
+ type_ids.insert(type->get_type_id());
+ }
/// For String and FixedString, or for different FixedStrings, the common type is String.
/// No other types are compatible with Strings. TODO Enums?
@@ -285,12 +320,14 @@ Status get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compa
LOG(INFO)
<< get_exception_message_prefix(types)
<< " because some of them are String/FixedString and some of them are not";
- return Status::InvalidArgument(
- "some of them are String/FixedString and some of them are not");
+ return throw_or_return(get_exception_message_prefix(types) +
+ " because some of them are String/FixedString and "
+ "some of them are not",
+ doris::ErrorCode::INVALID_ARGUMENT);
}
*type = std::make_shared<DataTypeString>();
- return Status::OK();
+ return;
}
}
@@ -304,12 +341,14 @@ Status get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compa
if (!all_date_or_datetime) {
LOG(INFO) << get_exception_message_prefix(types)
<< " because some of them are Date/DateTime and some of them are not";
- return Status::InvalidArgument(
- "because some of them are Date/DateTime and some of them are not");
+ return throw_or_return(
+ get_exception_message_prefix(types) +
+ " because some of them are Date/DateTime and some of them are not",
+ doris::ErrorCode::INVALID_ARGUMENT);
}
*type = std::make_shared<DataTypeDateTime>();
- return Status::OK();
+ return;
}
}
@@ -340,8 +379,10 @@ Status get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compa
if (num_supported != type_ids.size()) {
LOG(INFO) << get_exception_message_prefix(types)
<< " because some of them have no lossless convertion to Decimal";
- return Status::InvalidArgument(
- "some of them have no lossless convertion to Decimal");
+ return throw_or_return(
+ get_exception_message_prefix(types) +
+                            " because some of them have no lossless conversion to Decimal",
+ doris::ErrorCode::INVALID_ARGUMENT);
}
UInt32 max_scale = 0;
@@ -364,50 +405,58 @@ Status get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compa
LOG(INFO) << fmt::format("{} because the least supertype is Decimal({},{})",
get_exception_message_prefix(types), min_precision,
max_scale);
- return Status::InvalidArgument(
- fmt::format("{} because the least supertype is Decimal({},{})",
- get_exception_message_prefix(types), min_precision, max_scale));
+ return throw_or_return(get_exception_message_prefix(types) +
+ fmt::format(" because some of them have no lossless "
+                                                   "conversion to Decimal({},{})",
+ min_precision, max_scale),
+ doris::ErrorCode::INVALID_ARGUMENT);
}
if (have_decimal128 || min_precision > DataTypeDecimal<Decimal64>::max_precision()) {
*type = std::make_shared<DataTypeDecimal<Decimal128>>(
DataTypeDecimal<Decimal128>::max_precision(), max_scale);
- return Status::OK();
+ return;
}
if (have_decimal128i || min_precision > DataTypeDecimal<Decimal64>::max_precision()) {
*type = std::make_shared<DataTypeDecimal<Decimal128I>>(
DataTypeDecimal<Decimal128I>::max_precision(), max_scale);
- return Status::OK();
+ return;
}
if (have_decimal64 || min_precision > DataTypeDecimal<Decimal32>::max_precision()) {
*type = std::make_shared<DataTypeDecimal<Decimal64>>(
DataTypeDecimal<Decimal64>::max_precision(), max_scale);
- return Status::OK();
+ return;
}
*type = std::make_shared<DataTypeDecimal<Decimal32>>(
DataTypeDecimal<Decimal32>::max_precision(), max_scale);
- return Status::OK();
+ return;
}
}
/// For numeric types, the most complicated part.
{
DataTypePtr numeric_type = nullptr;
- Status st = get_numeric_type(type_ids, &numeric_type);
+ get_numeric_type(type_ids, &numeric_type, compatible_with_string);
if (numeric_type) {
- DCHECK(st.ok());
*type = numeric_type;
- return Status::OK();
+ return;
}
}
/// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases).
*type = nullptr;
- return Status::InvalidArgument(get_exception_message_prefix(types));
+ return throw_or_return(get_exception_message_prefix(types), ErrorCode::INVALID_ARGUMENT);
}
-Status get_least_supertype(const TypeIndexSet& types, DataTypePtr* type,
- bool compatible_with_string) {
+void get_least_supertype(const TypeIndexSet& types, DataTypePtr* type,
+ bool compatible_with_string) {
+ auto throw_or_return = [&](std::string_view message, int error_code) {
+ if (compatible_with_string) {
+ *type = std::make_shared<DataTypeString>();
+ return;
+ }
+ throw doris::Exception(error_code, String(message));
+ };
TypeIndexSet types_set;
for (const auto& t : types) {
if (WhichDataType(t).is_nothing()) continue;
@@ -416,8 +465,9 @@ Status get_least_supertype(const TypeIndexSet& types, DataTypePtr* type,
LOG(INFO) << "Cannot get common type by type ids with parametric type"
<< getTypeName(t);
*type = nullptr;
- return Status::InvalidArgument(
- "Cannot get common type by type ids with parametric type");
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+ "Cannot get common type by type ids with parametric type {}",
+ type_to_string(t));
}
types_set.insert(t);
@@ -425,31 +475,33 @@ Status get_least_supertype(const TypeIndexSet& types, DataTypePtr* type,
if (types_set.empty()) {
*type = std::make_shared<DataTypeNothing>();
- return Status::OK();
+ return;
}
if (types.count(TypeIndex::String)) {
- if (types.size() != 1 && !compatible_with_string) {
+ if (types.size() != 1) {
LOG(INFO) << " because some of them are String and some of them are not";
*type = nullptr;
- return Status::InvalidArgument("some of them are String and some of them are not");
+ return throw_or_return(
+ get_exception_message_prefix(types) +
+ " because some of them are String and some of them are not",
+ ErrorCode::INVALID_ARGUMENT);
}
*type = std::make_shared<DataTypeString>();
- return Status::OK();
+ return;
}
/// For numeric types, the most complicated part.
DataTypePtr numeric_type = nullptr;
- Status st = get_numeric_type(types, &numeric_type);
+ get_numeric_type(types, &numeric_type, compatible_with_string);
if (numeric_type) {
- DCHECK(st.ok());
*type = numeric_type;
- return Status::OK();
+ return;
}
/// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases).
*type = nullptr;
- return Status::InvalidArgument("unknown type");
+ return throw_or_return(get_exception_message_prefix(types), ErrorCode::INVALID_ARGUMENT);
}
} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/get_least_supertype.h b/be/src/vec/data_types/get_least_supertype.h
index ad52f553df..72a6fc31f2 100644
--- a/be/src/vec/data_types/get_least_supertype.h
+++ b/be/src/vec/data_types/get_least_supertype.h
@@ -42,10 +42,10 @@ namespace doris::vectorized {
using TypeIndexSet = phmap::flat_hash_set<TypeIndex>;
-Status get_least_supertype(const DataTypes& types, DataTypePtr* type,
- bool compatible_with_string = false);
+void get_least_supertype(const DataTypes& types, DataTypePtr* type,
+ bool compatible_with_string = false);
-Status get_least_supertype(const TypeIndexSet& types, DataTypePtr* type,
- bool compatible_with_string = false);
+void get_least_supertype(const TypeIndexSet& types, DataTypePtr* type,
+ bool compatible_with_string = false);
} // namespace doris::vectorized
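
The new signatures shift error handling from Status to exceptions: with compatible_with_string=true incompatible inputs silently degrade to DataTypeString, and with the default false they throw doris::Exception. For reference, a minimal caller-side sketch of both modes follows; the wrapper function name and include list are illustrative assumptions rather than part of the patch, while RETURN_IF_CATCH_EXCEPTION is the macro this patch already uses in new_json_reader.cpp to convert a thrown exception back into a Status.

// Sketch only; include paths are assumed from the BE source layout.
#include "common/status.h"                       // Status
#include "common/exception.h"                    // doris::Exception, RETURN_IF_CATCH_EXCEPTION (assumed path)
#include "vec/data_types/get_least_supertype.h"

namespace doris::vectorized {

Status example_find_common_type(const DataTypes& inputs, DataTypePtr* out) {
    // Lenient mode: never throws, incompatible inputs fall back to DataTypeString.
    get_least_supertype(inputs, out, /*compatible_with_string=*/true);

    // Strict mode: incompatible inputs throw doris::Exception, so a Status-based
    // caller wraps the call and turns the exception back into an error Status.
    DataTypePtr strict_type;
    RETURN_IF_CATCH_EXCEPTION(get_least_supertype(inputs, &strict_type, false));
    return Status::OK();
}

} // namespace doris::vectorized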
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp
index daa549776b..5925515200 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -39,6 +39,7 @@
// IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
#include "common/compiler_util.h" // IWYU pragma: keep
+#include "common/status.h"
#include "exprs/json_functions.h"
#include "io/file_factory.h"
#include "io/fs/buffered_reader.h"
@@ -460,8 +461,7 @@ Status NewJsonReader::_parse_dynamic_json(bool* is_empty_row, bool* eof, Block&
_bytes_read_counter += size;
auto& dynamic_column = block.get_columns().back()->assume_mutable_ref();
auto& column_object = assert_cast<vectorized::ColumnObject&>(dynamic_column);
- Defer __finalize_clousure([&] {
- // Reached buffer size, unfold intermediate column object
+ auto finalize_column = [&]() -> Status {
size_t batch_size = std::max(_state->batch_size(), (int)_MIN_BATCH_SIZE);
if (column_object.size() >= batch_size || _reader_eof) {
column_object.finalize();
@@ -470,35 +470,22 @@ Status NewJsonReader::_parse_dynamic_json(bool* is_empty_row, bool* eof, Block&
}
// Unfold object columns for the purpose of extracting static columns and
// fill default values missing in static columns
- schema_util::unfold_object(block.columns() - 1, block,
- true /*cast to original column type*/);
+ RETURN_IF_ERROR(schema_util::unfold_object(block.columns() - 1, block,
+ true /*cast to original column type*/));
}
- });
+ return Status::OK();
+ };
// read all data, then return
if (size == 0 || *eof) {
*is_empty_row = true;
+ RETURN_IF_ERROR(finalize_column());
return Status::OK();
}
- Status st = doris::vectorized::parse_json_to_variant(column_object, StringRef {json_str, size},
- _json_parser.get());
- if (st.is<DATA_QUALITY_ERROR>()) {
- fmt::memory_buffer error_msg;
- fmt::format_to(error_msg, "Parse json data for JsonDoc failed. error info: {}",
- st.to_string());
- RETURN_IF_ERROR(_state->append_error_msg_to_file(
- [&]() -> std::string { return std::string((char*)json_str, size); },
- [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof));
- _counter->num_rows_filtered++;
- if (*_scanner_eof) {
- // Case A: if _scanner_eof is set to true in "append_error_msg_to_file", which means
- // we meet enough invalid rows and the scanner should be stopped.
- // So we set eof to true and return OK, the caller will stop the process as we meet the end of file.
- *eof = true;
- return Status::OK();
- }
- return Status::DataQualityError(fmt::to_string(error_msg));
- }
+ RETURN_IF_CATCH_EXCEPTION(doris::vectorized::parse_json_to_variant(
+ column_object, StringRef {json_str, size}, _json_parser.get()));
+ // TODO correctly handle data quality error
+ RETURN_IF_ERROR(finalize_column());
return Status::OK();
}
diff --git a/be/src/vec/json/json_parser.cpp b/be/src/vec/json/json_parser.cpp
index 95dd0034c3..a05a1737b9 100644
--- a/be/src/vec/json/json_parser.cpp
+++ b/be/src/vec/json/json_parser.cpp
@@ -161,8 +161,9 @@ void JSONDataParser<ParserImpl>::traverseArrayElement(const Element& element,
if (current_nested_sizes.size() == ctx.current_size) {
current_nested_sizes.push_back(array_size);
} else if (array_size != current_nested_sizes.back()) {
- LOG(FATAL) << fmt::format("Array sizes mismatched ({} and {})", array_size,
- current_nested_sizes.back());
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "Array sizes mismatched ({} and {})", array_size,
+ current_nested_sizes.back());
}
}
path_array.push_back(std::move(values[i]));
diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp
index 7b12d3e7cd..1ae5d0d591 100644
--- a/be/src/vec/json/parse2column.cpp
+++ b/be/src/vec/json/parse2column.cpp
@@ -192,8 +192,8 @@ bool try_insert_default_from_nested(const std::shared_ptr<Node>& entry,
}
template <typename ParserImpl>
-Status parse_json_to_variant(IColumn& column, const char* src, size_t length,
- JSONDataParser<ParserImpl>* parser) {
+void parse_json_to_variant(IColumn& column, const char* src, size_t length,
+ JSONDataParser<ParserImpl>* parser) {
auto& column_object = assert_cast<ColumnObject&>(column);
std::optional<ParseResult> result;
/// Treat empty string as an empty object
@@ -205,8 +205,8 @@ Status parse_json_to_variant(IColumn& column, const char* src, size_t length,
}
if (!result) {
LOG(INFO) << "failed to parse " << std::string_view(src, length) << ", length= " << length;
- return Status::DataQualityError(
- fmt::format("Failed to parse object {}", std::string_view(src, length)));
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to parse object {}",
+ std::string_view(src, length));
}
auto& [paths, values] = *result;
assert(paths.size() == values.size());
@@ -214,18 +214,21 @@ Status parse_json_to_variant(IColumn& column, const char* src, size_t length,
size_t num_rows = column_object.size();
for (size_t i = 0; i < paths.size(); ++i) {
FieldInfo field_info;
- RETURN_IF_ERROR(get_field_info(values[i], &field_info));
+ get_field_info(values[i], &field_info);
// TODO support multi dimensions array
if (!config::enable_parse_multi_dimession_array && field_info.num_dimensions >= 2) {
- return Status::DataQualityError(
+ throw doris::Exception(
+ ErrorCode::INVALID_ARGUMENT,
"Sorry multi dimensions array is not supported now, we are working on it");
}
if (is_nothing(field_info.scalar_type)) {
continue;
}
if (!paths_set.insert(paths[i].get_path()).second) {
- return Status::DataQualityError(
- fmt::format("Object has ambiguous path {}, {}", paths[i].get_path()));
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Object has ambiguous path {}",
+ paths[i].get_path());
}
if (!column_object.has_subcolumn(paths[i])) {
@@ -237,16 +240,11 @@ Status parse_json_to_variant(IColumn& column, const char* src, size_t length,
}
auto* subcolumn = column_object.get_subcolumn(paths[i]);
if (!subcolumn) {
- return Status::DataQualityError(
- fmt::format("Failed to find sub column {}", paths[i].get_path()));
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to find sub column {}",
+ paths[i].get_path());
}
assert(subcolumn->size() == num_rows);
- Status st = subcolumn->insert(std::move(values[i]), std::move(field_info));
- if (st.is_invalid_argument()) {
- return Status::DataQualityError(
- fmt::format("Failed to insert field {}", st.to_string()));
- }
- RETURN_IF_ERROR(st);
+ subcolumn->insert(std::move(values[i]), std::move(field_info));
}
// /// Insert default values to missed subcolumns.
const auto& subcolumns = column_object.get_subcolumns();
@@ -259,7 +257,6 @@ Status parse_json_to_variant(IColumn& column, const char* src, size_t length,
}
}
column_object.incr_num_rows();
- return Status::OK();
}
bool extract_key(MutableColumns& columns, StringRef json, const std::vector<StringRef>& keys,
@@ -268,17 +265,16 @@ bool extract_key(MutableColumns& columns, StringRef json, const std::vector<Stri
}
// exposed interfaces
-Status parse_json_to_variant(IColumn& column, const StringRef& json,
- JSONDataParser<SimdJSONParser>* parser) {
+void parse_json_to_variant(IColumn& column, const StringRef& json,
+ JSONDataParser<SimdJSONParser>* parser) {
return parse_json_to_variant(column, json.data, json.size, parser);
}
-Status parse_json_to_variant(IColumn& column, const std::vector<StringRef>& jsons) {
+void parse_json_to_variant(IColumn& column, const std::vector<StringRef>& jsons) {
auto parser = parsers_pool.get([] { return new JSONDataParser<SimdJSONParser>(); });
for (StringRef str : jsons) {
- RETURN_IF_ERROR(parse_json_to_variant(column, str.data, str.size, parser.get()));
+ parse_json_to_variant(column, str.data, str.size, parser.get());
}
- return Status::OK();
}
bool extract_key(MutableColumns& columns, const std::vector<StringRef>& jsons,
diff --git a/be/src/vec/json/parse2column.h b/be/src/vec/json/parse2column.h
index 0bc13dd591..4e78d98164 100644
--- a/be/src/vec/json/parse2column.h
+++ b/be/src/vec/json/parse2column.h
@@ -35,12 +35,12 @@ class JSONDataParser;
namespace doris::vectorized {
-// parse a batch of json strings into column object
-Status parse_json_to_variant(IColumn& column, const std::vector<StringRef>& jsons);
+// parse a batch of json strings into column object, throws doris::Exception when it fails
+void parse_json_to_variant(IColumn& column, const std::vector<StringRef>& jsons);
-// parse a single json
-Status parse_json_to_variant(IColumn& column, const StringRef& jsons,
- JSONDataParser<SimdJSONParser>* parser);
+// parse a single json, throws doris::Exception when it fails
+void parse_json_to_variant(IColumn& column, const StringRef& jsons,
+ JSONDataParser<SimdJSONParser>* parser);
// extract keys columns from json strings into columns
bool extract_key(MutableColumns& columns, const std::vector<StringRef>& jsons,
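
The header now documents the throwing behaviour; in practice, Status-based call sites wrap the parse the same way new_json_reader.cpp does above. A small illustrative sketch follows; the wrapper function and include list are assumptions, not part of the patch.

#include "common/status.h"           // Status
#include "common/exception.h"        // RETURN_IF_CATCH_EXCEPTION (assumed path)
#include "vec/json/parse2column.h"

namespace doris::vectorized {

// Parses one JSON document into a variant (ColumnObject) column and converts
// any doris::Exception thrown by the parser back into an error Status.
Status example_parse_one(IColumn& variant_column, const StringRef& json,
                         JSONDataParser<SimdJSONParser>* parser) {
    RETURN_IF_CATCH_EXCEPTION(parse_json_to_variant(variant_column, json, parser));
    return Status::OK();
}

} // namespace doris::vectorized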
diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp
index 3070975a3c..1deff5f6d2 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -816,7 +816,7 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorStruct::convert_to_olap()
DataTypePtr sub_type = data_type_struct->get_element(i);
ColumnWithTypeAndName sub_typed_column = {sub_column, sub_type, ""};
_sub_convertors[i]->set_source_column(sub_typed_column, _row_pos, _num_rows);
- _sub_convertors[i]->convert_to_olap();
+ RETURN_IF_ERROR(_sub_convertors[i]->convert_to_olap());
_results[data_cursor] = _sub_convertors[i]->get_data();
_results[null_map_cursor] = _sub_convertors[i]->get_nullmap();
data_cursor++;
@@ -867,7 +867,7 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorArray::convert_to_olap(
ColumnWithTypeAndName item_typed_column = {
item_data, remove_nullable(data_type_array->get_nested_type()), ""};
_item_convertor->set_source_column(item_typed_column, start, size);
- _item_convertor->convert_to_olap();
+ RETURN_IF_ERROR(_item_convertor->convert_to_olap());
CollectionValue* collection_value = _values.data();
for (size_t i = 0; i < _num_rows; ++i, ++collection_value) {
@@ -954,12 +954,12 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorMap::convert_to_olap(
_base_offset += elem_size;
ColumnWithTypeAndName key_typed_column = {key_data, data_type_map->get_key_type(), "map.key"};
_key_convertor->set_source_column(key_typed_column, start_offset, elem_size);
- _key_convertor->convert_to_olap();
+ RETURN_IF_ERROR(_key_convertor->convert_to_olap());
ColumnWithTypeAndName value_typed_column = {value_data, data_type_map->get_value_type(),
"map.value"};
_value_convertor->set_source_column(value_typed_column, start_offset, elem_size);
- _value_convertor->convert_to_olap();
+ RETURN_IF_ERROR(_value_convertor->convert_to_olap());
// todo (Amory). put this value into MapValue
_results[0] = (void*)elem_size;
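
The convertor changes above all apply one pattern: a Status returned by a nested convert_to_olap() call was previously discarded and is now propagated with RETURN_IF_ERROR. A generic sketch of that pattern, using a hypothetical Convertor interface purely for illustration:

#include <memory>
#include <vector>
#include "common/status.h"   // doris::Status, RETURN_IF_ERROR

// Hypothetical stand-in for the convertor interface; only the Status-returning
// convert_to_olap() matters for the pattern being shown.
struct Convertor {
    virtual ~Convertor() = default;
    virtual doris::Status convert_to_olap() = 0;
};

// Every nested conversion is now checked: the first failure short-circuits and
// surfaces to the caller instead of being silently dropped.
doris::Status convert_all(const std::vector<std::unique_ptr<Convertor>>& subs) {
    for (const auto& sub : subs) {
        RETURN_IF_ERROR(sub->convert_to_olap());
    }
    return doris::Status::OK();
}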
diff --git a/lowercase.json b/lowercase.json
new file mode 100644
index 0000000000..f2728cd0aa
--- /dev/null
+++ b/lowercase.json
@@ -0,0 +1,5 @@
+{"xxxx": 1234}
+{"xxxx": 12345678}
+{"xxxx": "5679"}
+{"XXXX": "5679"}
+{"YYY": "5679"}
diff --git a/regression-test/data/dynamic_table_p0/floating_point.json b/regression-test/data/dynamic_table_p0/floating_point.json
new file mode 100644
index 0000000000..9d4e742a0e
--- /dev/null
+++ b/regression-test/data/dynamic_table_p0/floating_point.json
@@ -0,0 +1,4 @@
+{"type":1}
+{"c": 10000000000}
+{"c":1.0}
+{"ca":1}
diff --git a/regression-test/data/dynamic_table_p0/floating_point2.json b/regression-test/data/dynamic_table_p0/floating_point2.json
new file mode 100644
index 0000000000..5ad60cd4fc
--- /dev/null
+++ b/regression-test/data/dynamic_table_p0/floating_point2.json
@@ -0,0 +1,4 @@
+{"type":1}
+{"a": 10000000000}
+{"a":1.0}
+{"a":1}
diff --git a/regression-test/data/dynamic_table_p0/floating_point3.json b/regression-test/data/dynamic_table_p0/floating_point3.json
new file mode 100644
index 0000000000..eab9e4c757
--- /dev/null
+++ b/regression-test/data/dynamic_table_p0/floating_point3.json
@@ -0,0 +1,4 @@
+{"type":1}
+{"c": "10110101"}
+{"c":1991.0222}
+{"c":100000}
diff --git a/regression-test/data/dynamic_table_p0/invalid_dimension.json b/regression-test/data/dynamic_table_p0/invalid_dimension.json
new file mode 100644
index 0000000000..23311c2692
--- /dev/null
+++ b/regression-test/data/dynamic_table_p0/invalid_dimension.json
@@ -0,0 +1,4 @@
+{"type":1}
+{"b":[1, 2]}
+{"b":""}
+{"b":1}
diff --git a/regression-test/data/dynamic_table_p0/invalid_format.json b/regression-test/data/dynamic_table_p0/invalid_format.json
new file mode 100644
index 0000000000..d9eca95a2a
--- /dev/null
+++ b/regression-test/data/dynamic_table_p0/invalid_format.json
@@ -0,0 +1,3 @@
+{"type":1}
+{"a:[1, 2]}
+{"a":1}
diff --git a/regression-test/data/dynamic_table_p0/load.out b/regression-test/data/dynamic_table_p0/load.out
new file mode 100644
index 0000000000..59e01e1dd4
--- /dev/null
+++ b/regression-test/data/dynamic_table_p0/load.out
@@ -0,0 +1,1939 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+42890
+
+-- !sql --
+11924
+
+-- !sql --
+23413
+
+-- !sql --
+23413
+
+-- !sql --
+23413
+
+-- !sql --
+27232
+
+-- !sql --
+23413
+
+-- !sql --
+23413
+
+-- !sql --
+38488
+
+-- !sql --
+38488
+
+-- !sql --
+38488
+
+-- !sql --
+38488
+
+-- !sql --
+38488
+
+-- !sql --
+38488
+
+-- !sql --
+12051
+
+-- !sql --
+12051
+
+-- !sql --
+12051
+
+-- !sql --
+12051
+
+-- !sql --
+12051
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5369
+
+-- !sql --
+5339
+
+-- !sql --
+5592
+
+-- !sql --
+5592
+
+-- !sql --
+4768
+
+-- !sql --
+4588
+
+-- !sql --
+1035
+
+-- !sql --
+1035
+
+-- !sql --
+1035
+
+-- !sql --
+1035
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+3407
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+4084
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1298
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+789
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+1319
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+175
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+70
+
+-- !sql --
+181
+
+-- !sql --
+181
+
+-- !sql --
+175
+
+-- !sql --
+1719
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2115
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2087
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+1952
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2142
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2094
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+1972
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+2151
+
+-- !sql --
+1719
+
+-- !sql --
+1719
+
+-- !sql --
+1719
+
+-- !sql --
+1719
+
+-- !sql --
+1719
+
+-- !sql --
+1719
+
+-- !sql --
+1719
+
+-- !sql --
+1719
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+524
+
+-- !sql --
+1721
+
+-- !sql --
+432
+
+-- !sql --
+656
+
+-- !sql --
+520
+
+-- !sql --
+432
+
+-- !sql --
+677
+
+-- !sql --
+432
+
+-- !sql --
+432
+
+-- !sql --
+432
+
+-- !sql --
+432
+
+-- !sql --
+432
+
+-- !sql --
+1305
+
+-- !sql --
+1135
+
+-- !sql --
+1167
+
+-- !sql --
+2189
+
+-- !sql --
+2189
+
+-- !sql --
+2189
+
+-- !sql --
+843
+
+-- !sql --
+680
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+660
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+119
+
+-- !sql --
+223
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+438
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+515
+
+-- !sql --
+195
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+52
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+40
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+58
+
+-- !sql --
+15
+
+-- !sql --
+391
+
+-- !sql --
+391
+
+-- !sql --
+391
+
+-- !sql --
+391
+
+-- !sql --
+391
+
+-- !sql --
+98
+
+-- !sql --
+79
+
+-- !sql --
+2
+
+-- !sql --
+12
+
+-- !sql --
+16
+
+-- !sql --
+220
+
+-- !sql --
+42
+
diff --git a/regression-test/data/dynamic_table_p0/lowercase.json b/regression-test/data/dynamic_table_p0/lowercase.json
new file mode 100644
index 0000000000..b5fed5f0a0
--- /dev/null
+++ b/regression-test/data/dynamic_table_p0/lowercase.json
@@ -0,0 +1,4 @@
+{"xxx": 1234}
+{"xxx": "5679"}
+{"XXX": "5679"}
+{"XXX": "5679"}
diff --git a/regression-test/data/dynamic_table_p0/sql/q05.out b/regression-test/data/dynamic_table_p0/sql/q05.out
index e53460d268..583b8101a4 100644
--- a/regression-test/data/dynamic_table_p0/sql/q05.out
+++ b/regression-test/data/dynamic_table_p0/sql/q05.out
@@ -3,27 +3,27 @@
0
-- !q05_2 --
-30417
+42890
-- !q05_3 --
+oliver006/elasticsearch-gmail 37
+prakhar1989/awesome-courses 28
getguesstimate/guesstimate-app 26
+cachethq/Cachet 17
ericelliott/essential-javascript-links 16
-FreeCodeCamp/FreeCodeCamp 14
-tj/frontend-boilerplate 14
-prakhar1989/awesome-courses 12
-- !q05_4 --
3487211075
-- !q05_5 --
-94350289813772
+125406378528208
-- !q05_6 --
-518892546
+704293861
-- !q05_7 --
0
-- !q05_8 --
-56947
+85450
diff --git a/regression-test/data/dynamic_table_p0/uppercase.json b/regression-test/data/dynamic_table_p0/uppercase.json
new file mode 100644
index 0000000000..a1236a9ec9
--- /dev/null
+++ b/regression-test/data/dynamic_table_p0/uppercase.json
@@ -0,0 +1,4 @@
+{"A":1}
+{"B":1}
+{"C":1}
+{"D":1}
diff --git a/regression-test/suites/dynamic_table_p0/load.groovy b/regression-test/suites/dynamic_table_p0/load.groovy
index 4837ca3cd1..6ac6836194 100644
--- a/regression-test/suites/dynamic_table_p0/load.groovy
+++ b/regression-test/suites/dynamic_table_p0/load.groovy
@@ -126,6 +126,28 @@ suite("regression_test_dynamic_table", "dynamic_table"){
sql """insert into test_ghdata_json_unique select * from test_ghdata_json"""
sql """insert into test_btc_json_unique select * from test_btc_json"""
+ // abnormal cases
+ table_name = "abnormal_cases"
+ sql """
+ DROP TABLE IF EXISTS ${table_name};
+ """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${table_name} (
+ qid bigint,
+ XXXX bigint,
+ ...
+ )
+ DUPLICATE KEY(`qid`)
+ DISTRIBUTED BY HASH(`qid`) BUCKETS 5
+ properties("replication_num" = "1");
+ """
+ load_json_data.call(table_name, 'true', 'json', 'true', "invalid_dimension.json", 'false')
+ load_json_data.call(table_name, 'true', 'json', 'true', "invalid_format.json", 'false')
+ load_json_data.call(table_name, 'true', 'json', 'true', "floating_point.json", 'true')
+ load_json_data.call(table_name, 'true', 'json', 'true', "floating_point2.json", 'true')
+ load_json_data.call(table_name, 'true', 'json', 'true', "floating_point3.json", 'true')
+ load_json_data.call(table_name, 'true', 'json', 'true', "uppercase.json", 'true')
+
// load more
table_name = "gharchive";
sql "DROP TABLE IF EXISTS ${table_name}"
@@ -145,6 +167,7 @@ suite("regression_test_dynamic_table", "dynamic_table"){
);
"""
def paths = [
+ """${getS3Url() + '/regression/gharchive/2015-01-01-22.json'}""",
"""${getS3Url() + '/regression/gharchive/2015-01-01-16.json'}""",
"""${getS3Url() + '/regression/gharchive/2016-01-01-16.json'}""",
]
@@ -176,4 +199,58 @@ suite("regression_test_dynamic_table", "dynamic_table"){
}
}
}
+
+ sql 'sync'
+ meta = sql_meta 'select * from gharchive limit 1'
+ def array_cols = [
+ "payload.commits.url",
+ "payload.commits.sha",
+ "payload.commits.author.email",
+ "payload.commits.distinct",
+ "payload.commits.author.name",
+ "payload.commits.message",
+ "payload.issue.labels.name",
+ "payload.issue.labels.color",
+ "payload.issue.labels.url",
+ "payload.pages.title",
+ "payload.pages.html_url",
+ "payload.pages.sha",
+ "payload.pages.action",
+ "payload.pages.page_name",
+ "payload.release.assets.uploader.repos_url",
+ "payload.release.assets.uploader.id",
+ "payload.release.assets.uploader.organizations_url",
+ "payload.release.assets.uploader.received_events_url",
+ "payload.release.assets.uploader.site_admin",
+ "payload.release.assets.uploader.subscriptions_url",
+ "payload.release.assets.state",
+ "payload.release.assets.size",
+ "payload.release.assets.uploader.following_url",
+ "payload.release.assets.uploader.starred_url",
+ "payload.release.assets.download_count",
+ "payload.release.assets.created_at",
+ "payload.release.assets.updated_at",
+ "payload.release.assets.browser_download_url",
+ "payload.release.assets.url",
+ "payload.release.assets.uploader.gravatar_id",
+ "payload.release.assets.uploader.gists_url",
+ "payload.release.assets.uploader.url",
+ "payload.release.assets.content_type",
+ "payload.release.assets.name",
+ "payload.release.assets.uploader.login",
+ "payload.release.assets.uploader.avatar_url",
+ "payload.release.assets.uploader.html_url",
+ "payload.release.assets.uploader.followers_url",
+ "payload.release.assets.uploader.events_url",
+ "payload.release.assets.uploader.type",
+ "payload.release.assets.id",
+ "payload.release.assets.label"
+ ]
+ for (List<String> col_meta in meta) {
+ if (col_meta[0] in array_cols) {
+ qt_sql "select sum(array_size(`${col_meta[0]}`)) from gharchive"
+ } else {
+ qt_sql "select count(`${col_meta[0]}`) from gharchive"
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org