You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by "eldenmoon (via GitHub)" <gi...@apache.org> on 2023/04/19 12:58:07 UTC

[GitHub] [doris] eldenmoon commented on a diff in pull request #18819: [Refact](type system)refact interconversion for jsonb with column

eldenmoon commented on code in PR #18819:
URL: https://github.com/apache/doris/pull/18819#discussion_r1171285918


##########
be/src/vec/data_types/serde/data_type_decimal_serde.h:
##########
@@ -71,5 +76,47 @@ Status DataTypeDecimalSerDe<T>::read_column_from_pb(IColumn& column, const PValu
 
     return Status::NotSupported("unknown ColumnType for reading from pb");
 }
+
+template <typename T>
+Status DataTypeDecimalSerDe<T>::write_column_to_jsonb(const IColumn& column, JsonbWriter& result,
+                                                      Arena* mem_pool, const int32_t col_id,
+                                                      const int row_num) const {
+    StringRef data_ref = column.get_data_at(row_num);
+    result.writeKey(col_id);
+    if constexpr (std::is_same_v<T, Decimal<Int128>>) {
+        Decimal128::NativeType val =
+                *reinterpret_cast<const Decimal128::NativeType*>(data_ref.data);
+        result.writeInt128(val);
+    } else if constexpr (std::is_same_v<T, Decimal<Int128I>>) {
+        Decimal64::NativeType val = *reinterpret_cast<const Decimal64::NativeType*>(data_ref.data);
+        result.writeInt128(val);
+    } else if constexpr (std::is_same_v<T, Decimal<Int32>>) {
+        Decimal32::NativeType val = *reinterpret_cast<const Decimal32::NativeType*>(data_ref.data);
+        result.writeInt32(val);
+    } else if constexpr (std::is_same_v<T, Decimal<Int64>>) {
+        Decimal64::NativeType val = *reinterpret_cast<const Decimal64::NativeType*>(data_ref.data);
+        result.writeInt64(val);
+    } else {
+        return Status::NotSupported("unknown Column '{}' for writing to jsonb", column.get_name());
+    }
+    return Status::OK();
+}
+
+template <typename T>
+Status DataTypeDecimalSerDe<T>::read_column_from_jsonb(IColumn& column,
+                                                       const JsonbValue* arg) const {
+    if constexpr (std::is_same_v<T, Decimal<Int128>>) {
+        column.insert(static_cast<const JsonbInt128Val*>(arg)->val());

Review Comment:
   insert(Field) may be slower than insert_data(const char*)



##########
be/src/vec/data_types/serde/data_type_array_serde.h:
##########
@@ -32,6 +33,11 @@ class DataTypeArraySerDe : public DataTypeSerDe {
         LOG(FATAL) << "Not support read from pb to array";
     }
 
+    Status write_column_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool,

Review Comment:
   Why should this function return Status?



##########
be/src/vec/data_types/serde/data_type_array_serde.h:
##########
@@ -32,6 +33,11 @@ class DataTypeArraySerDe : public DataTypeSerDe {
         LOG(FATAL) << "Not support read from pb to array";
     }
 
+    Status write_column_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool,
+                                 const int32_t col_id, const int row_num) const override;

Review Comment:
   Plain `int32_t col_id` should be fine; `const` on a by-value parameter is useless here



##########
be/src/vec/jsonb/serialize.cpp:
##########
@@ -17,268 +17,265 @@
 
 #include "vec/jsonb/serialize.h"
 
-#include "olap/hll.h"
 #include "olap/tablet_schema.h"
-#include "runtime/jsonb_value.h"
 #include "util/jsonb_stream.h"
 #include "util/jsonb_writer.h"
 #include "vec/common/arena.h"
-#include "vec/core/types.h"
 
 namespace doris::vectorized {
 
-static inline bool is_column_null_at(int row, const IColumn* column, const doris::FieldType& type,
-                                     const StringRef& data_ref) {
-    if (type != FieldType::OLAP_FIELD_TYPE_ARRAY) {
-        return data_ref.data == nullptr;
-    } else {
-        Field array;
-        column->get(row, array);
-        return array.is_null();
-    }
-}
-
-static bool is_jsonb_blob_type(FieldType type) {
-    return type == FieldType::OLAP_FIELD_TYPE_VARCHAR || type == FieldType::OLAP_FIELD_TYPE_CHAR ||
-           type == FieldType::OLAP_FIELD_TYPE_STRING || type == FieldType::OLAP_FIELD_TYPE_STRUCT ||
-           type == FieldType::OLAP_FIELD_TYPE_ARRAY || type == FieldType::OLAP_FIELD_TYPE_MAP ||
-           type == FieldType::OLAP_FIELD_TYPE_HLL || type == FieldType::OLAP_FIELD_TYPE_OBJECT ||
-           type == FieldType::OLAP_FIELD_TYPE_JSONB;
-}
-
-// jsonb -> column value
-static void deserialize_column(PrimitiveType type, JsonbValue* slot_value, MutableColumnPtr& dst) {
-    if (type == TYPE_ARRAY) {
-        assert(slot_value->isBinary());
-        auto blob = static_cast<JsonbBlobVal*>(slot_value);
-        dst->deserialize_and_insert_from_arena(blob->getBlob());
-    } else if (type == TYPE_OBJECT) {
-        assert(slot_value->isBinary());
-        auto blob = static_cast<JsonbBlobVal*>(slot_value);
-        BitmapValue bitmap_value;
-        bitmap_value.deserialize(blob->getBlob());
-        dst->insert_data(reinterpret_cast<const char*>(&bitmap_value), sizeof(BitmapValue));
-    } else if (type == TYPE_HLL) {
-        assert(slot_value->isBinary());
-        auto blob = static_cast<JsonbBlobVal*>(slot_value);
-        HyperLogLog hyper_log_log;
-        Slice data {blob->getBlob(), blob->getBlobLen()};
-        hyper_log_log.deserialize(data);
-        dst->insert_data(reinterpret_cast<const char*>(&hyper_log_log), sizeof(HyperLogLog));
-    } else if (is_string_type(type)) {
-        assert(slot_value->isBinary());
-        auto blob = static_cast<JsonbBlobVal*>(slot_value);
-        dst->insert_data(blob->getBlob(), blob->getBlobLen());
-    } else {
-        switch (type) {
-        case TYPE_BOOLEAN: {
-            assert(slot_value->isInt8());
-            dst->insert(static_cast<JsonbInt8Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_TINYINT: {
-            assert(slot_value->isInt8());
-            dst->insert(static_cast<JsonbInt8Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_SMALLINT: {
-            assert(slot_value->isInt16());
-            dst->insert(static_cast<JsonbInt16Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_INT: {
-            assert(slot_value->isInt32());
-            dst->insert(static_cast<JsonbInt32Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_BIGINT: {
-            assert(slot_value->isInt64());
-            dst->insert(static_cast<JsonbInt64Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_LARGEINT: {
-            assert(slot_value->isInt128());
-            dst->insert(static_cast<JsonbInt128Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_FLOAT: {
-            dst->insert(static_cast<JsonbFloatVal*>(slot_value)->val());
-            break;
-        }
-        case TYPE_DOUBLE: {
-            dst->insert(static_cast<JsonbDoubleVal*>(slot_value)->val());
-            break;
-        }
-        case TYPE_DATE: {
-            assert(slot_value->isInt32());
-            int32_t val = static_cast<JsonbInt32Val*>(slot_value)->val();
-            dst->insert_many_fix_len_data(reinterpret_cast<const char*>(&val), 1);
-            break;
-        }
-        case TYPE_DATETIME: {
-            assert(slot_value->isInt64());
-            int64_t val = static_cast<JsonbInt64Val*>(slot_value)->val();
-            dst->insert_many_fix_len_data(reinterpret_cast<const char*>(&val), 1);
-            break;
-        }
-        case TYPE_DATEV2: {
-            assert(slot_value->isInt32());
-            dst->insert(static_cast<JsonbInt32Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_DATETIMEV2: {
-            assert(slot_value->isInt64());
-            dst->insert(static_cast<JsonbInt64Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_DECIMAL32: {
-            assert(slot_value->isInt32());
-            dst->insert(static_cast<JsonbInt32Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_DECIMAL64: {
-            assert(slot_value->isInt64());
-            dst->insert(static_cast<JsonbInt64Val*>(slot_value)->val());
-            break;
-        }
-        case TYPE_DECIMAL128I: {
-            assert(slot_value->isInt128());
-            dst->insert(static_cast<JsonbInt128Val*>(slot_value)->val());
-            break;
-        }
-        default:
-            LOG(FATAL) << "unknow type " << type;
-            break;
-        }
-    }
-}
-
-// column value -> jsonb
-static void serialize_column(Arena* mem_pool, const TabletColumn& tablet_column,
-                             const IColumn* column, const StringRef& data_ref, int row,
-                             JsonbWriterT<JsonbOutStream>& jsonb_writer) {
-    if (is_column_null_at(row, column, tablet_column.type(), data_ref)) {
-        // Do nothing
-        return;
-    }
-    jsonb_writer.writeKey(tablet_column.unique_id());
-    if (tablet_column.is_array_type()) {
-        const char* begin = nullptr;
-        StringRef value = column->serialize_value_into_arena(row, *mem_pool, begin);
-        jsonb_writer.writeStartBinary();
-        jsonb_writer.writeBinary(value.data, value.size);
-        jsonb_writer.writeEndBinary();
-    } else if (tablet_column.type() == FieldType::OLAP_FIELD_TYPE_OBJECT) {
-        auto bitmap_value = (BitmapValue*)(data_ref.data);
-        auto size = bitmap_value->getSizeInBytes();
-        // serialize the content of string
-        auto ptr = mem_pool->alloc(size);
-        bitmap_value->write_to(reinterpret_cast<char*>(ptr));
-        jsonb_writer.writeStartBinary();
-        jsonb_writer.writeBinary(reinterpret_cast<const char*>(ptr), size);
-        jsonb_writer.writeEndBinary();
-    } else if (tablet_column.type() == FieldType::OLAP_FIELD_TYPE_HLL) {
-        auto hll_value = (HyperLogLog*)(data_ref.data);
-        auto size = hll_value->max_serialized_size();
-        auto ptr = reinterpret_cast<char*>(mem_pool->alloc(size));
-        size_t actual_size = hll_value->serialize((uint8_t*)ptr);
-        jsonb_writer.writeStartBinary();
-        jsonb_writer.writeBinary(reinterpret_cast<const char*>(ptr), actual_size);
-        jsonb_writer.writeEndBinary();
-    } else if (is_jsonb_blob_type(tablet_column.type())) {
-        jsonb_writer.writeStartBinary();
-        jsonb_writer.writeBinary(reinterpret_cast<const char*>(data_ref.data), data_ref.size);
-        jsonb_writer.writeEndBinary();
-    } else {
-        switch (tablet_column.type()) {
-        case FieldType::OLAP_FIELD_TYPE_BOOL: {
-            int8_t val = *reinterpret_cast<const int8_t*>(data_ref.data);
-            jsonb_writer.writeInt8(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_TINYINT: {
-            int8_t val = *reinterpret_cast<const int8_t*>(data_ref.data);
-            jsonb_writer.writeInt8(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_SMALLINT: {
-            int16_t val = *reinterpret_cast<const int16_t*>(data_ref.data);
-            jsonb_writer.writeInt16(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_INT: {
-            int32_t val = *reinterpret_cast<const int32_t*>(data_ref.data);
-            jsonb_writer.writeInt32(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_BIGINT: {
-            int64_t val = *reinterpret_cast<const int64_t*>(data_ref.data);
-            jsonb_writer.writeInt64(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_LARGEINT: {
-            __int128_t val = *reinterpret_cast<const __int128_t*>(data_ref.data);
-            jsonb_writer.writeInt128(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_FLOAT: {
-            float val = *reinterpret_cast<const float*>(data_ref.data);
-            jsonb_writer.writeFloat(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_DOUBLE: {
-            double val = *reinterpret_cast<const double*>(data_ref.data);
-            jsonb_writer.writeDouble(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_DATE: {
-            const auto* datetime_cur = reinterpret_cast<const VecDateTimeValue*>(data_ref.data);
-            jsonb_writer.writeInt32(datetime_cur->to_olap_date());
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_DATETIME: {
-            const auto* datetime_cur = reinterpret_cast<const VecDateTimeValue*>(data_ref.data);
-            jsonb_writer.writeInt64(datetime_cur->to_olap_datetime());
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_DATEV2: {
-            uint32_t val = *reinterpret_cast<const uint32_t*>(data_ref.data);
-            jsonb_writer.writeInt32(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
-            uint64_t val = *reinterpret_cast<const uint64_t*>(data_ref.data);
-            jsonb_writer.writeInt64(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_DECIMAL32: {
-            Decimal32::NativeType val =
-                    *reinterpret_cast<const Decimal32::NativeType*>(data_ref.data);
-            jsonb_writer.writeInt32(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_DECIMAL64: {
-            Decimal64::NativeType val =
-                    *reinterpret_cast<const Decimal64::NativeType*>(data_ref.data);
-            jsonb_writer.writeInt64(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: {
-            Decimal128I::NativeType val =
-                    *reinterpret_cast<const Decimal128I::NativeType*>(data_ref.data);
-            jsonb_writer.writeInt128(val);
-            break;
-        }
-        case FieldType::OLAP_FIELD_TYPE_DECIMAL:
-            LOG(FATAL)
-                    << "FieldType::OLAP_FIELD_TYPE_DECIMAL not implemented use DecimalV3 instead";
-            break;
-        default:
-            LOG(FATAL) << "unknow type " << int(tablet_column.type());
-            break;
-        }
-    }
-}
+//static inline bool is_column_null_at(int row, const IColumn* column, const doris::FieldType& type,

Review Comment:
   remove unused code



##########
be/src/vec/data_types/serde/data_type_number_serde.h:
##########
@@ -157,5 +162,62 @@ Status DataTypeNumberSerDe<T>::write_column_to_pb(const IColumn& column, PValues
     return Status::OK();
 }
 
+template <typename T>
+Status DataTypeNumberSerDe<T>::read_column_from_jsonb(IColumn& column,
+                                                      const JsonbValue* arg) const {
+    if constexpr (std::is_same_v<T, Int8>) {
+        column.insert(static_cast<const JsonbInt8Val*>(arg)->val());

Review Comment:
   Same as above: insert(Field) is slower



##########
be/src/vec/data_types/serde/data_type_array_serde.h:
##########
@@ -32,6 +33,11 @@ class DataTypeArraySerDe : public DataTypeSerDe {
         LOG(FATAL) << "Not support read from pb to array";
     }
 
+    Status write_column_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool,

Review Comment:
   I think we should not return any status for this function



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org