You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/29 04:37:31 UTC

[doris] branch branch-1.2-lts updated (7d8a94604e -> be513e1062)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


    from 7d8a94604e [Bug] #14876 && #15225 have some bugs in rewrite or to in, revert them (#15420)
     new 740363019c [feature](BE)pad missed version with empty rowset (#15030)
     new b204d46615 [Improvement](JSONB) improve performance JSONB initial json parsing using simdjson (#15219)
     new d2fa5078b9 [bug](jdbc) fix jdbc external table with char type length error (#15386)
     new 5b99fee593 [Improvement](meta) update show create function result (#15414)
     new be513e1062 [fix](multi catalog)Set column defualt value for query. (#15415)

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .licenserc.yaml                                    |   1 +
 be/CMakeLists.txt                                  |   2 +
 be/src/http/CMakeLists.txt                         |   1 +
 be/src/http/action/pad_rowset_action.cpp           | 105 +++++++
 .../action/{meta_action.h => pad_rowset_action.h}  |  27 +-
 be/src/runtime/jsonb_value.h                       |   4 +-
 be/src/service/http_service.cpp                    |   4 +
 be/src/util/jsonb_error.h                          |  10 +-
 be/src/util/jsonb_parser_simd.h                    | 350 +++++++++++++++++++++
 be/src/vec/exec/scan/new_jdbc_scan_node.cpp        |   7 +-
 be/src/vec/exec/scan/new_jdbc_scan_node.h          |   1 +
 be/src/vec/exec/scan/new_jdbc_scanner.cpp          |   7 +-
 be/src/vec/exec/scan/new_jdbc_scanner.h            |   6 +-
 be/src/vec/exec/scan/vfile_scanner.cpp             |  39 +--
 be/src/vec/exec/vjdbc_connector.cpp                |  13 +-
 be/src/vec/exec/vjdbc_connector.h                  |   1 +
 be/src/vec/functions/function_jsonb.cpp            |   9 +-
 be/test/olap/tablet_test.cpp                       |  55 ++++
 .../{restore-tablet.md => pad-rowset.md}           |  12 +-
 docs/sidebars.json                                 |   1 +
 .../{restore-tablet.md => pad_rowset.md}           |  14 +-
 .../apache/doris/catalog/AggregateFunction.java    |  35 ++-
 .../org/apache/doris/catalog/ScalarFunction.java   |  14 +-
 .../org/apache/doris/planner/JdbcScanNode.java     |   1 +
 gensrc/thrift/PlanNodes.thrift                     |   1 +
 25 files changed, 642 insertions(+), 78 deletions(-)
 create mode 100644 be/src/http/action/pad_rowset_action.cpp
 copy be/src/http/action/{meta_action.h => pad_rowset_action.h} (71%)
 create mode 100644 be/src/util/jsonb_parser_simd.h
 copy docs/en/docs/admin-manual/http-actions/{restore-tablet.md => pad-rowset.md} (71%)
 copy docs/zh-CN/docs/admin-manual/http-actions/{restore-tablet.md => pad_rowset.md} (71%)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 02/05: [Improvement](JSONB) improve performance JSONB initial json parsing using simdjson (#15219)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b204d46615608116e645a5f1c060a6528a2a005f
Author: Kang <kx...@gmail.com>
AuthorDate: Thu Dec 29 09:29:09 2022 +0800

    [Improvement](JSONB) improve performance JSONB initial json parsing using simdjson (#15219)
    
    test data: https://data.gharchive.org/2020-11-13-18.json.gz, 2GB, 197696 lines
    before: String 13s vs. JSONB 28s
    after: String 13s vs. JSONB 16s
    
    **NOTICE: simdjson needs to be patched because BOOL conflicts with the macro BOOL defined in odbc sqltypes.h**
---
 .licenserc.yaml                         |   1 +
 be/CMakeLists.txt                       |   2 +
 be/src/runtime/jsonb_value.h            |   4 +-
 be/src/util/jsonb_error.h               |  10 +-
 be/src/util/jsonb_parser_simd.h         | 350 ++++++++++++++++++++++++++++++++
 be/src/vec/functions/function_jsonb.cpp |   9 +-
 6 files changed, 369 insertions(+), 7 deletions(-)

diff --git a/.licenserc.yaml b/.licenserc.yaml
index d458e45269..020ee7b4e8 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -52,6 +52,7 @@ header:
     - "be/src/util/jsonb_document.h"
     - "be/src/util/jsonb_error.h"
     - "be/src/util/jsonb_parser.h"
+    - "be/src/util/jsonb_parser_simd.h"
     - "be/src/util/jsonb_stream.h"
     - "be/src/util/jsonb_updater.h"
     - "be/src/util/jsonb_utils.h"
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index cf51ddc492..915c207189 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -492,6 +492,8 @@ if ("${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86" OR "${CMAKE_BUILD_TARGET_ARCH}"
     if (USE_AVX2)
         set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -mavx2")
     endif()
+    # enable SSE4.2 for simdjson (note: the flag set here is -msse4.2, not -mlzcnt)
+    set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -msse4.2")
 endif()
 set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-attributes -DS2_USE_GFLAGS -DS2_USE_GLOG")
 
diff --git a/be/src/runtime/jsonb_value.h b/be/src/runtime/jsonb_value.h
index bdb5ba4976..1e03518d9d 100644
--- a/be/src/runtime/jsonb_value.h
+++ b/be/src/runtime/jsonb_value.h
@@ -22,7 +22,7 @@
 #include "util/cpu_info.h"
 #include "util/hash_util.hpp"
 #include "util/jsonb_error.h"
-#include "util/jsonb_parser.h"
+#include "util/jsonb_parser_simd.h"
 #include "util/jsonb_utils.h"
 #include "vec/common/string_ref.h"
 
@@ -38,7 +38,7 @@ struct JsonBinaryValue {
     // default nullprt and size 0 for invalid or NULL value
     const char* ptr = nullptr;
     size_t len = 0;
-    JsonbParser parser;
+    JsonbParserSIMD parser;
 
     JsonBinaryValue() : ptr(nullptr), len(0) {}
     JsonBinaryValue(char* ptr, int len) { from_json_string(const_cast<const char*>(ptr), len); }
diff --git a/be/src/util/jsonb_error.h b/be/src/util/jsonb_error.h
index 77d6fa16d0..2ad632fb8b 100644
--- a/be/src/util/jsonb_error.h
+++ b/be/src/util/jsonb_error.h
@@ -30,12 +30,14 @@ enum class JsonbErrType {
     E_EMPTY_DOCUMENT,
     E_OUTPUT_FAIL,
     E_INVALID_DOCU,
+    E_INVALID_TYPE,
     E_INVALID_SCALAR_VALUE,
     E_INVALID_KEY_STRING,
     E_INVALID_KEY_LENGTH,
     E_INVALID_STR,
     E_INVALID_OBJ,
     E_INVALID_ARR,
+    E_INVALID_NUMBER,
     E_INVALID_HEX,
     E_INVALID_OCTAL,
     E_INVALID_DECIMAL,
@@ -53,6 +55,7 @@ enum class JsonbErrType {
     E_INVALID_JSONB_OBJ,
     E_NESTING_LVL_OVERFLOW,
     E_INVALID_DOCU_COMPAT,
+    E_EXCEPTION,
 
     // new error code should always be added above
     E_NUM_ERRORS
@@ -77,13 +80,15 @@ private:
             "Invalid document version",
             "Empty document",
             "Fatal error in writing JSONB",
-            "Invalid document: document must be an object or an array",
+            "Invalid document",
+            "Invalid json value type",
             "Invalid scalar value",
             "Invalid key string",
             "Key length exceeds maximum size allowed (64 bytes)",
             "Invalid string value",
             "Invalid JSON object",
             "Invalid JSON array",
+            "Invalid number",
             "Invalid HEX number",
             "Invalid octal number",
             "Invalid decimal number",
@@ -100,7 +105,8 @@ private:
             "Invalid update operation",
             "Invalid JSONB object (internal)",
             "Object or array has too many nesting levels",
-            "Invalid document: document must be an object or an array",
+            "Invalid document",
+            "Exception throwed",
 
             nullptr /* E_NUM_ERRORS */
     };
diff --git a/be/src/util/jsonb_parser_simd.h b/be/src/util/jsonb_parser_simd.h
new file mode 100644
index 0000000000..10d19a3f57
--- /dev/null
+++ b/be/src/util/jsonb_parser_simd.h
@@ -0,0 +1,350 @@
+/*
+ *  Copyright (c) 2014, Facebook, Inc.
+ *  All rights reserved.
+ *
+ *  This source code is licensed under the BSD-style license found in the
+ *  LICENSE file in the root directory of this source tree. An additional grant
+ *  of patent rights can be found in the PATENTS file in the same directory.
+ *
+ */
+
+/*
+ * This file defines JsonbParserTSIMD (template) and JsonbParserSIMD.
+ *
+ * JsonbParserTSIMD is a template class which implements a JSON parser.
+ * JsonbParserTSIMD parses JSON text, and serialize it to JSONB binary format
+ * by using JsonbWriterT object. By default, JsonbParserTSIMD creates a new
+ * JsonbWriterT object with an output stream object.  However, you can also
+ * pass in your JsonbWriterT or any stream object that implements some basic
+ * interface of std::ostream (see JsonbStream.h).
+ *
+ * JsonbParserSIMD specializes JsonbParserTSIMD with JsonbOutStream type (see
+ * JsonbStream.h). So unless you want to provide your own output stream
+ * type, use a JsonbParserSIMD object.
+ *
+ * ** Parsing JSON **
+ * JsonbParserTSIMD parses JSON string, and directly serializes into JSONB
+ * packed bytes. There are three ways to parse a JSON string: (1) using
+ * std::string, (2) using c-string, (3) using c-string with len. You can
+ * use custom streambuf to redirect output. JsonbOutBuffer is a streambuf used
+ * internally if the input is raw character buffer.
+ *
+ * You can reuse an JsonbParserTSIMD object to parse/serialize multiple JSON
+ * strings, and the previous JSONB will be overwritten.
+ *
+ * If parsing fails (returned false), the error code will be set to one of
+ * JsonbErrType, and can be retrieved by calling getErrorCode().
+ *
+ * ** External dictionary **
+ * During parsing a JSON string, you can pass a call-back function to map a key
+ * string to an id, and store the dictionary id in JSONB to save space. The
+ * purpose of using an external dictionary is more towards a collection of
+ * documents (which has common keys) rather than a single document, so that
+ * space saving will be significant.
+ *
+ * ** Endianness **
+ * Note: JSONB serialization doesn't assume endianness of the server. However
+ * you will need to ensure that the endianness at the reader side is the same
+ * as that at the writer side (if they are on different machines). Otherwise,
+ * proper conversion is needed when a number value is returned to the
+ * caller/writer.
+ *
+ * @author Tian Xia <ti...@fb.com>
+ * 
+ * this file is copied from 
+ * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonJsonParser.h
+ * and modified by Doris
+ */
+
+#ifndef JSONB_JSONBJSONPARSERSIMD_H
+#define JSONB_JSONBJSONPARSERSIMD_H
+
+#include <simdjson.h>
+
+#include <cmath>
+#include <limits>
+
+#include "jsonb_document.h"
+#include "jsonb_error.h"
+#include "jsonb_writer.h"
+#include "string_parser.hpp"
+
+namespace doris {
+
+/*
+ * Template JsonbParserTSIMD: parses JSON text with simdjson's ondemand API and
+ * serializes it to JSONB through a JsonbWriterT<OS_TYPE>. The public parse()
+ * overloads return false on failure and getErrorCode() then gives the reason;
+ * the parser can be reused because parse() resets writer and error code first.
+ */
+template <class OS_TYPE>
+class JsonbParserTSIMD {
+public:
+    JsonbParserTSIMD() : err_(JsonbErrType::E_NONE) {}
+
+    explicit JsonbParserTSIMD(OS_TYPE& os) : writer_(os), err_(JsonbErrType::E_NONE) {}
+
+    // parse a UTF-8 JSON string
+    bool parse(const std::string& str, hDictInsert handler = nullptr) {
+        return parse(str.c_str(), static_cast<unsigned int>(str.size()), handler);
+    }
+
+    // parse a UTF-8 JSON c-style string (NULL terminated); a null pointer is
+    // reported as E_EMPTY_DOCUMENT instead of being handed to strlen
+    bool parse(const char* c_str, hDictInsert handler = nullptr) {
+        const size_t len = c_str != nullptr ? strlen(c_str) : 0;
+        return parse(c_str, static_cast<unsigned int>(len), handler);
+    }
+
+    // parse a UTF-8 JSON string with length
+    bool parse(const char* pch, unsigned int len, hDictInsert handler = nullptr) {
+        // reset state before parse
+        reset();
+
+        if (!pch || len == 0) {
+            err_ = JsonbErrType::E_EMPTY_DOCUMENT;
+            LOG(WARNING) << "empty json string";
+            return false;
+        }
+
+        // parse json using simdjson, return false on exception
+        try {
+            simdjson::padded_string json_str {pch, len};
+            simdjson::ondemand::document doc = parser_.iterate(json_str);
+
+            // simdjson process top level primitive types specially
+            // so some repeated code here
+            switch (doc.type()) {
+            case simdjson::ondemand::json_type::object:
+            case simdjson::ondemand::json_type::array: {
+                parse(doc.get_value(), handler);
+                break;
+            }
+            case simdjson::ondemand::json_type::null: {
+                if (writer_.writeNull() == 0) {
+                    err_ = JsonbErrType::E_OUTPUT_FAIL;
+                    LOG(WARNING) << "writeNull failed";
+                }
+                break;
+            }
+            case simdjson::ondemand::json_type::boolean: {
+                if (writer_.writeBool(doc.get_bool()) == 0) {
+                    err_ = JsonbErrType::E_OUTPUT_FAIL;
+                    LOG(WARNING) << "writeBool failed";
+                }
+                break;
+            }
+            case simdjson::ondemand::json_type::string: {
+                write_string(doc.get_string());
+                break;
+            }
+            case simdjson::ondemand::json_type::number: {
+                write_number(doc.get_number());
+                break;
+            }
+            }
+
+            return err_ == JsonbErrType::E_NONE;
+        } catch (const simdjson::simdjson_error& e) {
+            err_ = JsonbErrType::E_EXCEPTION;
+            LOG(WARNING) << "simdjson parse exception: " << e.what();
+            return false;
+        }
+    }
+
+    // parse json, recursively if necessary, by simdjson
+    //  and serialize to binary format by writer
+    // errors are reported through err_ (see getErrorCode()), there is no return value
+    void parse(simdjson::ondemand::value value, hDictInsert handler = nullptr) {
+        switch (value.type()) {
+        case simdjson::ondemand::json_type::null: {
+            if (writer_.writeNull() == 0) {
+                err_ = JsonbErrType::E_OUTPUT_FAIL;
+                LOG(WARNING) << "writeNull failed";
+            }
+            break;
+        }
+        case simdjson::ondemand::json_type::boolean: {
+            if (writer_.writeBool(value.get_bool()) == 0) {
+                err_ = JsonbErrType::E_OUTPUT_FAIL;
+                LOG(WARNING) << "writeBool failed";
+            }
+            break;
+        }
+        case simdjson::ondemand::json_type::string: {
+            write_string(value.get_string());
+            break;
+        }
+        case simdjson::ondemand::json_type::number: {
+            write_number(value.get_number());
+            break;
+        }
+        case simdjson::ondemand::json_type::object: {
+            if (!writer_.writeStartObject()) {
+                err_ = JsonbErrType::E_OUTPUT_FAIL;
+                LOG(WARNING) << "writeStartObject failed";
+                break;
+            }
+
+            for (auto kv : value.get_object()) {
+                std::string_view key;
+                simdjson::error_code e = kv.unescaped_key().get(key);
+                if (e != simdjson::SUCCESS) {
+                    err_ = JsonbErrType::E_INVALID_KEY_STRING;
+                    LOG(WARNING) << "simdjson get key failed: " << e;
+                    break;
+                }
+
+                int key_id = -1;
+                if (handler) {
+                    key_id = handler(key.data(), key.size());
+                }
+
+                if (key_id < 0) {
+                    if (writer_.writeKey(key.data(), key.size()) == 0) {
+                        err_ = JsonbErrType::E_OUTPUT_FAIL;
+                        LOG(WARNING) << "writeKey failed key: " << key;
+                        break;
+                    }
+                } else {
+                    if (writer_.writeKey(key_id) == 0) {
+                        err_ = JsonbErrType::E_OUTPUT_FAIL;
+                        LOG(WARNING) << "writeKey failed key_id: " << key_id;
+                        break;
+                    }
+                }
+
+                // parse object value
+                parse(kv.value(), handler);
+                if (err_ != JsonbErrType::E_NONE) {
+                    LOG(WARNING) << "parse object value failed";
+                    break;
+                }
+            }
+            if (err_ == JsonbErrType::E_NONE && !writer_.writeEndObject()) {
+                err_ = JsonbErrType::E_OUTPUT_FAIL;
+                LOG(WARNING) << "writeEndObject failed";
+            }
+            break;
+        }
+        case simdjson::ondemand::json_type::array: {
+            if (!writer_.writeStartArray()) {
+                err_ = JsonbErrType::E_OUTPUT_FAIL;
+                LOG(WARNING) << "writeStartArray failed";
+                break;
+            }
+
+            for (auto elem : value.get_array()) {
+                // parse array element
+                parse(elem.value(), handler);
+                if (err_ != JsonbErrType::E_NONE) {
+                    LOG(WARNING) << "parse array element failed";
+                    break;
+                }
+            }
+            if (err_ == JsonbErrType::E_NONE && !writer_.writeEndArray()) {
+                err_ = JsonbErrType::E_OUTPUT_FAIL;
+                LOG(WARNING) << "writeEndArray failed";
+            }
+            break;
+        }
+        default: {
+            err_ = JsonbErrType::E_INVALID_TYPE;
+            LOG(WARNING) << "unknown value type: "; // << value;
+            break;
+        }
+
+        } // end of switch
+    }
+
+    // write `str` as one JSONB string value; the first writer call that fails
+    // sets err_ to E_OUTPUT_FAIL and ends the function
+    void write_string(std::string_view str) {
+        // start writing string
+        if (!writer_.writeStartString()) {
+            err_ = JsonbErrType::E_OUTPUT_FAIL;
+            LOG(WARNING) << "writeStartString failed";
+            return;
+        }
+
+        // write string (nothing is written for an empty string)
+        if (!str.empty() && writer_.writeString(str.data(), str.size()) == 0) {
+            err_ = JsonbErrType::E_OUTPUT_FAIL;
+            LOG(WARNING) << "writeString failed";
+            return;
+        }
+
+        // end writing string
+        if (!writer_.writeEndString()) {
+            err_ = JsonbErrType::E_OUTPUT_FAIL;
+            LOG(WARNING) << "writeEndString failed";
+        }
+    }
+
+    // write `num` as a JSONB double, or as the narrowest signed integer type
+    // (int8/int16/int32/int64) that can hold it; failures are recorded in err_
+    void write_number(simdjson::ondemand::number num) {
+        if (num.is_double()) {
+            if (writer_.writeDouble(num.get_double()) == 0) {
+                err_ = JsonbErrType::E_OUTPUT_FAIL;
+                LOG(WARNING) << "writeDouble failed";
+            }
+        } else if (num.is_int64() || num.is_uint64()) {
+            if (num.is_uint64() && num.get_uint64() > std::numeric_limits<int64_t>::max()) {
+                err_ = JsonbErrType::E_OCTAL_OVERFLOW;
+                LOG(WARNING) << "overflow number: " << num.get_uint64();
+                return;
+            }
+            int64_t val = num.is_int64() ? num.get_int64() : num.get_uint64();
+            int size = 0;
+            // both bounds must be checked: with an upper-bound-only test a value
+            // such as -1000 would be sent to writeInt8 and silently truncated
+            if (fits_in<int8_t>(val)) {
+                size = writer_.writeInt8(static_cast<int8_t>(val));
+            } else if (fits_in<int16_t>(val)) {
+                size = writer_.writeInt16(static_cast<int16_t>(val));
+            } else if (fits_in<int32_t>(val)) {
+                size = writer_.writeInt32(static_cast<int32_t>(val));
+            } else { // any remaining value needs the full int64 range
+                size = writer_.writeInt64(val);
+            }
+
+            if (size == 0) {
+                err_ = JsonbErrType::E_OUTPUT_FAIL;
+                LOG(WARNING) << "writeInt failed";
+            }
+        } else {
+            err_ = JsonbErrType::E_INVALID_NUMBER;
+            LOG(WARNING) << "invalid number: " << num.as_double();
+        }
+    }
+
+    JsonbWriterT<OS_TYPE>& getWriter() { return writer_; }
+
+    JsonbErrType getErrorCode() { return err_; }
+
+    // clear error code
+    void clearErr() { err_ = JsonbErrType::E_NONE; }
+
+    void reset() {
+        writer_.reset();
+        clearErr();
+    }
+
+private:
+    // true when `v` lies within [min, max] of the signed integer type T
+    template <class T>
+    static bool fits_in(int64_t v) {
+        return v >= std::numeric_limits<T>::min() && v <= std::numeric_limits<T>::max();
+    }
+
+    simdjson::ondemand::parser parser_;
+    JsonbWriterT<OS_TYPE> writer_;
+    JsonbErrType err_;
+};
+
+using JsonbParserSIMD = JsonbParserTSIMD<JsonbOutStream>;
+
+} // namespace doris
+
+#endif // JSONB_JSONBJSONPARSERSIMD_H
diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp
index ea84ddf3ae..02f352fb57 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -18,6 +18,7 @@
 #include <boost/token_functions.hpp>
 #include <vector>
 
+// #include "util/jsonb_parser_simd.h"
 #include "util/string_parser.hpp"
 #include "util/string_util.h"
 #include "vec/columns/column.h"
@@ -47,7 +48,7 @@ enum class JsonbParseErrorMode { FAIL = 0, RETURN_NULL, RETURN_VALUE, RETURN_INV
 template <NullalbeMode nullable_mode, JsonbParseErrorMode parse_error_handle_mode>
 class FunctionJsonbParseBase : public IFunction {
 private:
-    JsonbParser default_value_parser;
+    JsonbParserSIMD default_value_parser;
     bool has_const_default_value = false;
 
 public:
@@ -193,6 +194,10 @@ public:
         size_t size = col_from.size();
         col_to->reserve(size);
 
+        // parser can be reused for performance
+        JsonbParserSIMD parser;
+        JsonbErrType error = JsonbErrType::E_NONE;
+
         for (size_t i = 0; i < input_rows_count; ++i) {
             if (col_from.is_null_at(i)) {
                 null_map->get_data()[i] = 1;
@@ -201,8 +206,6 @@ public:
             }
 
             const auto& val = col_from_string->get_data_at(i);
-            JsonbParser parser;
-            JsonbErrType error = JsonbErrType::E_NONE;
             if (parser.parse(val.data, val.size)) {
                 // insert jsonb format data
                 col_to->insert_data(parser.getWriter().getOutput()->getBuffer(),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 03/05: [bug](jdbc) fix jdbc external table with char type length error (#15386)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit d2fa5078b9e125830d11b49c8f6d9653e87b05a5
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Thu Dec 29 11:19:03 2022 +0800

    [bug](jdbc) fix jdbc external table with char type length error (#15386)
    
    Tested with PostgreSQL and Oracle: for a char(100) column holding 'abc',
    the string read back has length 100, so the extra trailing spaces need to be trimmed
---
 be/src/vec/exec/scan/new_jdbc_scan_node.cpp                 |  7 ++++---
 be/src/vec/exec/scan/new_jdbc_scan_node.h                   |  1 +
 be/src/vec/exec/scan/new_jdbc_scanner.cpp                   |  7 +++++--
 be/src/vec/exec/scan/new_jdbc_scanner.h                     |  6 ++++--
 be/src/vec/exec/vjdbc_connector.cpp                         | 13 ++++++++++++-
 be/src/vec/exec/vjdbc_connector.h                           |  1 +
 .../main/java/org/apache/doris/planner/JdbcScanNode.java    |  1 +
 gensrc/thrift/PlanNodes.thrift                              |  1 +
 8 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/exec/scan/new_jdbc_scan_node.cpp b/be/src/vec/exec/scan/new_jdbc_scan_node.cpp
index 15ddd607ba..955a33970d 100644
--- a/be/src/vec/exec/scan/new_jdbc_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_jdbc_scan_node.cpp
@@ -25,7 +25,8 @@ NewJdbcScanNode::NewJdbcScanNode(ObjectPool* pool, const TPlanNode& tnode,
         : VScanNode(pool, tnode, descs),
           _table_name(tnode.jdbc_scan_node.table_name),
           _tuple_id(tnode.jdbc_scan_node.tuple_id),
-          _query_string(tnode.jdbc_scan_node.query_string) {
+          _query_string(tnode.jdbc_scan_node.query_string),
+          _table_type(tnode.jdbc_scan_node.table_type) {
     _output_tuple_id = tnode.jdbc_scan_node.tuple_id;
 }
 
@@ -49,8 +50,8 @@ Status NewJdbcScanNode::_init_scanners(std::list<VScanner*>* scanners) {
     if (_eos == true) {
         return Status::OK();
     }
-    NewJdbcScanner* scanner =
-            new NewJdbcScanner(_state, this, _limit_per_scanner, _tuple_id, _query_string);
+    NewJdbcScanner* scanner = new NewJdbcScanner(_state, this, _limit_per_scanner, _tuple_id,
+                                                 _query_string, _table_type);
     _scanner_pool.add(scanner);
     RETURN_IF_ERROR(scanner->prepare(_state, _vconjunct_ctx_ptr.get()));
     scanners->push_back(static_cast<VScanner*>(scanner));
diff --git a/be/src/vec/exec/scan/new_jdbc_scan_node.h b/be/src/vec/exec/scan/new_jdbc_scan_node.h
index d527f24b12..1cc34e83f0 100644
--- a/be/src/vec/exec/scan/new_jdbc_scan_node.h
+++ b/be/src/vec/exec/scan/new_jdbc_scan_node.h
@@ -37,6 +37,7 @@ private:
     std::string _table_name;
     TupleId _tuple_id;
     std::string _query_string;
+    TOdbcTableType::type _table_type;
 };
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.cpp b/be/src/vec/exec/scan/new_jdbc_scanner.cpp
index ef9004530d..cbb9588bbd 100644
--- a/be/src/vec/exec/scan/new_jdbc_scanner.cpp
+++ b/be/src/vec/exec/scan/new_jdbc_scanner.cpp
@@ -19,13 +19,15 @@
 
 namespace doris::vectorized {
 NewJdbcScanner::NewJdbcScanner(RuntimeState* state, NewJdbcScanNode* parent, int64_t limit,
-                               const TupleId& tuple_id, const std::string& query_string)
+                               const TupleId& tuple_id, const std::string& query_string,
+                               TOdbcTableType::type table_type)
         : VScanner(state, static_cast<VScanNode*>(parent), limit),
           _is_init(false),
           _jdbc_eos(false),
           _tuple_id(tuple_id),
           _query_string(query_string),
-          _tuple_desc(nullptr) {}
+          _tuple_desc(nullptr),
+          _table_type(table_type) {}
 
 Status NewJdbcScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) {
     VLOG_CRITICAL << "NewJdbcScanner::Prepare";
@@ -63,6 +65,7 @@ Status NewJdbcScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx
     _jdbc_param.passwd = jdbc_table->jdbc_passwd();
     _jdbc_param.tuple_desc = _tuple_desc;
     _jdbc_param.query_string = std::move(_query_string);
+    _jdbc_param.table_type = _table_type;
 
     _jdbc_connector.reset(new (std::nothrow) JdbcConnector(_jdbc_param));
     if (_jdbc_connector == nullptr) {
diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.h b/be/src/vec/exec/scan/new_jdbc_scanner.h
index f5584cd5d0..9fa17c4116 100644
--- a/be/src/vec/exec/scan/new_jdbc_scanner.h
+++ b/be/src/vec/exec/scan/new_jdbc_scanner.h
@@ -26,12 +26,12 @@ namespace vectorized {
 class NewJdbcScanner : public VScanner {
 public:
     NewJdbcScanner(RuntimeState* state, NewJdbcScanNode* parent, int64_t limit,
-                   const TupleId& tuple_id, const std::string& query_string);
+                   const TupleId& tuple_id, const std::string& query_string,
+                   TOdbcTableType::type table_type);
 
     Status open(RuntimeState* state) override;
     Status close(RuntimeState* state) override;
 
-public:
     Status prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr);
 
 protected:
@@ -48,6 +48,8 @@ private:
     std::string _query_string;
     // Descriptor of tuples read from JDBC table.
     const TupleDescriptor* _tuple_desc;
+    // the sql query database type: like mysql, PG...
+    TOdbcTableType::type _table_type;
     // Scanner of JDBC.
     std::unique_ptr<JdbcConnector> _jdbc_connector;
     JdbcConnectorParam _jdbc_param;
diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp
index ce171ad569..f2302f3baa 100644
--- a/be/src/vec/exec/vjdbc_connector.cpp
+++ b/be/src/vec/exec/vjdbc_connector.cpp
@@ -418,8 +418,19 @@ Status JdbcConnector::_convert_column_data(JNIEnv* env, jobject jobj,
         M(TYPE_FLOAT, float, vectorized::ColumnVector<vectorized::Float32>)
         M(TYPE_DOUBLE, double, vectorized::ColumnVector<vectorized::Float64>)
 #undef M
+    case TYPE_CHAR: {
+        std::string data = _jobject_to_string(env, jobj);
+        // Tested with PostgreSQL and Oracle: a char(100) column holding 'abc' is read
+        // back with length 100, so the extra trailing spaces need to be trimmed
+        if ((_conn_param.table_type == TOdbcTableType::POSTGRESQL) ||
+            (_conn_param.table_type == TOdbcTableType::ORACLE)) {
+            data = data.erase(data.find_last_not_of(' ') + 1);
+        }
+        reinterpret_cast<vectorized::ColumnString*>(col_ptr)->insert_data(data.c_str(),
+                                                                          data.length());
+        break;
+    }
     case TYPE_STRING:
-    case TYPE_CHAR:
     case TYPE_VARCHAR: {
         std::string data = _jobject_to_string(env, jobj);
         reinterpret_cast<vectorized::ColumnString*>(col_ptr)->insert_data(data.c_str(),
diff --git a/be/src/vec/exec/vjdbc_connector.h b/be/src/vec/exec/vjdbc_connector.h
index e6da1a0151..84ca17e02c 100644
--- a/be/src/vec/exec/vjdbc_connector.h
+++ b/be/src/vec/exec/vjdbc_connector.h
@@ -33,6 +33,7 @@ struct JdbcConnectorParam {
     std::string user;
     std::string passwd;
     std::string query_string;
+    TOdbcTableType::type table_type;
 
     const TupleDescriptor* tuple_desc;
 };
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/JdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/JdbcScanNode.java
index b9da4b45c0..c15a12371c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/JdbcScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/JdbcScanNode.java
@@ -195,6 +195,7 @@ public class JdbcScanNode extends ScanNode {
         msg.jdbc_scan_node.setTupleId(desc.getId().asInt());
         msg.jdbc_scan_node.setTableName(tableName);
         msg.jdbc_scan_node.setQueryString(getJdbcQueryStr());
+        msg.jdbc_scan_node.setTableType(jdbcType);
     }
 
     @Override
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index a7a21a0499..613c884a06 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -406,6 +406,7 @@ struct TJdbcScanNode {
   1: optional Types.TTupleId tuple_id
   2: optional string table_name
   3: optional string query_string
+  4: optional Types.TOdbcTableType table_type
 }
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 01/05: [feature](BE)pad missed version with empty rowset (#15030)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 740363019c36bb1bb76a9c7b2ad2ce193ae0b66e
Author: AlexYue <yj...@qq.com>
AuthorDate: Thu Dec 29 11:20:44 2022 +0800

    [feature](BE)pad missed version with empty rowset (#15030)
    
    If all replicas of one tablet are broken, user can use this http api to pad the missed version with empty rowset.
---
 be/src/http/CMakeLists.txt                         |   1 +
 be/src/http/action/pad_rowset_action.cpp           | 105 +++++++++++++++++++++
 be/src/http/action/pad_rowset_action.h             |  44 +++++++++
 be/src/service/http_service.cpp                    |   4 +
 be/test/olap/tablet_test.cpp                       |  55 +++++++++++
 .../docs/admin-manual/http-actions/pad-rowset.md   |  41 ++++++++
 docs/sidebars.json                                 |   1 +
 .../docs/admin-manual/http-actions/pad_rowset.md   |  43 +++++++++
 8 files changed, 294 insertions(+)

diff --git a/be/src/http/CMakeLists.txt b/be/src/http/CMakeLists.txt
index a23d679391..e38ff4d1ee 100644
--- a/be/src/http/CMakeLists.txt
+++ b/be/src/http/CMakeLists.txt
@@ -35,6 +35,7 @@ add_library(Webserver STATIC
   http_client.cpp
   action/download_action.cpp
   action/monitor_action.cpp
+  action/pad_rowset_action.cpp
   action/health_action.cpp
   action/tablet_migration_action.cpp
   action/tablets_info_action.cpp
diff --git a/be/src/http/action/pad_rowset_action.cpp b/be/src/http/action/pad_rowset_action.cpp
new file mode 100644
index 0000000000..df2721f50b
--- /dev/null
+++ b/be/src/http/action/pad_rowset_action.cpp
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "http/action/pad_rowset_action.h"
+
+#include <memory>
+#include <mutex>
+
+#include "http/http_channel.h"
+#include "olap/olap_common.h"
+#include "olap/rowset/beta_rowset_writer.h"
+#include "olap/rowset/rowset.h"
+#include "olap/storage_engine.h"
+
+namespace doris {
+
+const std::string TABLET_ID = "tablet_id";
+const std::string START_VERSION = "start_version";
+const std::string END_VERSION = "end_version";
+
+Status check_one_param(const std::string& param_val, const std::string& param_name) {
+    if (param_val.empty()) {
+        return Status::InternalError("paramater {} not specified in url", param_name);
+    }
+    return Status::OK();
+}
+
+void PadRowsetAction::handle(HttpRequest* req) {
+    LOG(INFO) << "accept one request " << req->debug_string();
+    Status status = _handle(req);
+    std::string result = status.to_json();
+    LOG(INFO) << "handle request result:" << result;
+    if (status.ok()) {
+        HttpChannel::send_reply(req, HttpStatus::OK, result);
+    } else {
+        HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, result);
+    }
+}
+
+Status PadRowsetAction::check_param(HttpRequest* req) {
+    RETURN_IF_ERROR(check_one_param(req->param(TABLET_ID), TABLET_ID));
+    RETURN_IF_ERROR(check_one_param(req->param(START_VERSION), START_VERSION));
+    RETURN_IF_ERROR(check_one_param(req->param(END_VERSION), END_VERSION));
+    return Status::OK();
+}
+
+Status PadRowsetAction::_handle(HttpRequest* req) {
+    RETURN_IF_ERROR(check_param(req));
+
+    const std::string& tablet_id_str = req->param(TABLET_ID);
+    const std::string& start_version_str = req->param(START_VERSION);
+    const std::string& end_version_str = req->param(END_VERSION);
+
+    // valid str format
+    int64_t tablet_id = std::atol(tablet_id_str.c_str());
+    int32_t start_version = std::atoi(start_version_str.c_str());
+    int32_t end_version = std::atoi(end_version_str.c_str());
+    if (start_version < 0 || end_version < 0 || end_version < start_version) {
+        return Status::InternalError("Invalid input version");
+    }
+
+    auto tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
+    if (nullptr == tablet) {
+        return Status::InternalError("Unknown tablet id {}", tablet_id);
+    }
+    return _pad_rowset(tablet, Version(start_version, end_version));
+}
+
+Status PadRowsetAction::_pad_rowset(TabletSharedPtr tablet, const Version& version) {
+    if (tablet->check_version_exist(version)) {
+        return Status::InternalError("Input version {} exists", version.to_string());
+    }
+
+    std::unique_ptr<RowsetWriter> writer;
+    RETURN_IF_ERROR(tablet->create_rowset_writer(version, VISIBLE, NONOVERLAPPING,
+                                                 tablet->tablet_schema(), -1, -1, &writer));
+    auto rowset = writer->build();
+    rowset->make_visible(version);
+
+    std::vector<RowsetSharedPtr> to_add {rowset};
+    std::vector<RowsetSharedPtr> to_delete;
+    {
+        std::unique_lock wlock(tablet->get_header_lock());
+        tablet->modify_rowsets(to_add, to_delete);
+        tablet->save_meta();
+    }
+
+    return Status::OK();
+}
+
+} // namespace doris
diff --git a/be/src/http/action/pad_rowset_action.h b/be/src/http/action/pad_rowset_action.h
new file mode 100644
index 0000000000..f6036dc9fa
--- /dev/null
+++ b/be/src/http/action/pad_rowset_action.h
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "http/http_handler.h"
+#include "http/http_request.h"
+#include "olap/tablet.h"
+
+namespace doris {
+
+class PadRowsetAction : public HttpHandler {
+public:
+    PadRowsetAction() = default;
+
+    ~PadRowsetAction() override = default;
+
+    void handle(HttpRequest* req) override;
+
+private:
+    Status _handle(HttpRequest* req);
+    Status check_param(HttpRequest* req);
+
+#ifdef BE_TEST
+public:
+#endif
+    Status _pad_rowset(TabletSharedPtr tablet, const Version& version);
+};
+} // end namespace doris
\ No newline at end of file
diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp
index b62e54e6b1..06334bde29 100644
--- a/be/src/service/http_service.cpp
+++ b/be/src/service/http_service.cpp
@@ -26,6 +26,7 @@
 #include "http/action/health_action.h"
 #include "http/action/meta_action.h"
 #include "http/action/metrics_action.h"
+#include "http/action/pad_rowset_action.h"
 #include "http/action/pprof_actions.h"
 #include "http/action/reload_tablet_action.h"
 #include "http/action/reset_rpc_channel_action.h"
@@ -180,6 +181,9 @@ Status HttpService::start() {
     _ev_http_server->register_handler(HttpMethod::POST, "/api/check_tablet_segment_lost",
                                       check_tablet_segment_action);
 
+    PadRowsetAction* pad_rowset_action = _pool.add(new PadRowsetAction());
+    _ev_http_server->register_handler(HttpMethod::POST, "api/pad_rowset", pad_rowset_action);
+
     _ev_http_server->start();
     return Status::OK();
 }
diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp
index 6e9990dd3b..b29443776b 100644
--- a/be/test/olap/tablet_test.cpp
+++ b/be/test/olap/tablet_test.cpp
@@ -21,6 +21,7 @@
 
 #include <sstream>
 
+#include "http/action/pad_rowset_action.h"
 #include "olap/olap_define.h"
 #include "olap/rowset/beta_rowset.h"
 #include "olap/storage_engine.h"
@@ -28,6 +29,7 @@
 #include "olap/tablet_meta.h"
 #include "olap/tablet_schema_cache.h"
 #include "testutil/mock_rowset.h"
+#include "util/file_utils.h"
 #include "util/time.h"
 
 using namespace std;
@@ -37,6 +39,8 @@ namespace doris {
 using RowsetMetaSharedContainerPtr = std::shared_ptr<std::vector<RowsetMetaSharedPtr>>;
 
 static StorageEngine* k_engine = nullptr;
+static const std::string kTestDir = "/data_test/data/tablet_test";
+static const uint32_t MAX_PATH_LEN = 1024;
 
 class TestTablet : public testing::Test {
 public:
@@ -92,6 +96,17 @@ public:
                 }]
             }
         })";
+        char buffer[MAX_PATH_LEN];
+        EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
+        absolute_dir = std::string(buffer) + kTestDir;
+
+        if (FileUtils::check_exist(absolute_dir)) {
+            EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok());
+        }
+        EXPECT_TRUE(FileUtils::create_dir(absolute_dir).ok());
+        EXPECT_TRUE(FileUtils::create_dir(absolute_dir + "/tablet_path").ok());
+        _data_dir = std::make_unique<DataDir>(absolute_dir);
+        _data_dir->update_capacity();
 
         doris::EngineOptions options;
         k_engine = new StorageEngine(options);
@@ -99,6 +114,9 @@ public:
     }
 
     void TearDown() override {
+        if (FileUtils::check_exist(absolute_dir)) {
+            EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok());
+        }
         if (k_engine != nullptr) {
             k_engine->stop();
             delete k_engine;
@@ -224,6 +242,8 @@ public:
 protected:
     std::string _json_rowset_meta;
     TabletMetaSharedPtr _tablet_meta;
+    string absolute_dir;
+    std::unique_ptr<DataDir> _data_dir;
 };
 
 TEST_F(TestTablet, delete_expired_stale_rowset) {
@@ -252,6 +272,41 @@ TEST_F(TestTablet, delete_expired_stale_rowset) {
     _tablet.reset();
 }
 
+TEST_F(TestTablet, pad_rowset) {
+    std::vector<RowsetMetaSharedPtr> rs_metas;
+    auto ptr1 = std::make_shared<RowsetMeta>();
+    init_rs_meta(ptr1, 1, 2);
+    rs_metas.push_back(ptr1);
+    RowsetSharedPtr rowset1 = make_shared<BetaRowset>(nullptr, "", ptr1);
+
+    auto ptr2 = std::make_shared<RowsetMeta>();
+    init_rs_meta(ptr2, 3, 4);
+    rs_metas.push_back(ptr2);
+    RowsetSharedPtr rowset2 = make_shared<BetaRowset>(nullptr, "", ptr2);
+
+    auto ptr3 = std::make_shared<RowsetMeta>();
+    init_rs_meta(ptr3, 6, 7);
+    rs_metas.push_back(ptr3);
+    RowsetSharedPtr rowset3 = make_shared<BetaRowset>(nullptr, "", ptr3);
+
+    for (auto& rowset : rs_metas) {
+        _tablet_meta->add_rs_meta(rowset);
+    }
+
+    _data_dir->init();
+    TabletSharedPtr _tablet(new Tablet(_tablet_meta, _data_dir.get()));
+    _tablet->init();
+
+    Version version(5, 5);
+    std::vector<RowsetReaderSharedPtr> readers;
+    ASSERT_FALSE(_tablet->capture_rs_readers(version, &readers).ok());
+    readers.clear();
+
+    PadRowsetAction action;
+    action._pad_rowset(_tablet, version);
+    ASSERT_TRUE(_tablet->capture_rs_readers(version, &readers).ok());
+}
+
 TEST_F(TestTablet, cooldown_policy) {
     std::vector<RowsetMetaSharedPtr> rs_metas;
     RowsetMetaSharedPtr ptr1(new RowsetMeta());
diff --git a/docs/en/docs/admin-manual/http-actions/pad-rowset.md b/docs/en/docs/admin-manual/http-actions/pad-rowset.md
new file mode 100644
index 0000000000..9ff6b89053
--- /dev/null
+++ b/docs/en/docs/admin-manual/http-actions/pad-rowset.md
@@ -0,0 +1,41 @@
+---
+{
+    "title": "PAD ROWSET",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# PAD ROWSET
+## description
+   
+    Pad an empty rowset as a substitute for a broken replica.
+
+    METHOD: POST
+    URI: http://be_host:be_http_port/api/pad_rowset?tablet_id=xxx&start_version=xxx&end_version=xxx
+
+## example
+
+    curl -X POST "http://hostname:8088/api/pad_rowset?tablet_id=123456&start_version=1111111&end_version=1111112"
+
+## keyword
+
+    ROWSET,TABLET,ROWSET,TABLET
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 71dd56628d..0dfcd8a982 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -1105,6 +1105,7 @@
                             ]
                         },
                         "admin-manual/http-actions/restore-tablet",
+                        "admin-manual/http-actions/pad-rowset",
                         "admin-manual/http-actions/get-load-state",
                         "admin-manual/http-actions/tablet-migration-action",
                         "admin-manual/http-actions/cancel-label",
diff --git a/docs/zh-CN/docs/admin-manual/http-actions/pad_rowset.md b/docs/zh-CN/docs/admin-manual/http-actions/pad_rowset.md
new file mode 100644
index 0000000000..dba241fca7
--- /dev/null
+++ b/docs/zh-CN/docs/admin-manual/http-actions/pad_rowset.md
@@ -0,0 +1,43 @@
+---
+{
+    "title": "PAD ROWSET",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# PAD ROWSET
+## description
+   
+    该功能用于使用一个空的rowset填充损坏的副本。
+
+    说明:这个功能暂时只在be服务中提供一个http接口。如果要使用,
+    需要向要进行数据恢复的那台be机器的http端口发送pad rowset api请求。api格式如下:
+    METHOD: POST
+    URI: http://be_host:be_http_port/api/pad_rowset?tablet_id=xxx&start_version=xxx&end_version=xxx
+
+## example
+
+    curl -X POST "http://hostname:8088/api/pad_rowset?tablet_id=123456&start_version=1111111&end_version=1111112"
+
+## keyword
+
+    PAD,ROWSET,PAD,ROWSET


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 04/05: [Improvement](meta) update show create function result (#15414)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 5b99fee593e41bf63c3832bc016f9fa67391dc4a
Author: Yulei-Yang <yu...@gmail.com>
AuthorDate: Thu Dec 29 11:40:14 2022 +0800

    [Improvement](meta) update show create function result (#15414)
    
    Currently, show create function is designed for native functions, and its output has some unsuitable points.
    
    This PR brings several improvements and makes the result of show create function usable to create the function again.
    
    1. add type property.
    2. add ALWAYS_NULLABLE property for java_udf
    3. use file property rather than object_file for java_udf, follow usage of create java_udf
    4. remove md5 property, because the file may vary when the function is created again.
    5. remove INIT_FN, UPDATE_FN, MERGE_FN, SERIALIZE_FN etc. properties for java_udf, because java_udf does not need these properties.
---
 .../apache/doris/catalog/AggregateFunction.java    | 35 ++++++++++++++--------
 .../org/apache/doris/catalog/ScalarFunction.java   | 14 +++++++--
 2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
index 0729b0aa03..07996451d3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
@@ -559,23 +559,32 @@ public class AggregateFunction extends Function {
             sb.append(" INTERMEDIATE " + getIntermediateType());
         }
 
-        sb.append(" PROPERTIES (")
-                .append("\n  \"INIT_FN\"=\"" + getInitFnSymbol() + "\"")
-                .append(",\n  \"UPDATE_FN\"=\"" + getUpdateFnSymbol() + "\"")
-                .append(",\n  \"MERGE_FN\"=\"" + getMergeFnSymbol() + "\"");
-        if (getSerializeFnSymbol() != null) {
-            sb.append(",\n  \"SERIALIZE_FN\"=\"" + getSerializeFnSymbol() + "\"");
-        }
-        if (getFinalizeFnSymbol() != null) {
-            sb.append(",\n  \"FINALIZE_FN\"=\"" + getFinalizeFnSymbol() + "\"");
+        sb.append(" PROPERTIES (");
+        if (getBinaryType() != TFunctionBinaryType.JAVA_UDF) {
+            sb.append("\n  \"INIT_FN\"=\"" + getInitFnSymbol() + "\",")
+                    .append("\n  \"UPDATE_FN\"=\"" + getUpdateFnSymbol() + "\",")
+                    .append("\n  \"MERGE_FN\"=\"" + getMergeFnSymbol() + "\",");
+            if (getSerializeFnSymbol() != null) {
+                sb.append("\n  \"SERIALIZE_FN\"=\"" + getSerializeFnSymbol() + "\",");
+            }
+            if (getFinalizeFnSymbol() != null) {
+                sb.append("\n  \"FINALIZE_FN\"=\"" + getFinalizeFnSymbol() + "\",");
+            }
         }
         if (getSymbolName() != null) {
-            sb.append(",\n  \"SYMBOL\"=\"" + getSymbolName() + "\"");
+            sb.append("\n  \"SYMBOL\"=\"" + getSymbolName() + "\",");
         }
 
-        sb.append(",\n  \"OBJECT_FILE\"=")
-                .append("\"" + (getLocation() == null ? "" : getLocation().toString()) + "\"");
-        sb.append(",\n  \"MD5\"=").append("\"" + getChecksum() + "\"");
+        if (getBinaryType() == TFunctionBinaryType.JAVA_UDF) {
+            sb.append("\n  \"FILE\"=")
+                    .append("\"" + (getLocation() == null ? "" : getLocation().toString()) + "\",");
+            boolean isReturnNull = this.getNullableMode() == NullableMode.ALWAYS_NULLABLE;
+            sb.append("\n  \"ALWAYS_NULLABLE\"=").append("\"" + isReturnNull + "\",");
+        } else {
+            sb.append("\n  \"OBJECT_FILE\"=")
+                    .append("\"" + (getLocation() == null ? "" : getLocation().toString()) + "\",");
+        }
+        sb.append("\n  \"TYPE\"=").append("\"" + this.getBinaryType() + "\"");
         sb.append("\n);");
         return sb.toString();
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java
index 148faa8ecd..c8e89cae1a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java
@@ -389,9 +389,17 @@ public class ScalarFunction extends Function {
         if (getCloseFnSymbol() != null) {
             sb.append(",\n  \"CLOSE_FN\"=").append("\"" + getCloseFnSymbol() + "\"");
         }
-        sb.append(",\n  \"OBJECT_FILE\"=")
-                .append("\"" + (getLocation() == null ? "" : getLocation().toString()) + "\"");
-        sb.append(",\n  \"MD5\"=").append("\"" + getChecksum() + "\"");
+
+        if (getBinaryType() == TFunctionBinaryType.JAVA_UDF) {
+            sb.append(",\n  \"FILE\"=")
+                    .append("\"" + (getLocation() == null ? "" : getLocation().toString()) + "\"");
+            boolean isReturnNull = this.getNullableMode() == NullableMode.ALWAYS_NULLABLE;
+            sb.append(",\n  \"ALWAYS_NULLABLE\"=").append("\"" + isReturnNull + "\"");
+        } else {
+            sb.append(",\n  \"OBJECT_FILE\"=")
+                    .append("\"" + (getLocation() == null ? "" : getLocation().toString()) + "\"");
+        }
+        sb.append(",\n  \"TYPE\"=").append("\"" + this.getBinaryType() + "\"");
         sb.append("\n);");
         return sb.toString();
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 05/05: [fix](multi catalog)Set column defualt value for query. (#15415)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit be513e1062efca1988a26f21339a643b42032742
Author: Jibing-Li <64...@users.noreply.github.com>
AuthorDate: Thu Dec 29 12:03:17 2022 +0800

    [fix](multi catalog)Set column defualt value for query. (#15415)
    
    Currently, the column default value is used only for load tasks. But in the case of an Iceberg schema change,
    a query task may also need to read the default value for columns that do not exist in the old schema.
    This PR adds support for default values in query tasks.
    
    Manually tested the broker load and external emr regression cases.
---
 be/src/vec/exec/scan/vfile_scanner.cpp | 39 +++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp
index 8599029cf3..e675583bd3 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -654,8 +654,27 @@ Status VFileScanner::_init_expr_ctxes() {
         }
     }
 
+    // set column name to default value expr map
+    for (auto slot_desc : _real_tuple_desc->slots()) {
+        if (!slot_desc->is_materialized()) {
+            continue;
+        }
+        vectorized::VExprContext* ctx = nullptr;
+        auto it = _params.default_value_of_src_slot.find(slot_desc->id());
+        if (it != std::end(_params.default_value_of_src_slot)) {
+            if (!it->second.nodes.empty()) {
+                RETURN_IF_ERROR(
+                        vectorized::VExpr::create_expr_tree(_state->obj_pool(), it->second, &ctx));
+                RETURN_IF_ERROR(ctx->prepare(_state, *_default_val_row_desc));
+                RETURN_IF_ERROR(ctx->open(_state));
+            }
+            // if expr is empty, the default value will be null
+            _col_default_value_ctx.emplace(slot_desc->col_name(), ctx);
+        }
+    }
+
     if (_is_load) {
-        // follow desc expr map and src default value expr map is only for load task.
+        // follow desc expr map is only for load task.
         bool has_slot_id_map = _params.__isset.dest_sid_to_src_sid_without_trans;
         int idx = 0;
         for (auto slot_desc : _output_tuple_desc->slots()) {
@@ -694,24 +713,6 @@ Status VFileScanner::_init_expr_ctxes() {
                 }
             }
         }
-
-        for (auto slot_desc : _real_tuple_desc->slots()) {
-            if (!slot_desc->is_materialized()) {
-                continue;
-            }
-            vectorized::VExprContext* ctx = nullptr;
-            auto it = _params.default_value_of_src_slot.find(slot_desc->id());
-            if (it != std::end(_params.default_value_of_src_slot)) {
-                if (!it->second.nodes.empty()) {
-                    RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(_state->obj_pool(),
-                                                                        it->second, &ctx));
-                    RETURN_IF_ERROR(ctx->prepare(_state, *_default_val_row_desc));
-                    RETURN_IF_ERROR(ctx->open(_state));
-                }
-                // if expr is empty, the default value will be null
-                _col_default_value_ctx.emplace(slot_desc->col_name(), ctx);
-            }
-        }
     }
     return Status::OK();
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org