You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/14 05:06:18 UTC

[doris] 03/06: [Vectorized](jdbc) add check type for jdbc table (#14501)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8689265ea7f82db17a719286dde156f2a33ec617
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Thu Dec 8 10:27:47 2022 +0800

    [Vectorized](jdbc) add check type for jdbc table (#14501)
---
 be/src/vec/exec/vjdbc_connector.cpp                | 131 +++++++++++++++++++++
 be/src/vec/exec/vjdbc_connector.h                  |   4 +
 .../docs/ecosystem/external-table/jdbc-of-doris.md |  31 +++--
 .../docs/ecosystem/external-table/jdbc-of-doris.md |  30 ++---
 .../java/org/apache/doris/udf/JdbcExecutor.java    |  14 +++
 .../suites/jdbc_p0/test_jdbc_query_pg.groovy       |   4 +-
 6 files changed, 181 insertions(+), 33 deletions(-)

diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp
index f3a7688d38..ce171ad569 100644
--- a/be/src/vec/exec/vjdbc_connector.cpp
+++ b/be/src/vec/exec/vjdbc_connector.cpp
@@ -22,6 +22,7 @@
 #include "gen_cpp/Types_types.h"
 #include "gutil/strings/substitute.h"
 #include "jni.h"
+#include "runtime/define_primitive_type.h"
 #include "runtime/user_function_cache.h"
 #include "util/jni-util.h"
 #include "vec/columns/column_nullable.h"
@@ -34,6 +35,7 @@ const char* JDBC_EXECUTOR_CTOR_SIGNATURE = "([B)V";
 const char* JDBC_EXECUTOR_WRITE_SIGNATURE = "(Ljava/lang/String;)I";
 const char* JDBC_EXECUTOR_HAS_NEXT_SIGNATURE = "()Z";
 const char* JDBC_EXECUTOR_GET_BLOCK_SIGNATURE = "(I)Ljava/util/List;";
+const char* JDBC_EXECUTOR_GET_TYPES_SIGNATURE = "()Ljava/util/List;";
 const char* JDBC_EXECUTOR_CLOSE_SIGNATURE = "()V";
 const char* JDBC_EXECUTOR_CONVERT_DATE_SIGNATURE = "(Ljava/lang/Object;)J";
 const char* JDBC_EXECUTOR_CONVERT_DATETIME_SIGNATURE = "(Ljava/lang/Object;)J";
@@ -172,6 +174,125 @@ Status JdbcConnector::query() {
     if (colunm_count != materialize_num) {
         return Status::InternalError("input and output column num not equal of jdbc query.");
     }
+    LOG(INFO) << "JdbcConnector::query has exec success: " << _sql_str;
+    RETURN_IF_ERROR(_check_column_type());
+    return Status::OK();
+}
+
+Status JdbcConnector::_check_column_type() {
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env));
+    jobject type_lists =
+            env->CallNonvirtualObjectMethod(_executor_obj, _executor_clazz, _executor_get_types_id);
+    auto column_size = _tuple_desc->slots().size();
+    for (int column_index = 0, materialized_column_index = 0; column_index < column_size;
+         ++column_index) {
+        auto slot_desc = _tuple_desc->slots()[column_index];
+        if (!slot_desc->is_materialized()) {
+            continue;
+        }
+        jobject column_type =
+                env->CallObjectMethod(type_lists, _executor_get_list_id, materialized_column_index);
+
+        const std::string& type_str = _jobject_to_string(env, column_type);
+        RETURN_IF_ERROR(_check_type(slot_desc, type_str));
+        env->DeleteLocalRef(column_type);
+        materialized_column_index++;
+    }
+    env->DeleteLocalRef(type_lists);
+    return JniUtil::GetJniExceptionMsg(env);
+}
+/* type mapping: https://doris.apache.org/zh-CN/docs/dev/ecosystem/external-table/jdbc-of-doris?_highlight=jdbc
+
+Doris            MYSQL                      PostgreSQL                  Oracle                      SQLServer
+
+BOOLEAN      java.lang.Boolean          java.lang.Boolean                                       java.lang.Boolean
+TINYINT      java.lang.Integer                                                                  java.lang.Short    
+SMALLINT     java.lang.Integer          java.lang.Integer           java.math.BigDecimal        java.lang.Short    
+INT          java.lang.Integer          java.lang.Integer           java.math.BigDecimal        java.lang.Integer
+BIGINT       java.lang.Long             java.lang.Long                                          java.lang.Long
+LARGET       java.math.BigInteger
+DECIMAL      java.math.BigDecimal       java.math.BigDecimal        java.math.BigDecimal        java.math.BigDecimal
+VARCHAR      java.lang.String           java.lang.String            java.lang.String            java.lang.String
+DOUBLE       java.lang.Double           java.lang.Double            java.lang.Double            java.lang.Double
+FLOAT        java.lang.Float            java.lang.Float                                         java.lang.Float
+DATE         java.sql.Date              java.sql.Date                                           java.sql.Date
+DATETIME     java.sql.Timestamp         java.sql.Timestamp          java.sql.Timestamp          java.sql.Timestamp
+
+NOTE: because oracle always use number(p,s) to create all numerical type, so it's java type maybe java.math.BigDecimal
+*/
+
+Status JdbcConnector::_check_type(SlotDescriptor* slot_desc, const std::string& type_str) {
+    const std::string error_msg = fmt::format(
+            "Fail to convert jdbc type of {} to doris type {} on column: {}. You need to "
+            "check this column type between external table and doris table.",
+            type_str, slot_desc->type().debug_string(), slot_desc->col_name());
+    switch (slot_desc->type().type) {
+    case TYPE_BOOLEAN: {
+        if (type_str != "java.lang.Boolean" && type_str != "java.math.BigDecimal") {
+            return Status::InternalError(error_msg);
+        }
+        break;
+    }
+    case TYPE_TINYINT:
+    case TYPE_SMALLINT:
+    case TYPE_INT: {
+        if (type_str != "java.lang.Short" && type_str != "java.lang.Integer" &&
+            type_str != "java.math.BigDecimal") {
+            return Status::InternalError(error_msg);
+        }
+        break;
+    }
+    case TYPE_BIGINT:
+    case TYPE_LARGEINT: {
+        if (type_str != "java.lang.Long" && type_str != "java.math.BigDecimal" &&
+            type_str != "java.math.BigInteger") {
+            return Status::InternalError(error_msg);
+        }
+        break;
+    }
+    case TYPE_FLOAT: {
+        if (type_str != "java.lang.Float" && type_str != "java.math.BigDecimal") {
+            return Status::InternalError(error_msg);
+        }
+        break;
+    }
+    case TYPE_DOUBLE: {
+        if (type_str != "java.lang.Double" && type_str != "java.math.BigDecimal") {
+            return Status::InternalError(error_msg);
+        }
+        break;
+    }
+    case TYPE_CHAR:
+    case TYPE_VARCHAR:
+    case TYPE_STRING: {
+        //now here break directly
+        break;
+    }
+    case TYPE_DATE:
+    case TYPE_DATEV2:
+    case TYPE_TIMEV2:
+    case TYPE_DATETIME:
+    case TYPE_DATETIMEV2: {
+        if (type_str != "java.sql.Timestamp" && type_str != "java.time.LocalDateTime" &&
+            type_str != "java.sql.Date") {
+            return Status::InternalError(error_msg);
+        }
+        break;
+    }
+    case TYPE_DECIMALV2:
+    case TYPE_DECIMAL32:
+    case TYPE_DECIMAL64:
+    case TYPE_DECIMAL128I: {
+        if (type_str != "java.math.BigDecimal") {
+            return Status::InternalError(error_msg);
+        }
+        break;
+    }
+    default: {
+        return Status::InternalError(error_msg);
+    }
+    }
     return Status::OK();
 }
 
@@ -262,6 +383,8 @@ Status JdbcConnector::_register_func_id(JNIEnv* env) {
                                 _executor_finish_trans_id));
     RETURN_IF_ERROR(register_id(_executor_clazz, "rollbackTrans",
                                 JDBC_EXECUTOR_TRANSACTION_SIGNATURE, _executor_abort_trans_id));
+    RETURN_IF_ERROR(register_id(_executor_clazz, "getResultColumnTypeNames",
+                                JDBC_EXECUTOR_GET_TYPES_SIGNATURE, _executor_get_types_id));
     return Status::OK();
 }
 
@@ -314,6 +437,14 @@ Status JdbcConnector::_convert_column_data(JNIEnv* env, jobject jobj,
         reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(num);
         break;
     }
+    case TYPE_LARGEINT: {
+        StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
+        std::string data = _jobject_to_string(env, jobj);
+        __int128 num =
+                StringParser::string_to_int<__int128>(data.data(), data.size(), &parse_result);
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(num);
+        break;
+    }
     case TYPE_DECIMALV2: {
         std::string data = _jobject_to_string(env, jobj);
         DecimalV2Value decimal_slot;
diff --git a/be/src/vec/exec/vjdbc_connector.h b/be/src/vec/exec/vjdbc_connector.h
index f3575dfd52..e6da1a0151 100644
--- a/be/src/vec/exec/vjdbc_connector.h
+++ b/be/src/vec/exec/vjdbc_connector.h
@@ -19,6 +19,7 @@
 
 #include <jni.h>
 
+#include "common/status.h"
 #include "exec/table_connector.h"
 
 namespace doris {
@@ -60,6 +61,8 @@ public:
 
 private:
     Status _register_func_id(JNIEnv* env);
+    Status _check_column_type();
+    Status _check_type(SlotDescriptor*, const std::string& type_str);
     Status _convert_column_data(JNIEnv* env, jobject jobj, const SlotDescriptor* slot_desc,
                                 vectorized::IColumn* column_ptr);
     std::string _jobject_to_string(JNIEnv* env, jobject jobj);
@@ -78,6 +81,7 @@ private:
     jmethodID _executor_read_id;
     jmethodID _executor_has_next_id;
     jmethodID _executor_get_blocks_id;
+    jmethodID _executor_get_types_id;
     jmethodID _executor_close_id;
     jmethodID _executor_get_list_id;
     jmethodID _executor_get_list_size_id;
diff --git a/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md b/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md
index 552cd7e848..8d19f7ffab 100644
--- a/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md
+++ b/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md
@@ -162,16 +162,16 @@ There are different data types among different databases. Here is a list of the
 |  MySQL   |  Doris   |
 | :------: | :------: |
 | BOOLEAN  | BOOLEAN  |
-|   CHAR   |   CHAR   |
-| VARCHAR  | VARCHAR  |
-|   DATE   |   DATE   |
-|  FLOAT   |  FLOAT   |
 | TINYINT  | TINYINT  |
 | SMALLINT | SMALLINT |
 |   INT    |   INT    |
 |  BIGINT  |  BIGINT  |
-|  DOUBLE  |  DOUBLE  |
+|BIGINT UNSIGNED|LARGEINT|
+| VARCHAR  | VARCHAR  |
+|   DATE   |   DATE   |
+|  FLOAT   |  FLOAT   |
 | DATETIME | DATETIME |
+|  DOUBLE  |  DOUBLE  |
 | DECIMAL  | DECIMAL  |
 
 
@@ -180,43 +180,42 @@ There are different data types among different databases. Here is a list of the
 |    PostgreSQL    |  Doris   |
 | :--------------: | :------: |
 |     BOOLEAN      | BOOLEAN  |
-|       CHAR       |   CHAR   |
-|     VARCHAR      | VARCHAR  |
-|       DATE       |   DATE   |
-|       REAL       |  FLOAT   |
 |     SMALLINT     | SMALLINT |
 |       INT        |   INT    |
 |      BIGINT      |  BIGINT  |
-| DOUBLE PRECISION |  DOUBLE  |
+|     VARCHAR      | VARCHAR  |
+|       DATE       |   DATE   |
 |    TIMESTAMP     | DATETIME |
+|       REAL       |  FLOAT   |
+|      FLOAT       |  DOUBLE  |
 |     DECIMAL      | DECIMAL  |
 
 ### Oracle
 
 |  Oracle  |  Doris   |
 | :------: | :------: |
-|   CHAR   |   CHAR   |
 | VARCHAR  | VARCHAR  |
 |   DATE   | DATETIME |
 | SMALLINT | SMALLINT |
 |   INT    |   INT    |
+|   REAL   |   FLOAT  |
+|   FLOAT  |   DOUBLE |
 |  NUMBER  | DECIMAL  |
 
-
 ### SQL server
 
 | SQLServer |  Doris   |
 | :-------: | :------: |
 |    BIT    | BOOLEAN  |
-|   CHAR    |   CHAR   |
-|  VARCHAR  | VARCHAR  |
-|   DATE    |   DATE   |
-|   REAL    |  FLOAT   |
 |  TINYINT  | TINYINT  |
 | SMALLINT  | SMALLINT |
 |    INT    |   INT    |
 |  BIGINT   |  BIGINT  |
+|  VARCHAR  | VARCHAR  |
+|   DATE    |   DATE   |
 | DATETIME  | DATETIME |
+|   REAL    |  FLOAT   |
+|   FLOAT   |  DOUBLE  |
 |  DECIMAL  | DECIMAL  |
 
 ### ClickHouse
diff --git a/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md b/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md
index d333e8850f..4ef47e1d66 100644
--- a/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md
+++ b/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md
@@ -165,16 +165,16 @@ PROPERTIES (
 |  MySQL   |  Doris   |
 | :------: | :------: |
 | BOOLEAN  | BOOLEAN  |
-|   CHAR   |   CHAR   |
-| VARCHAR  | VARCHAR  |
-|   DATE   |   DATE   |
-|  FLOAT   |  FLOAT   |
 | TINYINT  | TINYINT  |
 | SMALLINT | SMALLINT |
 |   INT    |   INT    |
 |  BIGINT  |  BIGINT  |
-|  DOUBLE  |  DOUBLE  |
+|BIGINT UNSIGNED|LARGEINT|
+| VARCHAR  | VARCHAR  |
+|   DATE   |   DATE   |
+|  FLOAT   |  FLOAT   |
 | DATETIME | DATETIME |
+|  DOUBLE  |  DOUBLE  |
 | DECIMAL  | DECIMAL  |
 
 
@@ -183,26 +183,26 @@ PROPERTIES (
 |    PostgreSQL    |  Doris   |
 | :--------------: | :------: |
 |     BOOLEAN      | BOOLEAN  |
-|       CHAR       |   CHAR   |
-|     VARCHAR      | VARCHAR  |
-|       DATE       |   DATE   |
-|       REAL       |  FLOAT   |
 |     SMALLINT     | SMALLINT |
 |       INT        |   INT    |
 |      BIGINT      |  BIGINT  |
-| DOUBLE PRECISION |  DOUBLE  |
+|     VARCHAR      | VARCHAR  |
+|       DATE       |   DATE   |
 |    TIMESTAMP     | DATETIME |
+|       REAL       |  FLOAT   |
+|      FLOAT       |  DOUBLE  |
 |     DECIMAL      | DECIMAL  |
 
 ### Oracle
 
 |  Oracle  |  Doris   |
 | :------: | :------: |
-|   CHAR   |   CHAR   |
 | VARCHAR  | VARCHAR  |
 |   DATE   | DATETIME |
 | SMALLINT | SMALLINT |
 |   INT    |   INT    |
+|   REAL   |   FLOAT  |
+|   FLOAT  |   DOUBLE |
 |  NUMBER  | DECIMAL  |
 
 
@@ -211,15 +211,15 @@ PROPERTIES (
 | SQLServer |  Doris   |
 | :-------: | :------: |
 |    BIT    | BOOLEAN  |
-|   CHAR    |   CHAR   |
-|  VARCHAR  | VARCHAR  |
-|   DATE    |   DATE   |
-|   REAL    |  FLOAT   |
 |  TINYINT  | TINYINT  |
 | SMALLINT  | SMALLINT |
 |    INT    |   INT    |
 |  BIGINT   |  BIGINT  |
+|  VARCHAR  | VARCHAR  |
+|   DATE    |   DATE   |
 | DATETIME  | DATETIME |
+|   REAL    |  FLOAT   |
+|   FLOAT   |  DOUBLE  |
 |  DECIMAL  | DECIMAL  |
 
 ### ClickHouse
diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
index 0a267bd9a6..9539f65aba 100644
--- a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
+++ b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
@@ -53,6 +53,7 @@ public class JdbcExecutor {
     private ResultSetMetaData resultSetMetaData = null;
     // Use HikariDataSource to help us manage the JDBC connections.
     private HikariDataSource dataSource = null;
+    private List<String> resultColumnTypeNames = null;
 
     public JdbcExecutor(byte[] thriftParams) throws Exception {
         TJdbcExecutorCtorParams request = new TJdbcExecutorCtorParams();
@@ -110,6 +111,19 @@ public class JdbcExecutor {
         }
     }
 
+    public List<String> getResultColumnTypeNames() throws UdfRuntimeException {
+        try {
+            int count = resultSetMetaData.getColumnCount();
+            resultColumnTypeNames = new ArrayList<>(count);
+            for (int i = 0; i < count; ++i) {
+                resultColumnTypeNames.add(resultSetMetaData.getColumnClassName(i + 1));
+            }
+            return resultColumnTypeNames;
+        } catch (SQLException e) {
+            throw new UdfRuntimeException("JDBC executor getResultColumnTypeNames has error: ", e);
+        }
+    }
+
     public void openTrans() throws UdfRuntimeException {
         try {
             if (conn != null) {
diff --git a/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy b/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy
index b84ee57072..a18156527c 100644
--- a/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy
+++ b/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy
@@ -55,7 +55,7 @@ suite("test_jdbc_query_pg", "p0") {
                 k2 char(100),
                 k3 varchar(128),
                 k4 date,
-                k5 double,
+                k5 float,
                 k6 smallint,
                 k7 int,
                 k8 bigint,
@@ -209,7 +209,7 @@ suite("test_jdbc_query_pg", "p0") {
                 `m_time` DATETIME NULL,
                 `app_id` BIGINT(20) NULL,
                 `t_id` BIGINT(20) NULL,
-                `deleted` TEXT NULL,
+                `deleted` boolean NULL,
                 `w_t_s` DATETIME NULL,
                 `rf_id` TEXT NULL,
                 `e_info` TEXT NULL,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org