You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/14 05:06:18 UTC
[doris] 03/06: [Vectorized](jdbc) add check type for jdbc table (#14501)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8689265ea7f82db17a719286dde156f2a33ec617
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Thu Dec 8 10:27:47 2022 +0800
[Vectorized](jdbc) add check type for jdbc table (#14501)
---
be/src/vec/exec/vjdbc_connector.cpp | 131 +++++++++++++++++++++
be/src/vec/exec/vjdbc_connector.h | 4 +
.../docs/ecosystem/external-table/jdbc-of-doris.md | 31 +++--
.../docs/ecosystem/external-table/jdbc-of-doris.md | 30 ++---
.../java/org/apache/doris/udf/JdbcExecutor.java | 14 +++
.../suites/jdbc_p0/test_jdbc_query_pg.groovy | 4 +-
6 files changed, 181 insertions(+), 33 deletions(-)
diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp
index f3a7688d38..ce171ad569 100644
--- a/be/src/vec/exec/vjdbc_connector.cpp
+++ b/be/src/vec/exec/vjdbc_connector.cpp
@@ -22,6 +22,7 @@
#include "gen_cpp/Types_types.h"
#include "gutil/strings/substitute.h"
#include "jni.h"
+#include "runtime/define_primitive_type.h"
#include "runtime/user_function_cache.h"
#include "util/jni-util.h"
#include "vec/columns/column_nullable.h"
@@ -34,6 +35,7 @@ const char* JDBC_EXECUTOR_CTOR_SIGNATURE = "([B)V";
const char* JDBC_EXECUTOR_WRITE_SIGNATURE = "(Ljava/lang/String;)I";
const char* JDBC_EXECUTOR_HAS_NEXT_SIGNATURE = "()Z";
const char* JDBC_EXECUTOR_GET_BLOCK_SIGNATURE = "(I)Ljava/util/List;";
+const char* JDBC_EXECUTOR_GET_TYPES_SIGNATURE = "()Ljava/util/List;";
const char* JDBC_EXECUTOR_CLOSE_SIGNATURE = "()V";
const char* JDBC_EXECUTOR_CONVERT_DATE_SIGNATURE = "(Ljava/lang/Object;)J";
const char* JDBC_EXECUTOR_CONVERT_DATETIME_SIGNATURE = "(Ljava/lang/Object;)J";
@@ -172,6 +174,125 @@ Status JdbcConnector::query() {
if (colunm_count != materialize_num) {
return Status::InternalError("input and output column num not equal of jdbc query.");
}
+ LOG(INFO) << "JdbcConnector::query has exec success: " << _sql_str;
+ RETURN_IF_ERROR(_check_column_type());
+ return Status::OK();
+}
+
+Status JdbcConnector::_check_column_type() {
+ JNIEnv* env = nullptr;
+ RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env));
+ jobject type_lists =
+ env->CallNonvirtualObjectMethod(_executor_obj, _executor_clazz, _executor_get_types_id);
+ auto column_size = _tuple_desc->slots().size();
+ for (int column_index = 0, materialized_column_index = 0; column_index < column_size;
+ ++column_index) {
+ auto slot_desc = _tuple_desc->slots()[column_index];
+ if (!slot_desc->is_materialized()) {
+ continue;
+ }
+ jobject column_type =
+ env->CallObjectMethod(type_lists, _executor_get_list_id, materialized_column_index);
+
+ const std::string& type_str = _jobject_to_string(env, column_type);
+ RETURN_IF_ERROR(_check_type(slot_desc, type_str));
+ env->DeleteLocalRef(column_type);
+ materialized_column_index++;
+ }
+ env->DeleteLocalRef(type_lists);
+ return JniUtil::GetJniExceptionMsg(env);
+}
+/* type mapping: https://doris.apache.org/zh-CN/docs/dev/ecosystem/external-table/jdbc-of-doris?_highlight=jdbc
+
+Doris MYSQL PostgreSQL Oracle SQLServer
+
+BOOLEAN java.lang.Boolean java.lang.Boolean java.lang.Boolean
+TINYINT java.lang.Integer java.lang.Short
+SMALLINT java.lang.Integer java.lang.Integer java.math.BigDecimal java.lang.Short
+INT java.lang.Integer java.lang.Integer java.math.BigDecimal java.lang.Integer
+BIGINT java.lang.Long java.lang.Long java.lang.Long
+LARGET java.math.BigInteger
+DECIMAL java.math.BigDecimal java.math.BigDecimal java.math.BigDecimal java.math.BigDecimal
+VARCHAR java.lang.String java.lang.String java.lang.String java.lang.String
+DOUBLE java.lang.Double java.lang.Double java.lang.Double java.lang.Double
+FLOAT java.lang.Float java.lang.Float java.lang.Float
+DATE java.sql.Date java.sql.Date java.sql.Date
+DATETIME java.sql.Timestamp java.sql.Timestamp java.sql.Timestamp java.sql.Timestamp
+
+NOTE: because oracle always use number(p,s) to create all numerical type, so it's java type maybe java.math.BigDecimal
+*/
+
+Status JdbcConnector::_check_type(SlotDescriptor* slot_desc, const std::string& type_str) {
+ const std::string error_msg = fmt::format(
+ "Fail to convert jdbc type of {} to doris type {} on column: {}. You need to "
+ "check this column type between external table and doris table.",
+ type_str, slot_desc->type().debug_string(), slot_desc->col_name());
+ switch (slot_desc->type().type) {
+ case TYPE_BOOLEAN: {
+ if (type_str != "java.lang.Boolean" && type_str != "java.math.BigDecimal") {
+ return Status::InternalError(error_msg);
+ }
+ break;
+ }
+ case TYPE_TINYINT:
+ case TYPE_SMALLINT:
+ case TYPE_INT: {
+ if (type_str != "java.lang.Short" && type_str != "java.lang.Integer" &&
+ type_str != "java.math.BigDecimal") {
+ return Status::InternalError(error_msg);
+ }
+ break;
+ }
+ case TYPE_BIGINT:
+ case TYPE_LARGEINT: {
+ if (type_str != "java.lang.Long" && type_str != "java.math.BigDecimal" &&
+ type_str != "java.math.BigInteger") {
+ return Status::InternalError(error_msg);
+ }
+ break;
+ }
+ case TYPE_FLOAT: {
+ if (type_str != "java.lang.Float" && type_str != "java.math.BigDecimal") {
+ return Status::InternalError(error_msg);
+ }
+ break;
+ }
+ case TYPE_DOUBLE: {
+ if (type_str != "java.lang.Double" && type_str != "java.math.BigDecimal") {
+ return Status::InternalError(error_msg);
+ }
+ break;
+ }
+ case TYPE_CHAR:
+ case TYPE_VARCHAR:
+ case TYPE_STRING: {
+ //now here break directly
+ break;
+ }
+ case TYPE_DATE:
+ case TYPE_DATEV2:
+ case TYPE_TIMEV2:
+ case TYPE_DATETIME:
+ case TYPE_DATETIMEV2: {
+ if (type_str != "java.sql.Timestamp" && type_str != "java.time.LocalDateTime" &&
+ type_str != "java.sql.Date") {
+ return Status::InternalError(error_msg);
+ }
+ break;
+ }
+ case TYPE_DECIMALV2:
+ case TYPE_DECIMAL32:
+ case TYPE_DECIMAL64:
+ case TYPE_DECIMAL128I: {
+ if (type_str != "java.math.BigDecimal") {
+ return Status::InternalError(error_msg);
+ }
+ break;
+ }
+ default: {
+ return Status::InternalError(error_msg);
+ }
+ }
return Status::OK();
}
@@ -262,6 +383,8 @@ Status JdbcConnector::_register_func_id(JNIEnv* env) {
_executor_finish_trans_id));
RETURN_IF_ERROR(register_id(_executor_clazz, "rollbackTrans",
JDBC_EXECUTOR_TRANSACTION_SIGNATURE, _executor_abort_trans_id));
+ RETURN_IF_ERROR(register_id(_executor_clazz, "getResultColumnTypeNames",
+ JDBC_EXECUTOR_GET_TYPES_SIGNATURE, _executor_get_types_id));
return Status::OK();
}
@@ -314,6 +437,14 @@ Status JdbcConnector::_convert_column_data(JNIEnv* env, jobject jobj,
reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(num);
break;
}
+ case TYPE_LARGEINT: {
+ StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
+ std::string data = _jobject_to_string(env, jobj);
+ __int128 num =
+ StringParser::string_to_int<__int128>(data.data(), data.size(), &parse_result);
+ reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(num);
+ break;
+ }
case TYPE_DECIMALV2: {
std::string data = _jobject_to_string(env, jobj);
DecimalV2Value decimal_slot;
diff --git a/be/src/vec/exec/vjdbc_connector.h b/be/src/vec/exec/vjdbc_connector.h
index f3575dfd52..e6da1a0151 100644
--- a/be/src/vec/exec/vjdbc_connector.h
+++ b/be/src/vec/exec/vjdbc_connector.h
@@ -19,6 +19,7 @@
#include <jni.h>
+#include "common/status.h"
#include "exec/table_connector.h"
namespace doris {
@@ -60,6 +61,8 @@ public:
private:
Status _register_func_id(JNIEnv* env);
+ Status _check_column_type();
+ Status _check_type(SlotDescriptor*, const std::string& type_str);
Status _convert_column_data(JNIEnv* env, jobject jobj, const SlotDescriptor* slot_desc,
vectorized::IColumn* column_ptr);
std::string _jobject_to_string(JNIEnv* env, jobject jobj);
@@ -78,6 +81,7 @@ private:
jmethodID _executor_read_id;
jmethodID _executor_has_next_id;
jmethodID _executor_get_blocks_id;
+ jmethodID _executor_get_types_id;
jmethodID _executor_close_id;
jmethodID _executor_get_list_id;
jmethodID _executor_get_list_size_id;
diff --git a/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md b/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md
index 552cd7e848..8d19f7ffab 100644
--- a/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md
+++ b/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md
@@ -162,16 +162,16 @@ There are different data types among different databases. Here is a list of the
| MySQL | Doris |
| :------: | :------: |
| BOOLEAN | BOOLEAN |
-| CHAR | CHAR |
-| VARCHAR | VARCHAR |
-| DATE | DATE |
-| FLOAT | FLOAT |
| TINYINT | TINYINT |
| SMALLINT | SMALLINT |
| INT | INT |
| BIGINT | BIGINT |
-| DOUBLE | DOUBLE |
+|BIGINT UNSIGNED|LARGEINT|
+| VARCHAR | VARCHAR |
+| DATE | DATE |
+| FLOAT | FLOAT |
| DATETIME | DATETIME |
+| DOUBLE | DOUBLE |
| DECIMAL | DECIMAL |
@@ -180,43 +180,42 @@ There are different data types among different databases. Here is a list of the
| PostgreSQL | Doris |
| :--------------: | :------: |
| BOOLEAN | BOOLEAN |
-| CHAR | CHAR |
-| VARCHAR | VARCHAR |
-| DATE | DATE |
-| REAL | FLOAT |
| SMALLINT | SMALLINT |
| INT | INT |
| BIGINT | BIGINT |
-| DOUBLE PRECISION | DOUBLE |
+| VARCHAR | VARCHAR |
+| DATE | DATE |
| TIMESTAMP | DATETIME |
+| REAL | FLOAT |
+| FLOAT | DOUBLE |
| DECIMAL | DECIMAL |
### Oracle
| Oracle | Doris |
| :------: | :------: |
-| CHAR | CHAR |
| VARCHAR | VARCHAR |
| DATE | DATETIME |
| SMALLINT | SMALLINT |
| INT | INT |
+| REAL | FLOAT |
+| FLOAT | DOUBLE |
| NUMBER | DECIMAL |
-
### SQL server
| SQLServer | Doris |
| :-------: | :------: |
| BIT | BOOLEAN |
-| CHAR | CHAR |
-| VARCHAR | VARCHAR |
-| DATE | DATE |
-| REAL | FLOAT |
| TINYINT | TINYINT |
| SMALLINT | SMALLINT |
| INT | INT |
| BIGINT | BIGINT |
+| VARCHAR | VARCHAR |
+| DATE | DATE |
| DATETIME | DATETIME |
+| REAL | FLOAT |
+| FLOAT | DOUBLE |
| DECIMAL | DECIMAL |
### ClickHouse
diff --git a/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md b/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md
index d333e8850f..4ef47e1d66 100644
--- a/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md
+++ b/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md
@@ -165,16 +165,16 @@ PROPERTIES (
| MySQL | Doris |
| :------: | :------: |
| BOOLEAN | BOOLEAN |
-| CHAR | CHAR |
-| VARCHAR | VARCHAR |
-| DATE | DATE |
-| FLOAT | FLOAT |
| TINYINT | TINYINT |
| SMALLINT | SMALLINT |
| INT | INT |
| BIGINT | BIGINT |
-| DOUBLE | DOUBLE |
+|BIGINT UNSIGNED|LARGEINT|
+| VARCHAR | VARCHAR |
+| DATE | DATE |
+| FLOAT | FLOAT |
| DATETIME | DATETIME |
+| DOUBLE | DOUBLE |
| DECIMAL | DECIMAL |
@@ -183,26 +183,26 @@ PROPERTIES (
| PostgreSQL | Doris |
| :--------------: | :------: |
| BOOLEAN | BOOLEAN |
-| CHAR | CHAR |
-| VARCHAR | VARCHAR |
-| DATE | DATE |
-| REAL | FLOAT |
| SMALLINT | SMALLINT |
| INT | INT |
| BIGINT | BIGINT |
-| DOUBLE PRECISION | DOUBLE |
+| VARCHAR | VARCHAR |
+| DATE | DATE |
| TIMESTAMP | DATETIME |
+| REAL | FLOAT |
+| FLOAT | DOUBLE |
| DECIMAL | DECIMAL |
### Oracle
| Oracle | Doris |
| :------: | :------: |
-| CHAR | CHAR |
| VARCHAR | VARCHAR |
| DATE | DATETIME |
| SMALLINT | SMALLINT |
| INT | INT |
+| REAL | FLOAT |
+| FLOAT | DOUBLE |
| NUMBER | DECIMAL |
@@ -211,15 +211,15 @@ PROPERTIES (
| SQLServer | Doris |
| :-------: | :------: |
| BIT | BOOLEAN |
-| CHAR | CHAR |
-| VARCHAR | VARCHAR |
-| DATE | DATE |
-| REAL | FLOAT |
| TINYINT | TINYINT |
| SMALLINT | SMALLINT |
| INT | INT |
| BIGINT | BIGINT |
+| VARCHAR | VARCHAR |
+| DATE | DATE |
| DATETIME | DATETIME |
+| REAL | FLOAT |
+| FLOAT | DOUBLE |
| DECIMAL | DECIMAL |
### ClickHouse
diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
index 0a267bd9a6..9539f65aba 100644
--- a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
+++ b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
@@ -53,6 +53,7 @@ public class JdbcExecutor {
private ResultSetMetaData resultSetMetaData = null;
// Use HikariDataSource to help us manage the JDBC connections.
private HikariDataSource dataSource = null;
+ private List<String> resultColumnTypeNames = null;
public JdbcExecutor(byte[] thriftParams) throws Exception {
TJdbcExecutorCtorParams request = new TJdbcExecutorCtorParams();
@@ -110,6 +111,19 @@ public class JdbcExecutor {
}
}
+ public List<String> getResultColumnTypeNames() throws UdfRuntimeException {
+ try {
+ int count = resultSetMetaData.getColumnCount();
+ resultColumnTypeNames = new ArrayList<>(count);
+ for (int i = 0; i < count; ++i) {
+ resultColumnTypeNames.add(resultSetMetaData.getColumnClassName(i + 1));
+ }
+ return resultColumnTypeNames;
+ } catch (SQLException e) {
+ throw new UdfRuntimeException("JDBC executor getResultColumnTypeNames has error: ", e);
+ }
+ }
+
public void openTrans() throws UdfRuntimeException {
try {
if (conn != null) {
diff --git a/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy b/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy
index b84ee57072..a18156527c 100644
--- a/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy
+++ b/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy
@@ -55,7 +55,7 @@ suite("test_jdbc_query_pg", "p0") {
k2 char(100),
k3 varchar(128),
k4 date,
- k5 double,
+ k5 float,
k6 smallint,
k7 int,
k8 bigint,
@@ -209,7 +209,7 @@ suite("test_jdbc_query_pg", "p0") {
`m_time` DATETIME NULL,
`app_id` BIGINT(20) NULL,
`t_id` BIGINT(20) NULL,
- `deleted` TEXT NULL,
+ `deleted` boolean NULL,
`w_t_s` DATETIME NULL,
`rf_id` TEXT NULL,
`e_info` TEXT NULL,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org