Posted to commits@doris.apache.org by xu...@apache.org on 2022/07/21 08:29:25 UTC

[doris] branch master updated: Add the supported sub-type for array (#10824)

This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5f6f35e886 Add the supported sub-type for array (#10824)
5f6f35e886 is described below

commit 5f6f35e88664eae4033ee6ed5b17e91e111dac68
Author: carlvinhust2012 <hu...@126.com>
AuthorDate: Thu Jul 21 16:29:17 2022 +0800

    Add the supported sub-type for array (#10824)
    
    1. This PR adds the supported sub-types for array, which were changed in #9916.
    2. Add regression tests for the supported sub-types.
    
    Co-authored-by: hucheng01 <hu...@baidu.com>
---
 .../java/org/apache/doris/analysis/TypeDef.java    |  14 +-
 .../data/load/broker_load/simple_array.json        |   5 +
 .../data/load/broker_load/simple_object_array.json |   5 +
 .../data/load/broker_load/test_array_load.out      |  33 ++++
 .../suites/load/broker_load/test_array_load.groovy | 198 +++++++++++++++++++++
 5 files changed, 252 insertions(+), 3 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
index 91ce563896..7af29a8fc2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
@@ -117,10 +117,18 @@ public class TypeDef implements ParseNode {
         if (type.isNull()) {
             throw new AnalysisException("Unsupported data type: " + type.toSql());
         }
-        if (!type.getPrimitiveType().isIntegerType()
-                && !type.getPrimitiveType().isCharFamily()) {
-            throw new AnalysisException("Array column just support INT/VARCHAR sub-type");
+        // check whether the array sub-type is supported
+        boolean isSupportedType = false;
+        for (Type subType : Type.getArraySubTypes()) {
+            if (type.getPrimitiveType() == subType.getPrimitiveType()) {
+                isSupportedType = true;
+                break;
+            }
         }
+        if (!isSupportedType) {
+            throw new AnalysisException("Array unsupported sub-type: " + type.toSql());
+        }
+
         if (type.getPrimitiveType().isStringType()
                 && !type.isAssignedStrLenInColDefinition()) {
             type.setLength(1);
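
For illustration, the user-facing effect of this check as a minimal SQL sketch
(the table names are hypothetical; the DDL shape and the DECIMAL sub-type mirror
the regression test added below, and ARRAY<HLL> is assumed to fall outside
Type.getArraySubTypes()):

    SET enable_array_type = true;

    -- accepted: DECIMAL(20, 6) is one of the sub-types exercised by the new test
    CREATE TABLE tbl_array_subtype_ok (
      `k1` INT NULL,
      `k2` ARRAY<DECIMAL(20, 6)> NULL
    ) ENGINE=OLAP
    DUPLICATE KEY(`k1`)
    DISTRIBUTED BY HASH(`k1`) BUCKETS 1
    PROPERTIES ("replication_allocation" = "tag.location.default: 1");

    -- rejected: an unsupported sub-type now fails with the more precise
    -- "Array unsupported sub-type: ..." instead of the old
    -- "Array column just support INT/VARCHAR sub-type"
    CREATE TABLE tbl_array_subtype_bad (
      `k1` INT NULL,
      `k2` ARRAY<HLL> NULL
    ) ENGINE=OLAP
    DUPLICATE KEY(`k1`)
    DISTRIBUTED BY HASH(`k1`) BUCKETS 1
    PROPERTIES ("replication_allocation" = "tag.location.default: 1");
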
diff --git a/regression-test/data/load/broker_load/simple_array.json b/regression-test/data/load/broker_load/simple_array.json
new file mode 100644
index 0000000000..15fc2a3964
--- /dev/null
+++ b/regression-test/data/load/broker_load/simple_array.json
@@ -0,0 +1,5 @@
+[{"k1": 1, "k2": [1,2,3,4,5], "k3": [32767,32768,32769], "k4": [65534,65535,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]},
+{"k1": 2, "k2": [6,7,8,9,10], "k3": [32767,32768,32769], "k4": [65534,65535,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]},
+{"k1": 3, "k2": [], "k3": [32767,32768,32769], "k4": [null,null,65536], "k5": ["a","b","c","d","e"], "k6": ["happy","birthday"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]},
+{"k1": 4, "k2": [null], "k3": [32767,32768,32769], "k4": [ null,null,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]},
+{"k1": 5, "k2": [null,null], "k3": [32767,32768,null], "k4": [65534,null,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}]
\ No newline at end of file
diff --git a/regression-test/data/load/broker_load/simple_object_array.json b/regression-test/data/load/broker_load/simple_object_array.json
new file mode 100644
index 0000000000..ca57e52676
--- /dev/null
+++ b/regression-test/data/load/broker_load/simple_object_array.json
@@ -0,0 +1,5 @@
+{"k1": 1, "k2": [1,2,3,4,5], "k3": [32767,32768,32769], "k4": [65534,65535,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
+{"k1": 2, "k2": [6,7,8,9,10], "k3": [32767,32768,32769], "k4": [65534,65535,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
+{"k1": 3, "k2": [], "k3": [32767,32768,32769], "k4": [null,null,65536], "k5": ["a","b","c","d","e"], "k6": ["happy","birthday"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
+{"k1": 4, "k2": [null], "k3": [32767,32768,32769], "k4": [ null,null,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
+{"k1": 5, "k2": [null,null], "k3": [32767,32768,null], "k4": [65534,null,65536], "k5": ["a","b","c","d","e"], "k6": ["hello","world"], "k7": ["1991-01-01"], "k8": ["1991-01-01 00:00:00"], "k9": [0.33,0.67], "k10": [3.1415926,0.878787878], "k11": [1,1.2,1.3]}
\ No newline at end of file
diff --git a/regression-test/data/load/broker_load/test_array_load.out b/regression-test/data/load/broker_load/test_array_load.out
new file mode 100644
index 0000000000..e6189ea55d
--- /dev/null
+++ b/regression-test/data/load/broker_load/test_array_load.out
@@ -0,0 +1,33 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+1	[1, 2, 3, 4, 5]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+2	[6, 7, 8, 9, 10]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+3	[]	[32767, 32768, 32769]	[NULL, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['happy', 'birthday']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+4	[NULL]	[32767, 32768, 32769]	[NULL, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+5	[NULL, NULL]	[32767, 32768, NULL]	[65534, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+100	[1, 2, 3]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c']	['hello', 'world']	[2022-07-13]	[2022-07-13 12:30:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[4, 5.5, 6.67]
+
+-- !select --
+1	[1, 2, 3, 4, 5]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+2	[6, 7, 8, 9, 10]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+3	[]	[32767, 32768, 32769]	[NULL, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['happy', 'birthday']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+4	[NULL]	[32767, 32768, 32769]	[NULL, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+5	[NULL, NULL]	[32767, 32768, NULL]	[65534, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+100	[1, 2, 3]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c']	['hello', 'world']	[2022-07-13]	[2022-07-13 12:30:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[4, 5.5, 6.67]
+
+-- !select --
+1	[1, 2, 3, 4, 5]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+2	[6, 7, 8, 9, 10]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+3	[]	[32767, 32768, 32769]	[NULL, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['happy', 'birthday']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+4	[NULL]	[32767, 32768, 32769]	[NULL, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+5	[NULL, NULL]	[32767, 32768, NULL]	[65534, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+100	[1, 2, 3]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c']	['hello', 'world']	[2022-07-13]	[2022-07-13 12:30:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[4, 5.5, 6.67]
+
+-- !select --
+1	[1, 2, 3, 4, 5]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+2	[6, 7, 8, 9, 10]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+3	[]	[32767, 32768, 32769]	[NULL, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['happy', 'birthday']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+4	[NULL]	[32767, 32768, 32769]	[NULL, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+5	[NULL, NULL]	[32767, 32768, NULL]	[65534, NULL, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+100	[1, 2, 3]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c']	['hello', 'world']	[2022-07-13]	[2022-07-13 12:30:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[4, 5.5, 6.67]
+
diff --git a/regression-test/suites/load/broker_load/test_array_load.groovy b/regression-test/suites/load/broker_load/test_array_load.groovy
new file mode 100644
index 0000000000..d74dd5cbe7
--- /dev/null
+++ b/regression-test/suites/load/broker_load/test_array_load.groovy
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_load", "load") {
+    // name of the test table used throughout this suite
+    def testTable = "tbl_test_array_load"
+    
+    def create_test_table = {testTablex, enable_vectorized_flag ->
+        // multi-line sql
+        sql """ set enable_array_type = true """
+        
+        if (enable_vectorized_flag) {
+            sql """ set enable_vectorized_engine = true """
+        }
+
+        def result1 = sql """
+            CREATE TABLE IF NOT EXISTS ${testTable} (
+              `k1` INT(11) NULL COMMENT "",
+              `k2` ARRAY<SMALLINT> NOT NULL COMMENT "",
+              `k3` ARRAY<INT(11)> NOT NULL COMMENT "",
+              `k4` ARRAY<BIGINT> NOT NULL COMMENT "",
+              `k5` ARRAY<CHAR> NOT NULL COMMENT "",
+              `k6` ARRAY<VARCHAR(20)> NULL COMMENT "",
+              `k7` ARRAY<DATE> NOT NULL COMMENT "", 
+              `k8` ARRAY<DATETIME> NOT NULL COMMENT "",
+              `k9` ARRAY<FLOAT> NOT NULL COMMENT "",
+              `k10` ARRAY<DOUBLE> NOT NULL COMMENT "",
+              `k11` ARRAY<DECIMAL(20, 6)> NULL COMMENT ""
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+            )
+            """
+        
+        // a DDL/DML statement returns one row with a single column: the updated row count
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+        
+        // insert 1 row to check whether the table is ok
+        def result2 = sql """ INSERT INTO ${testTable} VALUES
+                        (100, [1, 2, 3], [32767, 32768, 32769], [65534, 65535, 65536], ['a', 'b', 'c'], ["hello", "world"], 
+                        ['2022-07-13'], ['2022-07-13 12:30:00'], [0.33, 0.67], [3.1415926, 0.878787878], [4, 5.5, 6.67])
+                        """
+        assertTrue(result2.size() == 1)
+        assertTrue(result2[0].size() == 1)
+        assertTrue(result2[0][0] == 1, "Insert should update 1 row")
+    }
+
+    def load_array_data = {strip_flag, read_flag, format_flag, exprs, json_paths, 
+                            json_root, where_expr, fuzzy_flag, file_name ->
+        // load the json data
+        streamLoad {
+            table "tbl_test_array_load"
+            
+            // set http request header params
+            set 'strip_outer_array', strip_flag
+            set 'read_json_by_line', read_flag
+            set 'format', format_flag
+            set 'columns', exprs
+            set 'jsonpaths', json_paths
+            set 'json_root', json_root
+            set 'where', where_expr
+            set 'fuzzy_parse', fuzzy_flag
+            file file_name // import json file
+            time 10000 // limit inflight 10s
+
+            // if a check callback is declared, the default check conditions are ignored,
+            // so every condition must be verified here
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+                assertEquals("success", json.Status.toLowerCase())
+                assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows)
+                assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+            }
+        }
+    }
+    
+    def load_from_hdfs = {testTablex, label, hdfsFilePath, format, brokerName, hdfsUser, hdfsPasswd ->
+        def result1 = sql """
+                        LOAD LABEL ${label} (
+                            DATA INFILE("${hdfsFilePath}")
+                            INTO TABLE ${testTablex} 
+                            FORMAT AS "${format}")
+                        WITH BROKER "${brokerName}" (
+                        "username"="${hdfsUser}",
+                        "password"="${hdfsPasswd}")
+                        PROPERTIES  (
+                        "timeout"="1200",
+                        "max_filter_ratio"="0.1");
+                        """
+        
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Load statement should update 0 rows")
+    }
+    
+    // case1: load array data in JSON format with the vectorized engine enabled
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+        
+        create_test_table.call(testTable, true)
+
+        load_array_data.call('true', '', 'json', '', '', '', '', '', 'simple_array.json')
+        
+        // select the table and check whether the data is correct
+        qt_select "select * from ${testTable} order by k1"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
+    // case2: load array data in JSON format with the vectorized engine disabled
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+        
+        create_test_table.call(testTable, false)
+
+        load_array_data.call('true', '', 'json', '', '', '', '', '', 'simple_array.json')
+        
+        // select the table and check whether the data is correct
+        qt_select "select * from ${testTable} order by k1"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
+    // if 'enableHdfs' in regression-conf.groovy has been set to true,
+    // the cases below will also run.
+    if (enableHdfs()) {
+        brokerName = getBrokerName()
+        hdfsUser = getHdfsUser()
+        hdfsPasswd = getHdfsPasswd()
+        def hdfs_file_path = uploadToHdfs "broker_load/simple_object_array.json"
+        def format = "json" 
+
+        // case3: load array data from HDFS with the vectorized engine enabled
+        try {
+            sql "DROP TABLE IF EXISTS ${testTable}"
+            
+            create_test_table.call(testTable, true)
+
+            def test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
+            load_from_hdfs.call(testTable, test_load_label, hdfs_file_path, format,
+                                brokerName, hdfsUser, hdfsPasswd)
+            
+            // wait for the asynchronous load to finish (see the polling note after the patch)
+            sleep(5000)
+            
+            // select the table and check whether the data is correct
+            qt_select "select * from ${testTable} order by k1"
+
+        } finally {
+            try_sql("DROP TABLE IF EXISTS ${testTable}")
+        }
+
+        // case4: load array data from HDFS with the vectorized engine disabled
+        try {
+            sql "DROP TABLE IF EXISTS ${testTable}"
+            
+            create_test_table.call(testTable, false)
+
+            def test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
+            load_from_hdfs.call(testTable, test_load_label, hdfs_file_path, format,
+                                brokerName, hdfsUser, hdfsPasswd)
+            
+            // wait for the asynchronous load to finish
+            sleep(5000)
+            
+            // select the table and check whether the data is correct
+            qt_select "select * from ${testTable} order by k1"
+
+        } finally {
+            try_sql("DROP TABLE IF EXISTS ${testTable}")
+        }
+    }
+}
\ No newline at end of file
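
Note for case3/case4: broker load is asynchronous, so LOAD LABEL only submits
the job (hence the fixed sleep(5000) before querying). A sketch of a more
deterministic wait, assuming the standard SHOW LOAD syntax and using the
UUID-based label generated in the test as a placeholder:

    -- poll until the State column reaches a terminal value:
    --   FINISHED  -> the data is visible and can be verified
    --   CANCELLED -> the load failed
    SHOW LOAD WHERE LABEL = "<test_load_label>";
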

