You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by xu...@apache.org on 2022/10/13 15:07:27 UTC

[doris] branch master updated: [fix](array-type) fix the wrong result when import array element with double quotes (#12786)

This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 88e08a92d8 [fix](array-type) fix the wrong result when import array element with double quotes (#12786)
88e08a92d8 is described below

commit 88e08a92d839b795a4cfd44615b27f1a279e3c25
Author: carlvinhust2012 <hu...@126.com>
AuthorDate: Thu Oct 13 23:07:19 2022 +0800

    [fix](array-type) fix the wrong result when import array element with double quotes (#12786)
    
    Co-authored-by: hucheng01 <hu...@baidu.com>
---
 be/src/vec/data_types/data_type_array.cpp          | 23 ++++++++++++++++++----
 .../data/load_p0/broker_load/simple_array.data     |  4 ++--
 .../data/load_p0/broker_load/test_array_load.out   |  4 ++--
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/data_types/data_type_array.cpp b/be/src/vec/data_types/data_type_array.cpp
index 4a0c1ae049..f859dac193 100644
--- a/be/src/vec/data_types/data_type_array.cpp
+++ b/be/src/vec/data_types/data_type_array.cpp
@@ -221,12 +221,27 @@ Status DataTypeArray::from_string(ReadBuffer& rb, IColumn* column) const {
             continue;
         }
 
-        ReadBuffer read_buffer(rb.position(), nested_str_len);
+        // Note: skip whitespace around each element so a surrounding quote pair is still detected, e.g.
+        // ["2020-09-01", "2021-09-01"  , "2022-09-01" ] ==> ["2020-09-01","2021-09-01","2022-09-01"]
+        size_t begin_pos = 0;
+        size_t end_pos = nested_str_len - 1;
+        while (begin_pos < end_pos) {
+            if (isspace(*(rb.position() + begin_pos))) {
+                ++begin_pos;
+            } else if (isspace(*(rb.position() + end_pos))) {
+                --end_pos;
+            } else {
+                break;
+            }
+        }
+
         // dispose the case of ["123"] or ['123']
-        auto begin_char = *rb.position();
-        auto end_char = *(rb.position() + nested_str_len - 1);
+        ReadBuffer read_buffer(rb.position(), nested_str_len);
+        auto begin_char = *(rb.position() + begin_pos);
+        auto end_char = *(rb.position() + end_pos);
         if (begin_char == end_char && (begin_char == '"' || begin_char == '\'')) {
-            read_buffer = ReadBuffer(rb.position() + 1, nested_str_len - 2);
+            int64_t length = end_pos - begin_pos - 1;
+            read_buffer = ReadBuffer(rb.position() + begin_pos + 1, (length > 0 ? length : 0));
         }
 
         auto st = nested->from_string(read_buffer, &nested_column);
diff --git a/regression-test/data/load_p0/broker_load/simple_array.data b/regression-test/data/load_p0/broker_load/simple_array.data
index 88eb710915..614f610f16 100644
--- a/regression-test/data/load_p0/broker_load/simple_array.data
+++ b/regression-test/data/load_p0/broker_load/simple_array.data
@@ -1,5 +1,5 @@
-1/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/["1991-01-01"]/["1991-01-01 00:00:00"]/[0.33,0.67]/[3.1415926,0.878787878]/[1,1.2,1.3]
-2/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/\N/\N/\N/\N/[1,\N,1.3]
+1/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/["1991-01-01", "1992-02-02", "1993-03-03"]/["1991-01-01 00:00:00"]/[0.33,0.67]/[3.1415926,0.878787878]/[1,1.2,1.3]
+2/[1,2,3,4,5]/[32767,32768,32769]/[65534,65535,65536]/["a","b","c","d","e"]/["hello","world"]/['1991-01-01', '1992-02-02', '1993-03-03']/\N/\N/\N/[1,\N,1.3]
 3/\N/\N/\N/\N/\N/\N/\N/\N/\N/\N
 4/1,2,3,4,5/\N/\N/\N/\N/\N/\N/\N/\N/\N
 5/[1,2,3,4,5/\N/\N/\N/\N/\N/\N/\N/\N/\N
\ No newline at end of file
diff --git a/regression-test/data/load_p0/broker_load/test_array_load.out b/regression-test/data/load_p0/broker_load/test_array_load.out
index 0dfbd74d4f..c926e9534b 100644
--- a/regression-test/data/load_p0/broker_load/test_array_load.out
+++ b/regression-test/data/load_p0/broker_load/test_array_load.out
@@ -32,8 +32,8 @@
 100	[1, 2, 3]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c']	['hello', 'world']	[2022-07-13]	[2022-07-13 12:30:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[4, 5.5, 6.67]
 
 -- !select --
-1	[1, 2, 3, 4, 5]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
-2	[1, 2, 3, 4, 5]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	\N	\N	\N	\N	[1, NULL, 1.3]
+1	[1, 2, 3, 4, 5]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01, 1992-02-02, 1993-03-03]	[1991-01-01 00:00:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[1, 1.2, 1.3]
+2	[1, 2, 3, 4, 5]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c', 'd', 'e']	['hello', 'world']	[1991-01-01, 1992-02-02, 1993-03-03]	\N	\N	\N	[1, NULL, 1.3]
 3	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N
 100	[1, 2, 3]	[32767, 32768, 32769]	[65534, 65535, 65536]	['a', 'b', 'c']	['hello', 'world']	[2022-07-13]	[2022-07-13 12:30:00]	[0.33, 0.67]	[3.1415926, 0.878787878]	[4, 5.5, 6.67]
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org