You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2022/10/07 08:12:11 UTC

[doris] branch master updated: fix bug that last line of data lost for stream load when line delimiter is more than one character (#13066)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8b03977689 fix bug that last line of data lost for stream load when line delimiter is more than one character (#13066)
8b03977689 is described below

commit 8b0397768921fd56a3a24aeab350b18a884e7e2f
Author: weizuo93 <we...@apache.org>
AuthorDate: Fri Oct 7 16:12:05 2022 +0800

    fix bug that last line of data lost for stream load when line delimiter is more than one character (#13066)
---
 be/src/exec/plain_text_line_reader.cpp             |  4 +--
 .../load_p0/stream_load/test_line_delimiter.csv    |  1 +
 .../load_p0/stream_load/test_stream_load.groovy    | 42 ++++++++++++++++++++++
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/be/src/exec/plain_text_line_reader.cpp b/be/src/exec/plain_text_line_reader.cpp
index 6d9d25841b..06d55dccd2 100644
--- a/be/src/exec/plain_text_line_reader.cpp
+++ b/be/src/exec/plain_text_line_reader.cpp
@@ -200,9 +200,7 @@ Status PlainTextLineReader::read_line(const uint8_t** ptr, size_t* size, bool* e
             // for multi bytes delimiter we cannot set offset to avoid incomplete
             // delimiter
             // read from file reader
-            if (_line_delimiter_length == 1) {
-                offset = output_buf_read_remaining();
-            }
+            offset = output_buf_read_remaining();
             extend_output_buf();
             if ((_input_buf_limit > _input_buf_pos) && _more_input_bytes == 0) {
                 // we still have data in input which is not decompressed.
diff --git a/regression-test/data/load_p0/stream_load/test_line_delimiter.csv b/regression-test/data/load_p0/stream_load/test_line_delimiter.csv
new file mode 100644
index 0000000000..6a9e628eae
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_line_delimiter.csv
@@ -0,0 +1 @@
+1|aaweizuo2|bbweizuo3|cc
diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
index 0ac4e8095b..0e28397410 100644
--- a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
@@ -88,4 +88,46 @@ suite("test_stream_load", "p0") {
             assertEquals(1, json.NumberFilteredRows)
         }
     }
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE ${tableName} (
+          `id` int(11) NULL,
+          `value` varchar(64) NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "in_memory" = "false",
+        "storage_format" = "V2",
+        "disable_auto_compaction" = "false"
+        );
+    """
+
+    streamLoad {
+        table "${tableName}"
+
+        set 'line_delimiter', 'weizuo'
+        set 'column_separator', '|'
+        set 'columns', 'id, value'
+
+        file 'test_line_delimiter.csv'
+        time 10000 // limit inflight 10s
+
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+            assertEquals(3, json.NumberTotalRows)
+            assertEquals(0, json.NumberFilteredRows)
+        }
+    }
+
+    rowCount = sql "select count(1) from ${tableName}"
+    assertEquals(3, rowCount[0][0])
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org