You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/20 01:55:17 UTC
[incubator-doris] branch master updated: [fix](broker-scan-node) Remove trailing spaces in broker_scanner. Make it consistent with hive and trino behavior. (#9190)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5fa6e892be [fix](broker-scan-node) Remove trailing spaces in broker_scanner. Make it consistent with hive and trino behavior. (#9190)
5fa6e892be is described below
commit 5fa6e892beb8b51ff18b8a183fd5c92b568f5eae
Author: Jibing-Li <64...@users.noreply.github.com>
AuthorDate: Fri May 20 09:55:13 2022 +0800
[fix](broker-scan-node) Remove trailing spaces in broker_scanner. Make it consistent with hive and trino behavior. (#9190)
Hive and Trino/Presto automatically trim trailing spaces, but Doris does not.
This causes query results to differ from Hive's.
Add a new session variable "trim_tailing_spaces_for_external_table_query".
If set to true, the broker scan node trims the trailing spaces of each column value when reading CSV.
---
be/src/exec/broker_scanner.cpp | 29 ++++++++++++++++------
be/src/runtime/runtime_state.h | 4 +++
.../java/org/apache/doris/qe/SessionVariable.java | 14 +++++++++++
gensrc/thrift/PaloInternalService.thrift | 3 +++
4 files changed, 43 insertions(+), 7 deletions(-)
diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp
index c394424092..d9453fecf0 100644
--- a/be/src/exec/broker_scanner.cpp
+++ b/be/src/exec/broker_scanner.cpp
@@ -339,19 +339,20 @@ void BrokerScanner::split_line(const Slice& line) {
delete[] ptr;
} else {
const char* value = line.data;
- size_t start = 0; // point to the start pos of next col value.
- size_t curpos = 0; // point to the start pos of separator matching sequence.
- size_t p1 = 0; // point to the current pos of separator matching sequence.
+ size_t start = 0; // point to the start pos of next col value.
+ size_t curpos = 0; // point to the start pos of separator matching sequence.
+ size_t p1 = 0; // point to the current pos of separator matching sequence.
+ size_t non_space = 0; // point to the last pos of non_space charactor.
// Separator: AAAA
//
- // curpos
+ // p1
// ▼
// AAAA
// 1000AAAA2000AAAA
// ▲ ▲
// Start │
- // p1
+ // curpos
while (curpos < line.size) {
if (*(value + curpos + p1) != _value_separator[p1]) {
@@ -362,16 +363,30 @@ void BrokerScanner::split_line(const Slice& line) {
p1++;
if (p1 == _value_separator_length) {
// Match a separator
- _split_values.emplace_back(value + start, curpos - start);
+ non_space = curpos;
+ // Trim tailing spaces. Be consistent with hive and trino's behavior.
+ if (_state->trim_tailing_spaces_for_external_table_query()) {
+ while (non_space > start && *(value + non_space - 1) == ' ') {
+ non_space--;
+ }
+ }
+ _split_values.emplace_back(value + start, non_space - start);
start = curpos + _value_separator_length;
curpos = start;
p1 = 0;
+ non_space = 0;
}
}
}
CHECK(curpos == line.size) << curpos << " vs " << line.size;
- _split_values.emplace_back(value + start, curpos - start);
+ non_space = curpos;
+ if (_state->trim_tailing_spaces_for_external_table_query()) {
+ while (non_space > start && *(value + non_space - 1) == ' ') {
+ non_space--;
+ }
+ }
+ _split_values.emplace_back(value + start, non_space - start);
}
}
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index 8f5c0b86a1..eed32d8b82 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -326,6 +326,10 @@ public:
bool enable_vectorized_exec() const { return _query_options.enable_vectorized_engine; }
+ bool trim_tailing_spaces_for_external_table_query() const {
+ return _query_options.trim_tailing_spaces_for_external_table_query;
+ }
+
bool return_object_data_as_binary() const {
return _query_options.return_object_data_as_binary;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index aad54ecf75..ce9e9a40d0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -180,6 +180,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String ENABLE_PROJECTION = "enable_projection";
+ public static final String TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY = "trim_tailing_spaces_for_external_table_query";
+
// session origin value
public Map<Field, String> sessionOriginValue = new HashMap<Field, String>();
// check stmt is or not [select /*+ SET_VAR(...)*/ ...]
@@ -439,6 +441,9 @@ public class SessionVariable implements Serializable, Writable {
@VariableMgr.VarAttr(name = ENABLE_PROJECTION)
private boolean enableProjection = true;
+ @VariableMgr.VarAttr(name = TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY, needForward = true)
+ public boolean trimTailingSpacesForExternalTableQuery = false;
+
public String getBlockEncryptionMode() {
return blockEncryptionMode;
}
@@ -895,6 +900,14 @@ public class SessionVariable implements Serializable, Writable {
return enableProjection;
}
+ public boolean isTrimTailingSpacesForExternalTableQuery() {
+ return trimTailingSpacesForExternalTableQuery;
+ }
+
+ public void setTrimTailingSpacesForExternalTableQuery(boolean trimTailingSpacesForExternalTableQuery) {
+ this.trimTailingSpacesForExternalTableQuery = trimTailingSpacesForExternalTableQuery;
+ }
+
// Serialize to thrift object
// used for rest api
public TQueryOptions toThrift() {
@@ -912,6 +925,7 @@ public class SessionVariable implements Serializable, Writable {
tResult.setCodegenLevel(codegenLevel);
tResult.setEnableVectorizedEngine(enableVectorizedEngine);
tResult.setReturnObjectDataAsBinary(returnObjectDataAsBinary);
+ tResult.setTrimTailingSpacesForExternalTableQuery(trimTailingSpacesForExternalTableQuery);
tResult.setBatchSize(batchSize);
tResult.setDisableStreamPreaggregations(disableStreamPreaggregations);
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index a67a43d2d4..4787513baf 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -160,6 +160,9 @@ struct TQueryOptions {
// show bitmap data in result, if use this in mysql cli may make the terminal
// output corrupted character
43: optional bool return_object_data_as_binary = false
+
+ // trim tailing spaces while querying external table and stream load
+ 44: optional bool trim_tailing_spaces_for_external_table_query = false
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org