You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/26 15:50:26 UTC

[doris] branch branch-1.2-lts updated (f51cfce6c3 -> caeed14e6d)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


    from f51cfce6c3 [fix](compile) fix compile failed in some case due to std::pair
     new afe5c517d9 [Improvement](S3) support access s3 via temporary security credentials (#15340)
     new 683262c3c1 [fix](multi-catalog) throw NPE when reading data after EOF (#15358)
     new caeed14e6d [fix](inbitmap) fix core dump caused by bitmap filter with union (#15333)

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/util/s3_util.cpp                            |  3 +++
 be/src/util/s3_util.h                              |  1 +
 .../exec/format/parquet/vparquet_column_reader.cpp |  1 -
 .../exec/format/parquet/vparquet_column_reader.h   |  4 ++--
 .../exec/format/parquet/vparquet_group_reader.cpp  |  3 ++-
 .../import/import-scenes/external-storage-load.md  | 16 ++++++++++++-
 .../import/import-way/s3-load-manual.md            | 16 ++++++++++++-
 .../Backup-and-Restore/CREATE-REPOSITORY.md        | 17 ++++++++++++++
 .../import/import-scenes/external-storage-load.md  | 16 ++++++++++++-
 .../import/import-way/s3-load-manual.md            | 14 +++++++++++-
 .../Backup-and-Restore/CREATE-REPOSITORY.md        | 17 ++++++++++++++
 .../doris/analysis/BitmapFilterPredicate.java      |  5 +++++
 .../java/org/apache/doris/backup/S3Storage.java    | 18 +++++++++++----
 .../java/org/apache/doris/catalog/S3Resource.java  |  8 +++++++
 .../org/apache/doris/planner/OriginalPlanner.java  |  2 +-
 .../org/apache/doris/planner/SetOperationNode.java | 26 ++++++++++++++++++++++
 .../data/query_p0/join/test_bitmap_filter.out      | 16 +++++++++++++
 .../suites/query_p0/join/test_bitmap_filter.groovy |  5 +++++
 18 files changed, 175 insertions(+), 13 deletions(-)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 02/03: [fix](multi-catalog) throw NPE when reading data after EOF (#15358)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 683262c3c18b836bb21a78a0a7398e9ef0993d9c
Author: Ashin Gau <As...@users.noreply.github.com>
AuthorDate: Mon Dec 26 22:49:35 2022 +0800

    [fix](multi-catalog) throw NPE when reading data after EOF (#15358)
    
    1. Fix 1 bug:
    A null pointer exception was thrown when reading data after the reader had reached the end of the file, so `_do_lazy_read` now returns directly when it reads no data.
    
    2. Optimize code:
    Remove unused parameters.
    
    3. Fix regression test
---
 be/src/vec/exec/format/parquet/vparquet_column_reader.cpp | 1 -
 be/src/vec/exec/format/parquet/vparquet_column_reader.h   | 4 ++--
 be/src/vec/exec/format/parquet/vparquet_group_reader.cpp  | 3 ++-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 75f43b4730..79760f82a6 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -27,7 +27,6 @@
 namespace doris::vectorized {
 
 Status ParquetColumnReader::create(FileReader* file, FieldSchema* field,
-                                   const ParquetReadColumn& column,
                                    const tparquet::RowGroup& row_group,
                                    const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
                                    std::unique_ptr<ParquetColumnReader>& reader,
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
index de0ec185b9..f8d8085df8 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
@@ -107,7 +107,7 @@ public:
     virtual Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type,
                                     ColumnSelectVector& select_vector, size_t batch_size,
                                     size_t* read_rows, bool* eof) = 0;
-    static Status create(FileReader* file, FieldSchema* field, const ParquetReadColumn& column,
+    static Status create(FileReader* file, FieldSchema* field,
                          const tparquet::RowGroup& row_group,
                          const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
                          std::unique_ptr<ParquetColumnReader>& reader, size_t max_buf_size);
@@ -190,4 +190,4 @@ private:
     level_t _EMPTY_ARRAY = -1;
     level_t _NULL_ARRAY = -1;
 };
-}; // namespace doris::vectorized
\ No newline at end of file
+}; // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 24441896a9..e3f841f1b1 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -57,7 +57,7 @@ Status RowGroupReader::init(const FieldDescriptor& schema, std::vector<RowRange>
     for (auto& read_col : _read_columns) {
         auto field = const_cast<FieldSchema*>(schema.get_column(read_col._file_slot_name));
         std::unique_ptr<ParquetColumnReader> reader;
-        RETURN_IF_ERROR(ParquetColumnReader::create(_file_reader, field, read_col, _row_group_meta,
+        RETURN_IF_ERROR(ParquetColumnReader::create(_file_reader, field, _row_group_meta,
                                                     _read_ranges, _ctz, reader, max_buf_size));
         auto col_iter = col_offsets.find(read_col._parquet_col_id);
         if (col_iter != col_offsets.end()) {
@@ -233,6 +233,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
         DCHECK_EQ(pre_read_rows + _cached_filtered_rows, 0);
         *read_rows = 0;
         *batch_eof = true;
+        return Status::OK();
     }
 
     ColumnSelectVector& select_vector = *select_vector_ptr;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 01/03: [Improvement](S3) support access s3 via temporary security credentials (#15340)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit afe5c517d992305c18b8ed3f8c6c199bfb19e44e
Author: Yulei-Yang <yu...@gmail.com>
AuthorDate: Mon Dec 26 00:31:55 2022 +0800

    [Improvement](S3) support access s3 via temporary security credentials (#15340)
---
 be/src/util/s3_util.cpp                                |  3 +++
 be/src/util/s3_util.h                                  |  1 +
 .../import/import-scenes/external-storage-load.md      | 16 +++++++++++++++-
 .../data-operate/import/import-way/s3-load-manual.md   | 16 +++++++++++++++-
 .../Backup-and-Restore/CREATE-REPOSITORY.md            | 17 +++++++++++++++++
 .../import/import-scenes/external-storage-load.md      | 16 +++++++++++++++-
 .../data-operate/import/import-way/s3-load-manual.md   | 14 +++++++++++++-
 .../Backup-and-Restore/CREATE-REPOSITORY.md            | 17 +++++++++++++++++
 .../main/java/org/apache/doris/backup/S3Storage.java   | 18 ++++++++++++++----
 .../main/java/org/apache/doris/catalog/S3Resource.java |  8 ++++++++
 10 files changed, 118 insertions(+), 8 deletions(-)

diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index f9b67e57d7..26227cf63e 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -117,6 +117,9 @@ std::shared_ptr<Aws::S3::S3Client> ClientFactory::create(
     Aws::Auth::AWSCredentials aws_cred(properties.find(S3_AK)->second,
                                        properties.find(S3_SK)->second);
     DCHECK(!aws_cred.IsExpiredOrEmpty());
+    if (properties.find(S3_TOKEN) != properties.end()) {
+        aws_cred.SetSessionToken(properties.find(S3_TOKEN)->second);
+    }
 
     Aws::Client::ClientConfiguration aws_config;
     aws_config.endpointOverride = properties.find(S3_ENDPOINT)->second;
diff --git a/be/src/util/s3_util.h b/be/src/util/s3_util.h
index 74c723bbec..79deec6225 100644
--- a/be/src/util/s3_util.h
+++ b/be/src/util/s3_util.h
@@ -35,6 +35,7 @@ const static std::string S3_AK = "AWS_ACCESS_KEY";
 const static std::string S3_SK = "AWS_SECRET_KEY";
 const static std::string S3_ENDPOINT = "AWS_ENDPOINT";
 const static std::string S3_REGION = "AWS_REGION";
+const static std::string S3_TOKEN = "AWS_TOKEN";
 const static std::string S3_MAX_CONN_SIZE = "AWS_MAX_CONN_SIZE";
 const static std::string S3_REQUEST_TIMEOUT_MS = "AWS_REQUEST_TIMEOUT_MS";
 const static std::string S3_CONN_TIMEOUT_MS = "AWS_CONN_TIMEOUT_MS";
diff --git a/docs/en/docs/data-operate/import/import-scenes/external-storage-load.md b/docs/en/docs/data-operate/import/import-scenes/external-storage-load.md
index 0074c591f3..e06db56de4 100644
--- a/docs/en/docs/data-operate/import/import-scenes/external-storage-load.md
+++ b/docs/en/docs/data-operate/import/import-scenes/external-storage-load.md
@@ -162,7 +162,7 @@ example:
 
 ### FAQ
 
-S3 SDK uses virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style:
+1. S3 SDK uses virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style:
 
 ```
    WITH S3
@@ -174,3 +174,17 @@ S3 SDK uses virtual-hosted style by default. However, some object storage system
          "use_path_style" = "true"
    )
 ```
+
+<version since="1.2"></version>
+2. Support using temporary security credentials to access object stores that support the S3 protocol:
+
+```
+  WITH S3
+  (
+        "AWS_ENDPOINT" = "AWS_ENDPOINT",
+        "AWS_ACCESS_KEY" = "AWS_TEMP_ACCESS_KEY",
+        "AWS_SECRET_KEY" = "AWS_TEMP_SECRET_KEY",
+        "AWS_TOKEN" = "AWS_TEMP_TOKEN",
+        "AWS_REGION" = "AWS_REGION"
+  )
+```
diff --git a/docs/en/docs/data-operate/import/import-way/s3-load-manual.md b/docs/en/docs/data-operate/import/import-way/s3-load-manual.md
index 815c2ba2fa..9f9a640324 100644
--- a/docs/en/docs/data-operate/import/import-way/s3-load-manual.md
+++ b/docs/en/docs/data-operate/import/import-way/s3-load-manual.md
@@ -80,7 +80,7 @@ example:
 
 ## FAQ
 
-S3 SDK uses virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style:
+1. S3 SDK uses virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style:
 
 ```text
    WITH S3
@@ -92,3 +92,17 @@ S3 SDK uses virtual-hosted style by default. However, some object storage system
          "use_path_style" = "true"
    )
 ```
+
+<version since="1.2"></version>
+2. Support using temporary security credentials to access object stores that support the S3 protocol:
+
+```
+  WITH S3
+  (
+        "AWS_ENDPOINT" = "AWS_ENDPOINT",
+        "AWS_ACCESS_KEY" = "AWS_TEMP_ACCESS_KEY",
+        "AWS_SECRET_KEY" = "AWS_TEMP_SECRET_KEY",
+        "AWS_TOKEN" = "AWS_TEMP_TOKEN",
+        "AWS_REGION" = "AWS_REGION"
+  )
+```
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
index 359aabd01f..f191be3157 100644
--- a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
+++ b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
@@ -139,6 +139,23 @@ PROPERTIES
 );
 ```
 
+<version since="1.2"></version>
+7. Create a repository named minio_repo via temporary security credentials.
+
+```
+CREATE REPOSITORY `minio_repo`
+WITH S3
+ON LOCATION "s3://minio_repo"
+PROPERTIES
+( 
+    "AWS_ENDPOINT" = "AWS_ENDPOINT",
+    "AWS_ACCESS_KEY" = "AWS_TEMP_ACCESS_KEY",
+    "AWS_SECRET_KEY" = "AWS_TEMP_SECRET_KEY",
+    "AWS_TOKEN" = "AWS_TEMP_TOKEN",
+    "AWS_REGION" = "AWS_REGION"
+)
+```
+
 ### Keywords
 
     CREATE, REPOSITORY
diff --git a/docs/zh-CN/docs/data-operate/import/import-scenes/external-storage-load.md b/docs/zh-CN/docs/data-operate/import/import-scenes/external-storage-load.md
index 88c5a271e7..18b3002e01 100644
--- a/docs/zh-CN/docs/data-operate/import/import-scenes/external-storage-load.md
+++ b/docs/zh-CN/docs/data-operate/import/import-scenes/external-storage-load.md
@@ -168,7 +168,7 @@ Hdfs load 创建导入语句,导入方式和[Broker Load](../../../data-operat
 
 ### 常见问题
 
-S3 SDK 默认使用 `virtual-hosted style` 方式。但某些对象存储系统可能没开启或没支持 `virtual-hosted style` 方式的访问,此时我们可以添加 `use_path_style` 参数来强制使用 `path style` 方式:
+1. S3 SDK 默认使用 `virtual-hosted style` 方式。但某些对象存储系统可能没开启或没支持 `virtual-hosted style` 方式的访问,此时我们可以添加 `use_path_style` 参数来强制使用 `path style` 方式:
 
 ```
   WITH S3
@@ -180,3 +180,17 @@ S3 SDK 默认使用 `virtual-hosted style` 方式。但某些对象存储系统
         "use_path_style" = "true"
   )
 ```
+
+<version since="1.2"></version>
+2. 支持使用临时秘钥(TOKEN) 访问所有支持 S3 协议的对象存储,用法如下:
+
+```
+  WITH S3
+  (
+        "AWS_ENDPOINT" = "AWS_ENDPOINT",
+        "AWS_ACCESS_KEY" = "AWS_TEMP_ACCESS_KEY",
+        "AWS_SECRET_KEY" = "AWS_TEMP_SECRET_KEY",
+        "AWS_TOKEN" = "AWS_TEMP_TOKEN",
+        "AWS_REGION" = "AWS_REGION"
+  )
+```
diff --git a/docs/zh-CN/docs/data-operate/import/import-way/s3-load-manual.md b/docs/zh-CN/docs/data-operate/import/import-way/s3-load-manual.md
index 953370c596..b5736ec318 100644
--- a/docs/zh-CN/docs/data-operate/import/import-way/s3-load-manual.md
+++ b/docs/zh-CN/docs/data-operate/import/import-way/s3-load-manual.md
@@ -80,7 +80,7 @@ under the License.
 
 ## 常见问题
 
-S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可能没开启或没支持 virtual-hosted style 方式的访问,此时我们可以添加 `use_path_style` 参数来强制使用 path style 方式:
+1. S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可能没开启或没支持 virtual-hosted style 方式的访问,此时我们可以添加 `use_path_style` 参数来强制使用 path style 方式:
 
 ```text
   WITH S3
@@ -93,5 +93,17 @@ S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可
   )
 ```
 
+<version since="1.2"></version>
+2. 支持使用临时秘钥(TOKEN) 访问所有支持 S3 协议的对象存储,用法如下:
 
+```
+  WITH S3
+  (
+        "AWS_ENDPOINT" = "AWS_ENDPOINT",
+        "AWS_ACCESS_KEY" = "AWS_TEMP_ACCESS_KEY",
+        "AWS_SECRET_KEY" = "AWS_TEMP_SECRET_KEY",
+        "AWS_TOKEN" = "AWS_TEMP_TOKEN",
+        "AWS_REGION" = "AWS_REGION"
+  )
+```
 
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
index b61c24dcb4..710e9b46aa 100644
--- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
@@ -136,6 +136,23 @@ PROPERTIES
     "use_path_style" = "true"
 );
 ```
+<version since="1.2"></version>
+7. 使用临时秘钥创建名为 minio_repo 的仓库
+
+```
+CREATE REPOSITORY `minio_repo`
+WITH S3
+ON LOCATION "s3://minio_repo"
+PROPERTIES
+(
+    "AWS_ENDPOINT" = "AWS_ENDPOINT",
+    "AWS_ACCESS_KEY" = "AWS_TEMP_ACCESS_KEY",
+    "AWS_SECRET_KEY" = "AWS_TEMP_SECRET_KEY",
+    "AWS_TOKEN" = "AWS_TEMP_TOKEN",
+    "AWS_REGION" = "AWS_REGION"
+)
+```
+
 
 ### Keywords
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java b/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java
index 17833b1d68..d0828ca87e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/backup/S3Storage.java
@@ -34,6 +34,7 @@ import org.apache.http.client.utils.URIBuilder;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
 import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
 import software.amazon.awssdk.auth.signer.AwsS3V4Signer;
 import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
@@ -148,10 +149,19 @@ public class S3Storage extends BlobStorage {
         if (client == null) {
             checkS3(caseInsensitiveProperties);
             URI tmpEndpoint = URI.create(caseInsensitiveProperties.get(S3Resource.S3_ENDPOINT));
-            AwsBasicCredentials awsBasic = AwsBasicCredentials.create(
-                    caseInsensitiveProperties.get(S3Resource.S3_ACCESS_KEY),
-                    caseInsensitiveProperties.get(S3Resource.S3_SECRET_KEY));
-            StaticCredentialsProvider scp = StaticCredentialsProvider.create(awsBasic);
+            StaticCredentialsProvider scp;
+            if (!caseInsensitiveProperties.containsKey(S3Resource.S3_TOKEN)) {
+                AwsBasicCredentials awsBasic = AwsBasicCredentials.create(
+                        caseInsensitiveProperties.get(S3Resource.S3_ACCESS_KEY),
+                        caseInsensitiveProperties.get(S3Resource.S3_SECRET_KEY));
+                scp = StaticCredentialsProvider.create(awsBasic);
+            } else {
+                AwsSessionCredentials awsSession = AwsSessionCredentials.create(
+                        caseInsensitiveProperties.get(S3Resource.S3_ACCESS_KEY),
+                        caseInsensitiveProperties.get(S3Resource.S3_SECRET_KEY),
+                        caseInsensitiveProperties.get(S3Resource.S3_TOKEN));
+                scp = StaticCredentialsProvider.create(awsSession);
+            }
             EqualJitterBackoffStrategy backoffStrategy = EqualJitterBackoffStrategy
                     .builder()
                     .baseDelay(Duration.ofSeconds(1))
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java
index 18fae53ad3..b411c2f46c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java
@@ -67,6 +67,7 @@ public class S3Resource extends Resource {
     public static final String S3_BUCKET = "AWS_BUCKET";
 
     // optional
+    public static final String S3_TOKEN = "AWS_TOKEN";
     public static final String USE_PATH_STYLE = "use_path_style";
     public static final String S3_MAX_CONNECTIONS = "AWS_MAX_CONNECTIONS";
     public static final String S3_REQUEST_TIMEOUT_MS = "AWS_REQUEST_TIMEOUT_MS";
@@ -191,6 +192,13 @@ public class S3Resource extends Resource {
         } else {
             s3Properties.put("fs.s3a.path.style.access", "false");
         }
+        if (properties.containsKey(S3Resource.S3_TOKEN)) {
+            s3Properties.put("fs.s3a.session.token", properties.get(S3_TOKEN));
+            s3Properties.put("fs.s3a.aws.credentials.provider",
+                    "org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider");
+            s3Properties.put("fs.s3a.impl.disable.cache", "true");
+            s3Properties.put("fs.s3.impl.disable.cache", "true");
+        }
         for (Map.Entry<String, String> entry : properties.entrySet()) {
             if (entry.getKey().startsWith(S3Resource.S3_FS_PREFIX)) {
                 s3Properties.put(entry.getKey(), entry.getValue());


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 03/03: [fix](inbitmap) fix core dump caused by bitmap filter with union (#15333)

Posted by mo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit caeed14e6d9af45a190e4625d0bf62f087e47a79
Author: luozenglin <37...@users.noreply.github.com>
AuthorDate: Mon Dec 26 23:14:32 2022 +0800

    [fix](inbitmap) fix core dump caused by bitmap filter with union (#15333)
    
    The join node needs a project operation to remove unnecessary columns from its output tuples.
    For a SetOperationNode, the output tuple and the input tuple are consistent, so the node itself does not need a projection,
    but the children of a SetOperationNode may be join nodes, so the children of the SetOperationNode
    need to do the project operation.
---
 .../doris/analysis/BitmapFilterPredicate.java      |  5 +++++
 .../org/apache/doris/planner/OriginalPlanner.java  |  2 +-
 .../org/apache/doris/planner/SetOperationNode.java | 26 ++++++++++++++++++++++
 .../data/query_p0/join/test_bitmap_filter.out      | 16 +++++++++++++
 .../suites/query_p0/join/test_bitmap_filter.groovy |  5 +++++
 5 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BitmapFilterPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BitmapFilterPredicate.java
index 1802f0917b..8e6b703977 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BitmapFilterPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BitmapFilterPredicate.java
@@ -77,6 +77,11 @@ public class BitmapFilterPredicate extends Predicate {
                     + "Please `set runtime_filter_type = 'xxx, bitmap_filter'` first.");
         }
 
+        if (ConnectContext.get() == null || !ConnectContext.get().getSessionVariable().isEnableProjection()) {
+            throw new AnalysisException(
+                    "Please enable the session variable 'enable_projection' through `set enable_projection = true;`");
+        }
+
         if (!VectorizedUtil.isVectorized()) {
             throw new AnalysisException("In bitmap syntax is currently only supported in the vectorization engine.");
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java
index fc7c1b7b80..809bb645b3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java
@@ -153,7 +153,7 @@ public class OriginalPlanner extends Planner {
 
         if (analyzer.getContext() != null
                 && analyzer.getContext().getSessionVariable().isEnableProjection()
-                && statement instanceof SelectStmt) {
+                && statement instanceof QueryStmt) {
             ProjectPlanner projectPlanner = new ProjectPlanner(analyzer);
             projectPlanner.projectSingleNodePlan(queryStmt.getResultExprs(), singleNodePlan);
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java
index e79bfe7cf4..5d806215d4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SetOperationNode.java
@@ -20,6 +20,7 @@ package org.apache.doris.planner;
 import org.apache.doris.analysis.Analyzer;
 import org.apache.doris.analysis.Expr;
 import org.apache.doris.analysis.SlotDescriptor;
+import org.apache.doris.analysis.SlotId;
 import org.apache.doris.analysis.SlotRef;
 import org.apache.doris.analysis.TupleDescriptor;
 import org.apache.doris.analysis.TupleId;
@@ -38,12 +39,14 @@ import org.apache.doris.thrift.TUnionNode;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 import org.apache.commons.collections.CollectionUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Set;
 import java.util.stream.Collectors;
 
 /**
@@ -449,6 +452,29 @@ public abstract class SetOperationNode extends PlanNode {
         return numInstances;
     }
 
+    public void initOutputSlotIds(Set<SlotId> requiredSlotIdSet, Analyzer analyzer) {
+    }
+
+    public void projectOutputTuple() {
+    }
+
+    public Set<SlotId> computeInputSlotIds(Analyzer analyzer) {
+        Set<SlotId> results = Sets.newHashSet();
+        for (int i = 0; i < resultExprLists.size(); ++i) {
+            List<Expr> substituteList =
+                    Expr.substituteList(resultExprLists.get(i), children.get(i).getOutputSmap(), analyzer, true);
+            for (Expr expr : substituteList) {
+                List<SlotId> slotIdList = Lists.newArrayList();
+                expr.getIds(null, slotIdList);
+                results.addAll(slotIdList);
+            }
+        }
+        return results;
+    }
+
+    /**
+     * just for Nereids.
+     */
     public void finalizeForNereids(TupleDescriptor tupleDescriptor, List<SlotDescriptor> constExprSlots) {
         materializedConstExprLists.clear();
         for (List<Expr> exprList : constExprLists) {
diff --git a/regression-test/data/query_p0/join/test_bitmap_filter.out b/regression-test/data/query_p0/join/test_bitmap_filter.out
index bf8be1dca2..9484c6770d 100644
--- a/regression-test/data/query_p0/join/test_bitmap_filter.out
+++ b/regression-test/data/query_p0/join/test_bitmap_filter.out
@@ -75,3 +75,19 @@
 2015-04-02
 2015-04-02
 
+-- !sql12 --
+1
+3
+5
+7
+9
+10
+11
+12
+13
+14
+255
+1985
+1991
+32767
+
diff --git a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
index d73de718ac..5caf5f7ac6 100644
--- a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
+++ b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
@@ -61,6 +61,11 @@ suite("test_bitmap_filter", "query_p0") {
 
     qt_sql11 "select k10 from ${tbl1} where cast(k10 as bigint) in (select bitmap_or(k2, to_bitmap(20120314)) from ${tbl2} b) order by 1;"
 
+    qt_sql12 """
+        with w1 as (select k1 from ${tbl1} where k1 in (select k2 from ${tbl2})), w2 as (select k2 from ${tbl1} where k2 in (select k3 from ${tbl2})) 
+        select * from (select * from w1 union select * from w2) tmp order by 1;
+    """
+
     test {
         sql "select k1, k2 from ${tbl1} b1 where k1 in (select k2 from ${tbl2} b2 where b1.k2 = b2.k1) order by k1;"
         exception "In bitmap does not support correlated subquery"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org