You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/19 05:10:41 UTC

[doris] 07/21: [fix](multi catalog)Return empty block while external table scanner couldn't find the file (#14997)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit d41a963c8ff0c971baa9080417b343601f8a8ade
Author: Jibing-Li <64...@users.noreply.github.com>
AuthorDate: Fri Dec 16 09:36:35 2022 +0800

    [fix](multi catalog)Return empty block while external table scanner couldn't find the file (#14997)
    
    The FE file path cache for an external table may be out of date. In this case, the BE may fail to find a file that is still listed in the FE cache but no longer exists.
    This PR handles that case: instead of returning an error message to the user, we return an empty result set.
---
 be/src/io/hdfs_file_reader.cpp             |  3 +++
 be/src/vec/exec/scan/scanner_scheduler.cpp | 12 +++++++++++-
 be/src/vec/exec/scan/vfile_scanner.cpp     |  3 +++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/be/src/io/hdfs_file_reader.cpp b/be/src/io/hdfs_file_reader.cpp
index d57c489691..78a21ac477 100644
--- a/be/src/io/hdfs_file_reader.cpp
+++ b/be/src/io/hdfs_file_reader.cpp
@@ -72,6 +72,9 @@ Status HdfsFileReader::open() {
 
     RETURN_IF_ERROR(HdfsFsCache::instance()->get_connection(_hdfs_params, &_fs_handle));
     _hdfs_fs = _fs_handle->hdfs_fs;
+    if (hdfsExists(_hdfs_fs, _path.c_str()) != 0) {
+        return Status::NotFound("{} not exists!", _path);
+    }
     _hdfs_file = hdfsOpenFile(_hdfs_fs, _path.c_str(), O_RDONLY, 0, 0, 0);
     if (_hdfs_file == nullptr) {
         if (_fs_handle->from_cache) {
diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp
index f2170a9599..d45adc13b9 100644
--- a/be/src/vec/exec/scan/scanner_scheduler.cpp
+++ b/be/src/vec/exec/scan/scanner_scheduler.cpp
@@ -26,6 +26,7 @@
 #include "vec/core/block.h"
 #include "vec/exec/scan/vscanner.h"
 #include "vec/exprs/vexpr.h"
+#include "vfile_scanner.h"
 
 namespace doris::vectorized {
 
@@ -232,12 +233,21 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, ScannerContext
         auto block = ctx->get_free_block(&get_free_block);
         status = scanner->get_block(state, block, &eos);
         VLOG_ROW << "VOlapScanNode input rows: " << block->rows() << ", eos: " << eos;
-        if (!status.ok()) {
+        // The VFileScanner for external table may try to open not exist files,
+        // Because FE file cache for external table may out of date.
+        if (!status.ok() && (typeid(*scanner) == typeid(doris::vectorized::VFileScanner) &&
+                             !status.is<ErrorCode::NOT_FOUND>())) {
             LOG(WARNING) << "Scan thread read VOlapScanner failed: " << status.to_string();
             // Add block ptr in blocks, prevent mem leak in read failed
             blocks.push_back(block);
             break;
         }
+        if (status.is<ErrorCode::NOT_FOUND>()) {
+            // The only case in this if branch is external table file delete and fe cache has not been updated yet.
+            // Set status to OK.
+            status = Status::OK();
+            eos = true;
+        }
 
         raw_bytes_read += block->bytes();
         num_rows_in_block += block->rows();
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp
index ee1605a6e0..b478d5c96e 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -530,6 +530,9 @@ Status VFileScanner::_get_next_reader() {
         if (init_status.is_end_of_file()) {
             continue;
         } else if (!init_status.ok()) {
+            if (init_status.is<ErrorCode::NOT_FOUND>()) {
+                return init_status;
+            }
             return Status::InternalError("failed to init reader for file {}, err: {}", range.path,
                                          init_status.get_error_msg());
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org