You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/08/24 01:09:24 UTC
[2/2] impala git commit: IMPALA-7418: Return first non-ok status from
HdfsScanNode::GetNext()
IMPALA-7418: Return first non-ok status from HdfsScanNode::GetNext()
Previously, HdfsScanNode::GetNext passed the status returned by
IssueInitialScanRanges() without inspecting the
HdfsScanNodeBase::status_. This resulted in the error status being
lost in case a scanner thread hit an error and cancelled the scan.
This change ensures that GetNext() returns the first non-ok status
set in HdfsScanNode.
Testing: Added sleeps to the IssueInitialRanges() to cause
deterministic failures of test_udf_errors and then applied this
patch to it. It passes all the tests despite the sleeps.
Change-Id: I4569cc7b0843a29c617a094e590c31f7c648ff45
Reviewed-on: http://gerrit.cloudera.org:8080/11296
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1d84d685
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1d84d685
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1d84d685
Branch: refs/heads/master
Commit: 1d84d68559735e1a20dff8bfa43e3b14334ce9df
Parents: 0d38082
Author: poojanilangekar <po...@cloudera.com>
Authored: Wed Aug 22 13:31:14 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Fri Aug 24 00:11:53 2018 +0000
----------------------------------------------------------------------
be/src/exec/hdfs-scan-node-base.h | 2 ++
be/src/exec/hdfs-scan-node.cc | 13 ++++++++++++-
2 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/1d84d685/be/src/exec/hdfs-scan-node-base.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-base.h b/be/src/exec/hdfs-scan-node-base.h
index e044ddf..b63d9c5 100644
--- a/be/src/exec/hdfs-scan-node-base.h
+++ b/be/src/exec/hdfs-scan-node-base.h
@@ -541,6 +541,8 @@ class HdfsScanNodeBase : public ScanNode {
/// Performs dynamic partition pruning, i.e., applies runtime filters to files, and
/// issues initial ranges for all file types. Waits for runtime filters if necessary.
/// Only valid to call if !initial_ranges_issued_. Sets initial_ranges_issued_ to true.
+ /// A non-ok status is returned only if it encounters an invalid scan range or if a
+ /// scanner thread cancels the scan when it runs into an error.
Status IssueInitialScanRanges(RuntimeState* state) WARN_UNUSED_RESULT;
/// Gets the next scan range to process and allocates buffer for it. 'reservation' is
http://git-wip-us.apache.org/repos/asf/impala/blob/1d84d685/be/src/exec/hdfs-scan-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node.cc b/be/src/exec/hdfs-scan-node.cc
index 7e99fea..3755e50 100644
--- a/be/src/exec/hdfs-scan-node.cc
+++ b/be/src/exec/hdfs-scan-node.cc
@@ -84,7 +84,18 @@ Status HdfsScanNode::GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos
// so we need to tell them there is work to do.
// TODO: This is probably not worth splitting the organisational cost of splitting
// initialisation across two places. Move to before the scanner threads start.
- RETURN_IF_ERROR(IssueInitialScanRanges(state));
+ Status status = IssueInitialScanRanges(state);
+ if (!status.ok()) {
+ // If the status returned is CANCELLED, it could be because the
+ // reader_context_ was cancelled by a scanner thread which hit an error. In this
+ // case, the scanner thread's error must take precedence. In other cases,
+ // the non-ok status represents the error in ValidateScanRange() or describes
+ // the unsupported compression formats. For such non-CANCELLED cases, the status
+ // returned by IssueInitialScanRanges() takes precedence.
+ unique_lock<mutex> l(lock_);
+ if (status.IsCancelled() && !status_.ok()) return status_;
+ return status;
+ }
// Release the scanner threads
discard_result(ranges_issued_barrier_.Notify());