You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/01/25 00:22:25 UTC
[impala] 02/02: IMPALA-8090: race when reusing ScanRange in test
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit aa603d8cb9e870919b60d80828951beee1db6622
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Jan 17 15:49:42 2019 -0800
IMPALA-8090: race when reusing ScanRange in test
The issue occurs in test code where two reads are issued
with the same ScanRange object back-to-back, e.g. in
SyncReadTest. The DiskIoMgr enqueues the last buffer
or cancels the scan range *before* closing the
'file_reader_', which means that the client thread
thinks the ScanRange is done and can re-enqueue
it into the DiskIoMgr.
This is not an issue in Impala itself because scan
ranges are not reused in this fashion.
I evaluated adding a DCHECK but the lifecycle
of the ScanRange objects is complicated enough
that there wasn't a straightforward invariant
to enforce.
Testing:
Looped the previously-failing test overnight.
Change-Id: I3122e5b2efea60ffe82d780930301d5be108876b
Reviewed-on: http://gerrit.cloudera.org:8080/12238
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/runtime/io/request-ranges.h | 4 ++++
be/src/runtime/io/scan-range.cc | 18 ++++++++++--------
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/be/src/runtime/io/request-ranges.h b/be/src/runtime/io/request-ranges.h
index 1dc451a..0dddac5 100644
--- a/be/src/runtime/io/request-ranges.h
+++ b/be/src/runtime/io/request-ranges.h
@@ -245,6 +245,10 @@ class ScanRange : public RequestRange {
/// the read did not come from a local disk. 'buffer_opts' specifies buffer management
/// options - see the DiskIoMgr class comment and the BufferOpts comments for details.
/// 'meta_data' is an arbitrary client-provided pointer for any auxiliary data.
+ ///
+ /// TODO: IMPALA-4249: clarify if a ScanRange can be reused after Reset(). Currently
+ /// it is not generally safe to do so, but some unit tests reuse ranges after
+ /// successfully reading to eos.
void Reset(hdfsFS fs, const char* file, int64_t len, int64_t offset, int disk_id,
bool expected_local, const BufferOpts& buffer_opts, void* meta_data = nullptr);
diff --git a/be/src/runtime/io/scan-range.cc b/be/src/runtime/io/scan-range.cc
index b0aebbd..b98763d 100644
--- a/be/src/runtime/io/scan-range.cc
+++ b/be/src/runtime/io/scan-range.cc
@@ -239,8 +239,8 @@ ReadOutcome ScanRange::DoRead(int disk_id) {
// Propagate 'read_status' to the scan range. This will also wake up any waiting
// threads.
CancelInternal(read_status, true);
- // No more reads for this scan range - we can close it.
- file_reader_->Close();
+ // At this point we cannot touch the state of this range because the client
+ // may notice cancellation, then reuse the scan range.
return ReadOutcome::CANCELLED;
}
@@ -253,14 +253,13 @@ ReadOutcome ScanRange::DoRead(int disk_id) {
// After calling EnqueueReadyBuffer(), it is no longer valid to touch 'buffer_desc'.
// Store the state we need before calling EnqueueReadyBuffer().
bool eosr = buffer_desc->eosr();
+ // No more reads for this scan range - we can close it.
+ if (eosr) file_reader_->Close();
// Read successful - enqueue the buffer and return the appropriate outcome.
if (!EnqueueReadyBuffer(move(buffer_desc))) return ReadOutcome::CANCELLED;
- if (eosr) {
- // No more reads for this scan range - we can close it.
- file_reader_->Close();
- return ReadOutcome::SUCCESS_EOSR;
- }
- return ReadOutcome::SUCCESS_NO_EOSR;
+ // At this point, if eosr=true, then we cannot touch the state of this scan range
+ // because the client may notice eos, then reuse the scan range.
+ return eosr ? ReadOutcome::SUCCESS_EOSR : ReadOutcome::SUCCESS_NO_EOSR;
}
Status ScanRange::ReadSubRanges(BufferDescriptor* buffer_desc, bool* eof) {
@@ -375,6 +374,9 @@ void ScanRange::CancelInternal(const Status& status, bool read_error) {
// For cached buffers, we can't close the range until the cached buffer is returned.
// Close() is called from ScanRange::CleanUpBufferLocked().
+ // TODO: IMPALA-4249 - this Close() call makes it unsafe to reuse a cancelled scan
+ // range, because there is no synchronisation between this Close() call and the
+ // client adding the ScanRange back into the IoMgr.
if (external_buffer_tag_ != ExternalBufferTag::CACHED_BUFFER) file_reader_->Close();
}