You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/01/25 00:22:25 UTC

[impala] 02/02: IMPALA-8090: race when reusing ScanRange in test

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit aa603d8cb9e870919b60d80828951beee1db6622
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Jan 17 15:49:42 2019 -0800

    IMPALA-8090: race when reusing ScanRange in test
    
    The issue occurs in test code where two reads are issued
    with the same ScanRange object back-to-back, e.g. in
    SyncReadTest. The DiskIoMgr enqueues the last buffer
    or cancels the scan range *before* closing the
    'file_reader_', which means that the client thread
    thinks the ScanRange is done and can re-enqueue
    it into the DiskIoMgr.
    
    This is not an issue in Impala itself because scan
    ranges are not reused in this fashion.
    
    I evaluated adding a DCHECK but the lifecycle
    of the ScanRange objects is complicated enough
    that there wasn't a straightforward invariant
    to enforce.
    
    Testing:
    Looped the previously-failing test overnight.
    
    Change-Id: I3122e5b2efea60ffe82d780930301d5be108876b
    Reviewed-on: http://gerrit.cloudera.org:8080/12238
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/runtime/io/request-ranges.h |  4 ++++
 be/src/runtime/io/scan-range.cc    | 18 ++++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/be/src/runtime/io/request-ranges.h b/be/src/runtime/io/request-ranges.h
index 1dc451a..0dddac5 100644
--- a/be/src/runtime/io/request-ranges.h
+++ b/be/src/runtime/io/request-ranges.h
@@ -245,6 +245,10 @@ class ScanRange : public RequestRange {
   /// the read did not come from a local disk. 'buffer_opts' specifies buffer management
   /// options - see the DiskIoMgr class comment and the BufferOpts comments for details.
   /// 'meta_data' is an arbitrary client-provided pointer for any auxiliary data.
+  ///
+  /// TODO: IMPALA-4249: clarify if a ScanRange can be reused after Reset(). Currently
+  /// it is not generally safe to do so, but some unit tests reuse ranges after
+  /// successfully reading to eos.
   void Reset(hdfsFS fs, const char* file, int64_t len, int64_t offset, int disk_id,
       bool expected_local, const BufferOpts& buffer_opts, void* meta_data = nullptr);
 
diff --git a/be/src/runtime/io/scan-range.cc b/be/src/runtime/io/scan-range.cc
index b0aebbd..b98763d 100644
--- a/be/src/runtime/io/scan-range.cc
+++ b/be/src/runtime/io/scan-range.cc
@@ -239,8 +239,8 @@ ReadOutcome ScanRange::DoRead(int disk_id) {
     // Propagate 'read_status' to the scan range. This will also wake up any waiting
     // threads.
     CancelInternal(read_status, true);
-    // No more reads for this scan range - we can close it.
-    file_reader_->Close();
+    // At this point we cannot touch the state of this range because the client
+    // may notice cancellation, then reuse the scan range.
     return ReadOutcome::CANCELLED;
   }
 
@@ -253,14 +253,13 @@ ReadOutcome ScanRange::DoRead(int disk_id) {
   // After calling EnqueueReadyBuffer(), it is no longer valid to touch 'buffer_desc'.
   // Store the state we need before calling EnqueueReadyBuffer().
   bool eosr = buffer_desc->eosr();
+  // No more reads for this scan range - we can close it.
+  if (eosr) file_reader_->Close();
   // Read successful - enqueue the buffer and return the appropriate outcome.
   if (!EnqueueReadyBuffer(move(buffer_desc))) return ReadOutcome::CANCELLED;
-  if (eosr) {
-    // No more reads for this scan range - we can close it.
-    file_reader_->Close();
-    return ReadOutcome::SUCCESS_EOSR;
-  }
-  return ReadOutcome::SUCCESS_NO_EOSR;
+  // At this point, if eosr=true, then we cannot touch the state of this scan range
+  // because the client may notice eos, then reuse the scan range.
+  return eosr ? ReadOutcome::SUCCESS_EOSR : ReadOutcome::SUCCESS_NO_EOSR;
 }
 
 Status ScanRange::ReadSubRanges(BufferDescriptor* buffer_desc, bool* eof) {
@@ -375,6 +374,9 @@ void ScanRange::CancelInternal(const Status& status, bool read_error) {
 
   // For cached buffers, we can't close the range until the cached buffer is returned.
   // Close() is called from ScanRange::CleanUpBufferLocked().
+  // TODO: IMPALA-4249 - this Close() call makes it unsafe to reuse a cancelled scan
+  // range, because there is no synchronisation between this Close() call and the
+  // client adding the ScanRange back into the IoMgr.
   if (external_buffer_tag_ != ExternalBufferTag::CACHED_BUFFER) file_reader_->Close();
 }