You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/11/01 16:04:10 UTC

[5/7] impala git commit: IMPALA-7363: Fix buffer usage in ScannerContext::Stream::ReadVLong()

IMPALA-7363: Fix buffer usage in ScannerContext::Stream::ReadVLong()

Previously, the ReadVLong() function would read a byte and use it
after subsequent calls to ReadBytes() function. As a result, the
value was invalidated depending on the contents of the
boundary_buffer_. This change ensures that all the functions on
the buffer are invoked before subsequent calls to ReadBytes().

Testing:
Enabled test_tpch_scan_ranges() for sequence files and ran it
multiple times to ensure it always produces the desired result.
Manually verified the buffer usage of other calls to ReadBytes()
and GetBytes().

Change-Id: Ic2b2ffe4b1d67c63cfcea2baedbeff48e65ca417
Reviewed-on: http://gerrit.cloudera.org:8080/11828
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/2057931f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/2057931f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/2057931f

Branch: refs/heads/master
Commit: 2057931fbe10474b542dd13dd17acef22c29623e
Parents: 8552615
Author: poojanilangekar <po...@cloudera.com>
Authored: Tue Oct 30 14:00:25 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Nov 1 04:41:01 2018 +0000

----------------------------------------------------------------------
 be/src/exec/scanner-context.h        | 13 ++++++++-----
 be/src/exec/scanner-context.inline.h |  6 +++---
 tests/query_test/test_scanners.py    |  3 ---
 3 files changed, 11 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/2057931f/be/src/exec/scanner-context.h
----------------------------------------------------------------------
diff --git a/be/src/exec/scanner-context.h b/be/src/exec/scanner-context.h
index 0bb8d74..9680778 100644
--- a/be/src/exec/scanner-context.h
+++ b/be/src/exec/scanner-context.h
@@ -108,9 +108,10 @@ class ScannerContext {
     ///  - If peek is true, the scan range position is not incremented (i.e. repeated calls
     ///    with peek = true will return the same data).
     ///  - *buffer on return is a pointer to the buffer.  The memory is owned by
-    ///    the ScannerContext and should not be modified.  If the buffer is entirely
-    ///    from one disk io buffer, a pointer inside that buffer is returned directly.
-    ///    If the requested buffer straddles io buffers, a copy is done here.
+    ///    the ScannerContext and should not be modified. The contents of the buffer are
+    ///    invalidated after subsequent calls to GetBytes()/ReadBytes(). If the buffer
+    ///    is entirely from one disk io buffer, a pointer inside that buffer is returned
+    ///    directly. If the requested buffer straddles io buffers, a copy is done here.
     ///  - *out_len is the number of bytes returned.
     ///  - *status is set if there is an error.
     /// Returns true if the call was successful (i.e. status->ok())
@@ -191,8 +192,10 @@ class ScannerContext {
     bool SkipBytes(int64_t length, Status* status) WARN_UNUSED_RESULT;
 
     /// Read length bytes into the supplied buffer.  The returned buffer is owned
-    /// by this object. Returns true on success, otherwise returns false and sets 'status'
-    /// to indicate the error.
+    /// by this object The memory is owned by and should not be modified. The contents
+    /// of the buffer are invalidated after subsequent calls to GetBytes()/ReadBytes().
+    /// Returns true on success, otherwise returns false and sets 'status' to
+    /// indicate the error.
     bool ReadBytes(int64_t length, uint8_t** buf, Status* status, bool peek = false)
         WARN_UNUSED_RESULT;
 

http://git-wip-us.apache.org/repos/asf/impala/blob/2057931f/be/src/exec/scanner-context.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/scanner-context.inline.h b/be/src/exec/scanner-context.inline.h
index 19ea1f4..b3ff372 100644
--- a/be/src/exec/scanner-context.inline.h
+++ b/be/src/exec/scanner-context.inline.h
@@ -146,6 +146,7 @@ inline bool ScannerContext::Stream::ReadVLong(int64_t* value, Status* status) {
   RETURN_IF_FALSE(ReadBytes(1, reinterpret_cast<uint8_t**>(&firstbyte), status));
 
   int len = ReadWriteUtil::DecodeVIntSize(*firstbyte);
+  bool is_negative = ReadWriteUtil::IsNegativeVInt(*firstbyte);
   if (len > ReadWriteUtil::MAX_VINT_LEN) {
     *status = Status("ReadVLong: size is too big");
     return false;
@@ -165,9 +166,8 @@ inline bool ScannerContext::Stream::ReadVLong(int64_t* value, Status* status) {
     *value = (*value << 8) | (bytes[i] & 0xFF);
   }
 
-  if (ReadWriteUtil::IsNegativeVInt(*firstbyte)) {
-    *value = *value ^ (static_cast<int64_t>(-1));
-  }
+  if (is_negative) *value = *value ^ (static_cast<int64_t>(-1));
+
   return true;
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/2057931f/tests/query_test/test_scanners.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index e8a24b8..91f0449 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -778,9 +778,6 @@ class TestTpchScanRangeLengths(ImpalaTestSuite):
     super(TestTpchScanRangeLengths, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_dimension(
         ImpalaTestDimension('scan_range_length', *TPCH_SCAN_RANGE_LENGTHS))
-    # IMPALA-7360: sequence file scan returns spurious errors
-    cls.ImpalaTestMatrix.add_constraint(
-        lambda v: v.get_value('table_format').file_format != 'seq')
 
   def test_tpch_scan_ranges(self, vector):
     # Randomly adjust the scan range length to exercise different code paths.