You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/11/01 16:04:10 UTC
[5/7] impala git commit: IMPALA-7363: Fix buffer usage in
ScannerContext::Stream::ReadVLong()
IMPALA-7363: Fix buffer usage in ScannerContext::Stream::ReadVLong()
Previously, the ReadVLong() function would read a byte and use it
after subsequent calls to ReadBytes() function. As a result, the
value was invalidated depending on the contents of the
boundary_buffer_. This change ensures that all the functions on
the buffer are invoked before subsequent calls to ReadBytes().
Testing:
Enabled test_tpch_scan_ranges() for sequence files and ran it
multiple times to ensure it always produces the desired result.
Manually verified the buffer usage of other calls to ReadBytes()
and GetBytes().
Change-Id: Ic2b2ffe4b1d67c63cfcea2baedbeff48e65ca417
Reviewed-on: http://gerrit.cloudera.org:8080/11828
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/2057931f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/2057931f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/2057931f
Branch: refs/heads/master
Commit: 2057931fbe10474b542dd13dd17acef22c29623e
Parents: 8552615
Author: poojanilangekar <po...@cloudera.com>
Authored: Tue Oct 30 14:00:25 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Nov 1 04:41:01 2018 +0000
----------------------------------------------------------------------
be/src/exec/scanner-context.h | 13 ++++++++-----
be/src/exec/scanner-context.inline.h | 6 +++---
tests/query_test/test_scanners.py | 3 ---
3 files changed, 11 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/2057931f/be/src/exec/scanner-context.h
----------------------------------------------------------------------
diff --git a/be/src/exec/scanner-context.h b/be/src/exec/scanner-context.h
index 0bb8d74..9680778 100644
--- a/be/src/exec/scanner-context.h
+++ b/be/src/exec/scanner-context.h
@@ -108,9 +108,10 @@ class ScannerContext {
/// - If peek is true, the scan range position is not incremented (i.e. repeated calls
/// with peek = true will return the same data).
/// - *buffer on return is a pointer to the buffer. The memory is owned by
- /// the ScannerContext and should not be modified. If the buffer is entirely
- /// from one disk io buffer, a pointer inside that buffer is returned directly.
- /// If the requested buffer straddles io buffers, a copy is done here.
+ /// the ScannerContext and should not be modified. The contents of the buffer are
+ /// invalidated after subsequent calls to GetBytes()/ReadBytes(). If the buffer
+ /// is entirely from one disk io buffer, a pointer inside that buffer is returned
+ /// directly. If the requested buffer straddles io buffers, a copy is done here.
/// - *out_len is the number of bytes returned.
/// - *status is set if there is an error.
/// Returns true if the call was successful (i.e. status->ok())
@@ -191,8 +192,10 @@ class ScannerContext {
bool SkipBytes(int64_t length, Status* status) WARN_UNUSED_RESULT;
/// Read length bytes into the supplied buffer. The returned buffer is owned
- /// by this object. Returns true on success, otherwise returns false and sets 'status'
- /// to indicate the error.
+ /// by this object The memory is owned by and should not be modified. The contents
+ /// of the buffer are invalidated after subsequent calls to GetBytes()/ReadBytes().
+ /// Returns true on success, otherwise returns false and sets 'status' to
+ /// indicate the error.
bool ReadBytes(int64_t length, uint8_t** buf, Status* status, bool peek = false)
WARN_UNUSED_RESULT;
http://git-wip-us.apache.org/repos/asf/impala/blob/2057931f/be/src/exec/scanner-context.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/scanner-context.inline.h b/be/src/exec/scanner-context.inline.h
index 19ea1f4..b3ff372 100644
--- a/be/src/exec/scanner-context.inline.h
+++ b/be/src/exec/scanner-context.inline.h
@@ -146,6 +146,7 @@ inline bool ScannerContext::Stream::ReadVLong(int64_t* value, Status* status) {
RETURN_IF_FALSE(ReadBytes(1, reinterpret_cast<uint8_t**>(&firstbyte), status));
int len = ReadWriteUtil::DecodeVIntSize(*firstbyte);
+ bool is_negative = ReadWriteUtil::IsNegativeVInt(*firstbyte);
if (len > ReadWriteUtil::MAX_VINT_LEN) {
*status = Status("ReadVLong: size is too big");
return false;
@@ -165,9 +166,8 @@ inline bool ScannerContext::Stream::ReadVLong(int64_t* value, Status* status) {
*value = (*value << 8) | (bytes[i] & 0xFF);
}
- if (ReadWriteUtil::IsNegativeVInt(*firstbyte)) {
- *value = *value ^ (static_cast<int64_t>(-1));
- }
+ if (is_negative) *value = *value ^ (static_cast<int64_t>(-1));
+
return true;
}
http://git-wip-us.apache.org/repos/asf/impala/blob/2057931f/tests/query_test/test_scanners.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index e8a24b8..91f0449 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -778,9 +778,6 @@ class TestTpchScanRangeLengths(ImpalaTestSuite):
super(TestTpchScanRangeLengths, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_dimension(
ImpalaTestDimension('scan_range_length', *TPCH_SCAN_RANGE_LENGTHS))
- # IMPALA-7360: sequence file scan returns spurious errors
- cls.ImpalaTestMatrix.add_constraint(
- lambda v: v.get_value('table_format').file_format != 'seq')
def test_tpch_scan_ranges(self, vector):
# Randomly adjust the scan range length to exercise different code paths.