You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/12/08 06:24:06 UTC
[3/5] incubator-impala git commit: IMPALA-4539: fix bug when scratch batch references I/O buffers
IMPALA-4539: fix bug when scratch batch references I/O buffers
The bug occurs when scanning uncompressed plain-encoded Parquet
string data.
Testing:
I could manually reproduce it reliably with ASAN and
--disable_mem_pools=true using the following steps:
# The repro is more predictable when there is no row batch queue.
set mt_dop=1;
# A unique string column should switch to plain encoding
set compression_codec=none;
create table big_uuids stored as parquet as select uuid() from tpch_20_parquet.lineitem;
# The repro requires that some rows are filtered out, so that we end
# up in a state where the output batch is full before all rows are
# copied from the scratch batch
select ndv(_c0) from big_uuids where substring(_c0, 1, 2) != 'a1' limit 10;
After the fix it no longer reproduces.
I do not yet have a practical test case that triggers the bug on a
normal ASAN setup. I will continue to try to create one.
Change-Id: Ic27e7251e0f633cb694b506f6eb62beed6e66ad9
Reviewed-on: http://gerrit.cloudera.org:8080/5406
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/29971ac7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/29971ac7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/29971ac7
Branch: refs/heads/master
Commit: 29971ac7dafadb741cce7a55fe7312fe00ac7dcf
Parents: eaa14f2
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Dec 7 13:53:02 2016 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Dec 8 04:38:31 2016 +0000
----------------------------------------------------------------------
be/src/exec/hdfs-parquet-scanner.cc | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/29971ac7/be/src/exec/hdfs-parquet-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner.cc b/be/src/exec/hdfs-parquet-scanner.cc
index f16f9d9..31439f2 100644
--- a/be/src/exec/hdfs-parquet-scanner.cc
+++ b/be/src/exec/hdfs-parquet-scanner.cc
@@ -239,12 +239,11 @@ void HdfsParquetScanner::Close(RowBatch* row_batch) {
if (template_tuple_pool_ != nullptr) template_tuple_pool_->FreeAll();
dictionary_pool_.get()->FreeAll();
context_->ReleaseCompletedResources(nullptr, true);
- for (ParquetColumnReader* col_reader: column_readers_) col_reader->Close(nullptr);
- if (!FLAGS_enable_partitioned_hash_join || !FLAGS_enable_partitioned_aggregation) {
- // With the legacy aggs/joins the tuple ptrs of the scratch batch are allocated
- // from the scratch batch's mem pool. We can get into this case if Open() fails.
- scratch_batch_->mem_pool()->FreeAll();
- }
+ for (ParquetColumnReader* col_reader : column_readers_) col_reader->Close(nullptr);
+ // The scratch batch may still contain tuple data (or tuple ptrs if the legacy joins
+ // or aggs are enabled). We can get into this case if Open() fails or if the query is
+ // cancelled.
+ scratch_batch_->mem_pool()->FreeAll();
}
if (level_cache_pool_ != nullptr) {
level_cache_pool_->FreeAll();
@@ -595,8 +594,11 @@ Status HdfsParquetScanner::CommitRows(RowBatch* dst_batch, int num_rows) {
// We need to pass the row batch to the scan node if there is too much memory attached,
// which can happen if the query is very selective. We need to release memory even
- // if no rows passed predicates.
- if (dst_batch->AtCapacity() || context_->num_completed_io_buffers() > 0) {
+ // if no rows passed predicates. We should only do this when all rows have been copied
+ // from the scratch batch, since those rows may reference completed I/O buffers in
+ // 'context_'.
+ if (scratch_batch_->AtEnd()
+ && (dst_batch->AtCapacity() || context_->num_completed_io_buffers() > 0)) {
context_->ReleaseCompletedResources(dst_batch, /* done */ false);
}
if (context_->cancelled()) return Status::CANCELLED;