You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/03/18 23:31:54 UTC
[impala] 01/02: IMPALA-11185: Reuse orc row batch in the scanner life-cycle
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4d32ab7122557ca3336354301a3a467a206913a9
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Wed Mar 16 13:12:13 2022 +0800
IMPALA-11185: Reuse orc row batch in the scanner life-cycle
In HdfsOrcScanner::AssembleRows(), we always re-create an
orc::ColumnVectorBatch. The ideal pattern is to reuse the batch and only
destroy it when the scanner is closed.
This saves half of the scanner time in some TPC-H queries. See the flame
graph in the JIRA description.
Tests:
- Run CORE test
Change-Id: I03887ed94af2ff03d67cd00c79375c734a75af62
Reviewed-on: http://gerrit.cloudera.org:8080/18325
Reviewed-by: Quanlong Huang <hu...@gmail.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exec/hdfs-orc-scanner.cc | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/be/src/exec/hdfs-orc-scanner.cc b/be/src/exec/hdfs-orc-scanner.cc
index 5809ac7..bc81fc4 100644
--- a/be/src/exec/hdfs-orc-scanner.cc
+++ b/be/src/exec/hdfs-orc-scanner.cc
@@ -437,6 +437,8 @@ Status HdfsOrcScanner::Open(ScannerContext* context) {
}
orc_root_reader_ = this->obj_pool_.Add(
new OrcStructReader(root_type, scan_node_->tuple_desc(), this));
+ orc_root_batch_ = tmp_row_reader->createRowBatch(state_->batch_size());
+ DCHECK_EQ(orc_root_batch_->numElements, 0);
} RETURN_ON_ORC_EXCEPTION(
"Encountered parse error during schema selection in ORC file $0: $1");
@@ -934,11 +936,6 @@ Status HdfsOrcScanner::AssembleRows(RowBatch* row_batch) {
// We're going to free the previous batch. Clear the reference first.
RETURN_IF_ERROR(orc_root_reader_->UpdateInputBatch(nullptr));
- try {
- orc_root_batch_ = row_reader_->createRowBatch(row_batch->capacity());
- DCHECK_EQ(orc_root_batch_->numElements, 0);
- } RETURN_ON_ORC_EXCEPTION("Encounter error in creating ORC row batch for file $0: $1.");
-
int64_t num_rows_read = 0;
while (continue_execution) { // one ORC batch (ColumnVectorBatch) in a round
if (orc_root_reader_->EndOfBatch()) {