You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/04/12 23:19:19 UTC
[45/50] incubator-impala git commit: Remove conjuncts_passed arg in
various Parquet scanner functions.
Remove conjuncts_passed arg in various Parquet scanner functions.
The argument is not used anywhere, so let's remove it.
Change-Id: Iff511d6c4ba309eead6222701c17dee1b909a638
Reviewed-on: http://gerrit.cloudera.org:8080/2732
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/344fbc09
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/344fbc09
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/344fbc09
Branch: refs/heads/master
Commit: 344fbc096644a5d06753aedeafe36eca82910cbc
Parents: ee8c309
Author: Alex Behm <al...@cloudera.com>
Authored: Thu Apr 7 16:24:31 2016 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Tue Apr 12 14:03:44 2016 -0700
----------------------------------------------------------------------
be/src/exec/hdfs-parquet-scanner.cc | 71 ++++++++++++--------------------
1 file changed, 27 insertions(+), 44 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/344fbc09/be/src/exec/hdfs-parquet-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner.cc b/be/src/exec/hdfs-parquet-scanner.cc
index a910243..9e18c66 100644
--- a/be/src/exec/hdfs-parquet-scanner.cc
+++ b/be/src/exec/hdfs-parquet-scanner.cc
@@ -253,10 +253,6 @@ class HdfsParquetScanner::ColumnReader {
/// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
/// true.
///
- /// *conjuncts_passed is an in/out parameter. ReadValue() sets to false if the current
- /// row should be filtered based on this column's value. If already false on input, the
- /// row has already been filtered and ReadValue() only needs to advance the value.
- ///
/// NextLevels() must be called on this reader before calling ReadValue() for the first
/// time. This is to initialize the current value that ReadValue() will read.
///
@@ -266,12 +262,11 @@ class HdfsParquetScanner::ColumnReader {
/// TODO: another option is to materialize col by col for the entire row batch in
/// one call. e.g. MaterializeCol would write out 1024 values. Our row batches
/// are currently dense so we'll need to figure out something there.
- virtual bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) = 0;
+ virtual bool ReadValue(MemPool* pool, Tuple* tuple) = 0;
/// Same as ReadValue() but does not advance repetition level. Only valid for columns not
/// in collections.
- virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple,
- bool* conjuncts_passed) = 0;
+ virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple) = 0;
/// Advances this column reader's def and rep levels to the next logical value, i.e. to
/// the next scalar value or the beginning of the next collection, without attempting to
@@ -375,11 +370,11 @@ class HdfsParquetScanner::CollectionColumnReader :
/// Materializes CollectionValue into tuple slot (if materializing) and advances to next
/// value.
- virtual bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed);
+ virtual bool ReadValue(MemPool* pool, Tuple* tuple);
/// Same as ReadValue but does not advance repetition level. Only valid for columns not
/// in collections.
- virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed);
+ virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple);
/// Advances all child readers to the beginning of the next collection and updates this
/// reader's state.
@@ -409,7 +404,7 @@ class HdfsParquetScanner::CollectionColumnReader :
/// Returns false if execution should be aborted for some reason, e.g. parse_error_ is
/// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
/// true.
- inline bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_passed);
+ inline bool ReadSlot(void* slot, MemPool* pool);
};
/// Reader for a single column from the parquet file. It's associated with a
@@ -565,7 +560,7 @@ class HdfsParquetScanner::BaseScalarColumnReader :
/// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
/// true.
template <bool IN_COLLECTION>
- inline bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_passed);
+ inline bool ReadSlot(void* slot, MemPool* pool);
};
/// Per column type reader. If MATERIALIZED is true, the column values are materialized
@@ -605,17 +600,17 @@ class HdfsParquetScanner::ScalarColumnReader :
virtual ~ScalarColumnReader() { }
- virtual bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
- return ReadValue<true>(pool, tuple, conjuncts_passed);
+ virtual bool ReadValue(MemPool* pool, Tuple* tuple) {
+ return ReadValue<true>(pool, tuple);
}
- virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
- return ReadValue<false>(pool, tuple, conjuncts_passed);
+ virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple) {
+ return ReadValue<false>(pool, tuple);
}
protected:
template <bool IN_COLLECTION>
- inline bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
+ inline bool ReadValue(MemPool* pool, Tuple* tuple) {
// NextLevels() should have already been called and def and rep levels should be in
// valid range.
DCHECK_GE(rep_level_, 0);
@@ -628,8 +623,7 @@ class HdfsParquetScanner::ScalarColumnReader :
if (!MATERIALIZED) {
return NextLevels<IN_COLLECTION>();
} else if (def_level_ >= max_def_level()) {
- return ReadSlot<IN_COLLECTION>(tuple->GetSlot(tuple_offset_), pool,
- conjuncts_passed);
+ return ReadSlot<IN_COLLECTION>(tuple->GetSlot(tuple_offset_), pool);
} else {
// Null value
tuple->SetNull(null_indicator_offset_);
@@ -682,7 +676,7 @@ class HdfsParquetScanner::ScalarColumnReader :
/// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
/// true.
template <bool IN_COLLECTION>
- inline bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_passed) {
+ inline bool ReadSlot(void* slot, MemPool* pool) {
T val;
T* val_ptr = NeedsConversion() ? &val : reinterpret_cast<T*>(slot);
if (page_encoding_ == parquet::Encoding::PLAIN_DICTIONARY) {
@@ -794,13 +788,12 @@ class HdfsParquetScanner::BoolColumnReader :
virtual ~BoolColumnReader() { }
- virtual bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
- return ReadValue<true>(pool, tuple, conjuncts_passed);
+ virtual bool ReadValue(MemPool* pool, Tuple* tuple) {
+ return ReadValue<true>(pool, tuple);
}
- virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple,
- bool* conjuncts_passed) {
- return ReadValue<false>(pool, tuple, conjuncts_passed);
+ virtual bool ReadNonRepeatedValue(MemPool* pool, Tuple* tuple) {
+ return ReadValue<false>(pool, tuple);
}
protected:
@@ -825,7 +818,7 @@ class HdfsParquetScanner::BoolColumnReader :
private:
template<bool IN_COLLECTION>
- inline bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
+ inline bool ReadValue(MemPool* pool, Tuple* tuple) {
DCHECK(slot_desc_ != NULL);
// Def and rep levels should be in valid range.
DCHECK_GE(rep_level_, 0);
@@ -836,8 +829,7 @@ class HdfsParquetScanner::BoolColumnReader :
"Caller should have called NextLevels() until we are ready to read a value";
if (def_level_ >= max_def_level()) {
- return ReadSlot<IN_COLLECTION>(tuple->GetSlot(tuple_offset_), pool,
- conjuncts_passed);
+ return ReadSlot<IN_COLLECTION>(tuple->GetSlot(tuple_offset_), pool);
} else {
// Null value
tuple->SetNull(null_indicator_offset_);
@@ -852,7 +844,7 @@ class HdfsParquetScanner::BoolColumnReader :
/// set, the query is cancelled, or the scan node limit was reached. Otherwise returns
/// true.
template <bool IN_COLLECTION>
- inline bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_passed) {
+ inline bool ReadSlot(void* slot, MemPool* pool) {
if (!bool_values_.GetValue(1, reinterpret_cast<bool*>(slot))) {
parent_->parse_status_ = Status("Invalid bool column.");
return false;
@@ -1345,8 +1337,7 @@ bool HdfsParquetScanner::CollectionColumnReader::NextLevels() {
return true;
}
-bool HdfsParquetScanner::CollectionColumnReader::ReadValue(
- MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
+bool HdfsParquetScanner::CollectionColumnReader::ReadValue(MemPool* pool, Tuple* tuple) {
DCHECK_GE(rep_level_, 0);
DCHECK_GE(def_level_, 0);
DCHECK_GE(def_level_, def_level_of_immediate_repeated_ancestor()) <<
@@ -1355,7 +1346,7 @@ bool HdfsParquetScanner::CollectionColumnReader::ReadValue(
if (tuple_offset_ == -1) {
return CollectionColumnReader::NextLevels();
} else if (def_level_ >= max_def_level()) {
- return ReadSlot(tuple->GetSlot(tuple_offset_), pool, conjuncts_passed);
+ return ReadSlot(tuple->GetSlot(tuple_offset_), pool);
} else {
// Null value
tuple->SetNull(null_indicator_offset_);
@@ -1364,19 +1355,14 @@ bool HdfsParquetScanner::CollectionColumnReader::ReadValue(
}
bool HdfsParquetScanner::CollectionColumnReader::ReadNonRepeatedValue(
- MemPool* pool, Tuple* tuple, bool* conjuncts_passed) {
- return CollectionColumnReader::ReadValue(pool, tuple, conjuncts_passed);
+ MemPool* pool, Tuple* tuple) {
+ return CollectionColumnReader::ReadValue(pool, tuple);
}
-// TODO for 2.3: test query where *conjuncts_passed == false
-bool HdfsParquetScanner::CollectionColumnReader::ReadSlot(
- void* slot, MemPool* pool, bool* conjuncts_passed) {
+bool HdfsParquetScanner::CollectionColumnReader::ReadSlot(void* slot, MemPool* pool) {
DCHECK(!children_.empty());
DCHECK_LE(rep_level_, new_collection_rep_level());
- // TODO: do something with conjuncts_passed? We still need to "read" the value in order
- // to advance children_ but we don't need to materialize the collection.
-
// Recursively read the collection into a new CollectionValue.
CollectionValue* coll_slot = reinterpret_cast<CollectionValue*>(slot);
*coll_slot = CollectionValue();
@@ -1716,7 +1702,6 @@ inline bool HdfsParquetScanner::ReadRow(const vector<ColumnReader*>& column_read
Tuple* tuple, MemPool* pool, bool* materialize_tuple) {
DCHECK(!column_readers.empty());
bool continue_execution = true;
- bool conjuncts_passed = true;
int size = column_readers.size();
for (int c = 0; c < size; ++c) {
ColumnReader* col_reader = column_readers[c];
@@ -1724,15 +1709,14 @@ inline bool HdfsParquetScanner::ReadRow(const vector<ColumnReader*>& column_read
DCHECK(*materialize_tuple);
DCHECK(col_reader->pos_slot_desc() == NULL);
// We found a value, read it
- continue_execution = col_reader->ReadNonRepeatedValue(pool, tuple,
- &conjuncts_passed);
+ continue_execution = col_reader->ReadNonRepeatedValue(pool, tuple);
} else if (*materialize_tuple) {
// All column readers for this tuple should have a value to materialize.
FILE_CHECK_GE(col_reader->def_level(),
col_reader->def_level_of_immediate_repeated_ancestor());
// Fill in position slot if applicable
if (col_reader->pos_slot_desc() != NULL) col_reader->ReadPosition(tuple);
- continue_execution = col_reader->ReadValue(pool, tuple, &conjuncts_passed);
+ continue_execution = col_reader->ReadValue(pool, tuple);
} else {
// A containing repeated field is empty or NULL
FILE_CHECK_LT(col_reader->def_level(),
@@ -1741,7 +1725,6 @@ inline bool HdfsParquetScanner::ReadRow(const vector<ColumnReader*>& column_read
}
if (UNLIKELY(!continue_execution)) break;
}
- *materialize_tuple &= conjuncts_passed;
if (!IN_COLLECTION && *materialize_tuple) {
TupleRow* tuple_row_mem = reinterpret_cast<TupleRow*>(&tuple);