You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2020/11/03 11:54:05 UTC
[incubator-doris] branch master updated: [Optimize] Add an
unordered_map for TabletSchema to speed up column name lookup (#4779)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new d1c2b3e [Optimize] Add an unordered_map for TabletSchema to speed up column name lookup (#4779)
d1c2b3e is described below
commit d1c2b3ed0d2c520ba670a24938d97e164689ccaf
Author: Yingchun Lai <40...@qq.com>
AuthorDate: Tue Nov 3 19:53:44 2020 +0800
[Optimize] Add an unordered_map for TabletSchema to speed up column name lookup (#4779)
Reduce column name lookup for TabletSchema and Tablet from O(N) to O(1).
---
be/src/olap/delete_handler.cpp | 17 +++-------------
be/src/olap/delete_handler.h | 18 ++---------------
be/src/olap/olap_cond.cpp | 6 +++---
be/src/olap/olap_cond.h | 13 ++----------
be/src/olap/reader.cpp | 7 +++++--
be/src/olap/tablet.h | 4 ++--
be/src/olap/tablet_manager.cpp | 19 ++++++-----------
be/src/olap/tablet_schema.cpp | 27 +++++++++++++------------
be/src/olap/tablet_schema.h | 9 ++++++++-
be/test/olap/rowset/segment_v2/segment_test.cpp | 16 ++++++++-------
10 files changed, 54 insertions(+), 82 deletions(-)
diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 28c8d77..8e8e80a 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -154,8 +154,7 @@ OLAPStatus DeleteConditionHandler::check_condition_valid(
const TabletSchema& schema,
const TCondition& cond) {
// 检查指定列名的列是否存在
- int field_index = _get_field_index(schema, cond.column_name);
-
+ int32_t field_index = schema.field_index(cond.column_name);
if (field_index < 0) {
OLAP_LOG_WARNING("field is not existent. [field_index=%d]", field_index);
return OLAP_ERR_DELETE_INVALID_CONDITION;
@@ -229,19 +228,10 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio
OLAPStatus DeleteHandler::init(const TabletSchema& schema,
const DelPredicateArray& delete_conditions, int32_t version) {
- if (_is_inited) {
- OLAP_LOG_WARNING("reinitialize delete handler.");
- return OLAP_ERR_INIT_FAILED;
-
- }
-
- if (version < 0) {
- OLAP_LOG_WARNING("invalid parameters. [version=%d]", version);
- return OLAP_ERR_DELETE_INVALID_PARAMETERS;
- }
+ DCHECK(!_is_inited) << "reinitialize delete handler.";
+ DCHECK(version >= 0) << "invalid parameters. version=" << version;
DelPredicateArray::const_iterator it = delete_conditions.begin();
-
for (; it != delete_conditions.end(); ++it) {
// 跳过版本号大于version的过滤条件
if (it->version() > version) {
@@ -250,7 +240,6 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema,
DeleteConditions temp;
temp.filter_version = it->version();
-
temp.del_cond = new(std::nothrow) Conditions();
if (temp.del_cond == nullptr) {
diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
index 7cab577..b03227a 100644
--- a/be/src/olap/delete_handler.h
+++ b/be/src/olap/delete_handler.h
@@ -51,27 +51,13 @@ public:
std::string construct_sub_predicates(const TCondition& condition);
private:
-
- int32_t _get_field_index(const TabletSchema& schema, const std::string& field_name) const {
- for (int i = 0; i < schema.num_columns(); i++) {
- if (schema.column(i).name() == field_name) {
- return i;
- }
- }
- LOG(WARNING) << "invalid field name. name='" << field_name;
- return -1;
- }
-
bool is_condition_value_valid(const TabletColumn& column, const TCondition& cond, const string& value);
};
// 表示一个删除条件
struct DeleteConditions {
- DeleteConditions() : filter_version(0), del_cond(NULL) {}
- ~DeleteConditions() {}
-
- int32_t filter_version; // 删除条件版本号
- Conditions* del_cond; // 删除条件
+ int32_t filter_version = 0; // 删除条件版本号
+ Conditions* del_cond = nullptr; // 删除条件
};
// 这个类主要用于判定一条数据(RowCursor)是否符合删除条件。这个类的使用流程如下:
diff --git a/be/src/olap/olap_cond.cpp b/be/src/olap/olap_cond.cpp
index 5e37e76..74c603f 100644
--- a/be/src/olap/olap_cond.cpp
+++ b/be/src/olap/olap_cond.cpp
@@ -579,10 +579,10 @@ bool CondColumn::eval(const segment_v2::BloomFilter* bf) const {
}
OLAPStatus Conditions::append_condition(const TCondition& tcond) {
- int32_t index = _get_field_index(tcond.column_name);
+ DCHECK(_schema != nullptr);
+ int32_t index = _schema->field_index(tcond.column_name);
if (index < 0) {
- LOG(WARNING) << "fail to get field index, name is invalid. index=" << index
- << ", field_name=" << tcond.column_name;
+ LOG(WARNING) << "fail to get field index, field name=" << tcond.column_name;
return OLAP_ERR_INPUT_PARAMETER_ERROR;
}
diff --git a/be/src/olap/olap_cond.h b/be/src/olap/olap_cond.h
index c456be3..bfc53d1 100644
--- a/be/src/olap/olap_cond.h
+++ b/be/src/olap/olap_cond.h
@@ -165,6 +165,7 @@ public:
_columns.clear();
}
+ // TODO(yingchun): should do it in constructor
void set_tablet_schema(const TabletSchema* schema) {
_schema = schema;
}
@@ -187,22 +188,12 @@ public:
CondColumn* get_column(int32_t cid) const;
private:
- int32_t _get_field_index(const std::string& field_name) const {
- for (int i = 0; i < _schema->num_columns(); i++) {
- if (_schema->column(i).name() == field_name) {
- return i;
- }
- }
- LOG(WARNING) << "invalid field name. [name='" << field_name << "']";
- return -1;
- }
-
bool _cond_column_is_key_or_duplicate(const CondColumn* cc) const {
return cc->is_key() || _schema->keys_type() == KeysType::DUP_KEYS;
}
private:
- const TabletSchema* _schema;
+ const TabletSchema* _schema = nullptr;
CondColumns _columns; // list of condition column
};
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index 1d616c1..d6a4ba9 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -748,7 +748,7 @@ OLAPStatus Reader::_init_keys_param(const ReaderParams& read_params) {
void Reader::_init_conditions_param(const ReaderParams& read_params) {
_conditions.set_tablet_schema(&_tablet->tablet_schema());
for (const auto& condition : read_params.conditions) {
- _conditions.append_condition(condition);
+ DCHECK_EQ(OLAP_SUCCESS, _conditions.append_condition(condition));
ColumnPredicate* predicate = _parse_to_predicate(condition);
if (predicate != nullptr) {
_col_predicates.push_back(predicate);
@@ -854,7 +854,10 @@ COMPARISON_PREDICATE_CONDITION_VALUE(ge, GreaterEqualPredicate)
ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition) {
// TODO: not equal and not in predicate is not pushed down
- int index = _tablet->field_index(condition.column_name);
+ int32_t index = _tablet->field_index(condition.column_name);
+ if (index < 0) {
+ return nullptr;
+ }
const TabletColumn& column = _tablet->tablet_schema().column(index);
if (column.aggregation() != FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {
return nullptr;
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 0786382..266b83c 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -89,7 +89,7 @@ public:
inline double bloom_filter_fpp() const;
inline size_t next_unique_id() const;
inline size_t row_size() const;
- inline size_t field_index(const string& field_name) const;
+ inline int32_t field_index(const string& field_name) const;
// operation in rowsets
OLAPStatus add_rowset(RowsetSharedPtr rowset, bool need_persist = true);
@@ -396,7 +396,7 @@ inline size_t Tablet::next_unique_id() const {
return _schema.next_column_unique_id();
}
-inline size_t Tablet::field_index(const string& field_name) const {
+inline int32_t Tablet::field_index(const string& field_name) const {
return _schema.field_index(field_name);
}
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 29f53be..4fa653e 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -1333,7 +1333,6 @@ OLAPStatus TabletManager::_create_tablet_meta_unlocked(const TCreateTabletReq& r
next_unique_id = request.tablet_schema.columns.size();
} else {
next_unique_id = base_tablet->next_unique_id();
- size_t old_num_columns = base_tablet->num_columns();
auto& new_columns = request.tablet_schema.columns;
for (uint32_t new_col_idx = 0; new_col_idx < new_columns.size(); ++new_col_idx) {
const TColumn& column = new_columns[new_col_idx];
@@ -1342,18 +1341,12 @@ OLAPStatus TabletManager::_create_tablet_meta_unlocked(const TCreateTabletReq& r
// unique_id in old_tablet to be the column's ordinal number in new_tablet
// 2. if column exists only in new_tablet, assign next_unique_id of old_tablet
// to the new column
- size_t old_col_idx = 0;
- for (old_col_idx = 0 ; old_col_idx < old_num_columns; ++old_col_idx) {
- const string& old_name = base_tablet->tablet_schema().column(old_col_idx).name();
- if (old_name == column.column_name) {
- uint32_t old_unique_id
- = base_tablet->tablet_schema().column(old_col_idx).unique_id();
- col_idx_to_unique_id[new_col_idx] = old_unique_id;
- break;
- }
- }
- // Not exist in old tablet, it is a new added column
- if (old_col_idx == old_num_columns) {
+ int32_t old_col_idx = base_tablet->field_index(column.column_name);
+ if (old_col_idx != -1) {
+ uint32_t old_unique_id = base_tablet->tablet_schema().column(old_col_idx).unique_id();
+ col_idx_to_unique_id[new_col_idx] = old_unique_id;
+ } else {
+ // Not exist in old tablet, it is a new added column
col_idx_to_unique_id[new_col_idx] = next_unique_id++;
}
}
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 02e9c5e..29ef956 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -348,17 +348,19 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema) {
_num_key_columns = 0;
_num_null_columns = 0;
_cols.clear();
+ _field_name_to_index.clear();
for (auto& column_pb : schema.column()) {
TabletColumn column;
column.init_from_pb(column_pb);
- _cols.push_back(column);
- _num_columns++;
if (column.is_key()) {
_num_key_columns++;
}
if (column.is_nullable()) {
_num_null_columns++;
}
+ _field_name_to_index[column.name()] = _num_columns;
+ _cols.emplace_back(std::move(column));
+ _num_columns++;
}
_num_short_key_columns = schema.num_short_key_columns();
_num_rows_per_row_block = schema.num_rows_per_row_block();
@@ -404,17 +406,9 @@ size_t TabletSchema::row_size() const {
return size;
}
-size_t TabletSchema::field_index(const std::string& field_name) const {
- bool field_exist = false;
- int ordinal = -1;
- for (auto& column : _cols) {
- ordinal++;
- if (column.name() == field_name) {
- field_exist = true;
- break;
- }
- }
- return field_exist ? ordinal : -1;
+int32_t TabletSchema::field_index(const std::string& field_name) const {
+ const auto& found = _field_name_to_index.find(field_name);
+ return (found == _field_name_to_index.end()) ? -1 : found->second;
}
const std::vector<TabletColumn>& TabletSchema::columns() const {
@@ -427,6 +421,13 @@ const TabletColumn& TabletSchema::column(size_t ordinal) const {
return _cols[ordinal];
}
+void TabletSchema::init_field_index_for_test() {
+ _field_name_to_index.clear();
+ for (int i = 0; i < _cols.size(); ++i) {
+ _field_name_to_index[_cols[i].name()] = i;
+ }
+}
+
bool operator==(const TabletColumn& a, const TabletColumn& b) {
if (a._unique_id != b._unique_id) return false;
if (a._col_name != b._col_name) return false;
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index da7aedf..480f0a5 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -95,11 +95,14 @@ bool operator!=(const TabletColumn& a, const TabletColumn& b);
class TabletSchema {
public:
+ // TODO(yingchun): better to make constructor as private to avoid
+ // manually init members incorrectly, and define a new function like
+ // void create_from_pb(const TabletSchemaPB& schema, TabletSchema* tablet_schema)
TabletSchema() = default;
void init_from_pb(const TabletSchemaPB& schema);
void to_schema_pb(TabletSchemaPB* tablet_meta_pb);
size_t row_size() const;
- size_t field_index(const std::string& field_name) const;
+ int32_t field_index(const std::string& field_name) const;
const TabletColumn& column(size_t ordinal) const;
const std::vector<TabletColumn>& columns() const;
inline size_t num_columns() const { return _num_columns; }
@@ -119,12 +122,16 @@ public:
inline int32_t sequence_col_idx() const { return _sequence_col_idx; }
private:
+ // Only for unit test
+ void init_field_index_for_test();
+
friend bool operator==(const TabletSchema& a, const TabletSchema& b);
friend bool operator!=(const TabletSchema& a, const TabletSchema& b);
private:
KeysType _keys_type = DUP_KEYS;
std::vector<TabletColumn> _cols;
+ std::unordered_map<std::string, int32_t> _field_name_to_index;
size_t _num_columns = 0;
size_t _num_key_columns = 0;
size_t _num_null_columns = 0;
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp
index b25a7a0..43bd8fd 100644
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@@ -96,6 +96,7 @@ protected:
res._num_columns = columns.size();
res._num_key_columns = num_key_columns;
res._num_short_key_columns = num_short_key_columns != -1 ? num_short_key_columns : num_key_columns;
+ res.init_field_index_for_test();
return res;
}
@@ -442,7 +443,7 @@ TEST_F(SegmentReaderWriterTest, TestIndex) {
condition.__set_condition_values(vals);
std::shared_ptr<Conditions> conditions(new Conditions());
conditions->set_tablet_schema(&tablet_schema);
- conditions->append_condition(condition);
+ ASSERT_EQ(OLAP_SUCCESS, conditions->append_condition(condition));
StorageReadOptions read_opts;
read_opts.stats = &stats;
@@ -465,7 +466,7 @@ TEST_F(SegmentReaderWriterTest, TestIndex) {
condition.__set_condition_values(vals);
std::shared_ptr<Conditions> conditions(new Conditions());
conditions->set_tablet_schema(&tablet_schema);
- conditions->append_condition(condition);
+ ASSERT_EQ(OLAP_SUCCESS, conditions->append_condition(condition));
StorageReadOptions read_opts;
read_opts.stats = &stats;
@@ -513,7 +514,7 @@ TEST_F(SegmentReaderWriterTest, TestIndex) {
condition.__set_condition_values(vals);
std::shared_ptr<Conditions> conditions(new Conditions());
conditions->set_tablet_schema(&tablet_schema);
- conditions->append_condition(condition);
+ ASSERT_EQ(OLAP_SUCCESS, conditions->append_condition(condition));
// the second page read will be pruned by the following delete predicate
TCondition delete_condition;
@@ -523,7 +524,7 @@ TEST_F(SegmentReaderWriterTest, TestIndex) {
delete_condition.__set_condition_values(vals2);
std::shared_ptr<Conditions> delete_conditions(new Conditions());
delete_conditions->set_tablet_schema(&tablet_schema);
- delete_conditions->append_condition(delete_condition);
+ ASSERT_EQ(OLAP_SUCCESS, delete_conditions->append_condition(delete_condition));
StorageReadOptions read_opts;
read_opts.stats = &stats;
@@ -574,7 +575,7 @@ TEST_F(SegmentReaderWriterTest, TestIndex) {
condition.__set_condition_values(vals);
std::shared_ptr<Conditions> conditions(new Conditions());
conditions->set_tablet_schema(&tablet_schema);
- conditions->append_condition(condition);
+ ASSERT_EQ(OLAP_SUCCESS, conditions->append_condition(condition));
read_opts.conditions = conditions.get();
std::unique_ptr<RowwiseIterator> iter;
segment->new_iterator(schema, read_opts, &iter);
@@ -764,6 +765,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
tablet_schema->_cols.push_back(create_char_key(2));
tablet_schema->_cols.push_back(create_varchar_key(3));
tablet_schema->_cols.push_back(create_varchar_key(4));
+ tablet_schema->init_field_index_for_test();
// segment write
std::string dname = "./ut_dir/segment_test";
@@ -912,7 +914,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
condition.__set_condition_values(vals);
std::shared_ptr<Conditions> conditions(new Conditions());
conditions->set_tablet_schema(tablet_schema.get());
- conditions->append_condition(condition);
+ ASSERT_EQ(OLAP_SUCCESS, conditions->append_condition(condition));
StorageReadOptions read_opts;
read_opts.stats = &stats;
@@ -964,7 +966,7 @@ TEST_F(SegmentReaderWriterTest, TestStringDict) {
condition.__set_condition_values(vals);
std::shared_ptr<Conditions> conditions(new Conditions());
conditions->set_tablet_schema(tablet_schema.get());
- conditions->append_condition(condition);
+ ASSERT_EQ(OLAP_SUCCESS, conditions->append_condition(condition));
StorageReadOptions read_opts;
read_opts.stats = &stats;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org