You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ji...@apache.org on 2022/12/04 01:14:21 UTC
[doris] branch branch-1.1-lts updated: [branch-1.1](log) add tracing log for publish task (#14786)
This is an automated email from the ASF dual-hosted git repository.
jiafengzheng pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
new 6c9caec6fd [branch-1.1](log) add tracing log for publish task (#14786)
6c9caec6fd is described below
commit 6c9caec6fd325652a647c070d8ae78e5262744f0
Author: Mingyu Chen <mo...@163.com>
AuthorDate: Sun Dec 4 09:14:16 2022 +0800
[branch-1.1](log) add tracing log for publish task (#14786)
---
be/src/agent/task_worker_pool.cpp | 2 --
be/src/common/config.h | 3 ++-
be/src/olap/data_dir.h | 5 +++++
be/src/olap/task/engine_publish_version_task.cpp | 23 +++++++++++++++++++----
be/src/olap/txn_manager.cpp | 12 +++++++++---
be/src/olap/txn_manager.h | 4 ++--
be/test/olap/delta_writer_test.cpp | 6 ++++--
be/test/olap/txn_manager_test.cpp | 6 ++++--
8 files changed, 45 insertions(+), 16 deletions(-)
diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp
index cb78c3beb6..41838f788d 100644
--- a/be/src/agent/task_worker_pool.cpp
+++ b/be/src/agent/task_worker_pool.cpp
@@ -729,14 +729,12 @@ void TaskWorkerPool::_publish_version_worker_thread_callback() {
st = Status::RuntimeError(strings::Substitute("publish version failed. error=$0", res));
finish_task_request.__set_error_tablet_ids(error_tablet_ids);
} else {
- int submit_tablets = 0;
if (config::enable_quick_compaction && config::quick_compaction_batch_size > 0) {
for (auto& entry : succ_tablet_ids) {
TabletSharedPtr tablet =
StorageEngine::instance()->tablet_manager()->get_tablet(
entry.first, entry.second);
if (tablet != nullptr) {
- submit_tablets++;
tablet->publised_count++;
if (tablet->publised_count % config::quick_compaction_batch_size == 0) {
StorageEngine::instance()->submit_quick_compaction_task(tablet);
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 68c5e6cd7e..d3b45ba109 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -308,6 +308,7 @@ CONF_mInt64(row_step_for_compaction_merge_log, "0");
// Threshold to logging compaction trace, in seconds.
CONF_mInt32(base_compaction_trace_threshold, "60");
CONF_mInt32(cumulative_compaction_trace_threshold, "10");
+CONF_mInt32(publish_task_trace_threshold, "2");
CONF_mBool(disable_compaction_trace_log, "true");
// Threshold to logging agent task trace, in seconds.
@@ -553,7 +554,7 @@ CONF_mInt64(max_runnings_transactions_per_txn_map, "100");
// tablet_map_lock shard size, the value is 2^n, n=0,1,2,3,4
// this is a an enhancement for better performance to manage tablet
-CONF_Int32(tablet_map_shard_size, "1");
+CONF_Int32(tablet_map_shard_size, "4");
// txn_map_lock shard size, the value is 2^n, n=0,1,2,3,4
// this is a an enhancement for better performance to manage txn
diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h
index 68933167c2..3b4a41799f 100644
--- a/be/src/olap/data_dir.h
+++ b/be/src/olap/data_dir.h
@@ -40,6 +40,11 @@ class TabletManager;
class TabletMeta;
class TxnManager;
+struct PublishStatistic {
+ size_t get_lock_time = 0;
+ size_t save_meta_time = 0;
+};
+
// A DataDir used to manage data in same path.
// Now, After DataDir was created, it will never be deleted for easy implementation.
class DataDir {
diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp
index 1abace542a..8d9aff21dc 100644
--- a/be/src/olap/task/engine_publish_version_task.cpp
+++ b/be/src/olap/task/engine_publish_version_task.cpp
@@ -22,6 +22,7 @@
#include "olap/data_dir.h"
#include "olap/rowset/rowset_meta_manager.h"
#include "olap/tablet_manager.h"
+#include "util/scoped_cleanup.h"
namespace doris {
@@ -37,9 +38,20 @@ EnginePublishVersionTask::EnginePublishVersionTask(TPublishVersionRequest& publi
OLAPStatus EnginePublishVersionTask::finish() {
OLAPStatus res = OLAP_SUCCESS;
int64_t transaction_id = _publish_version_req.transaction_id;
- OlapStopWatch watch;
+ // OlapStopWatch watch;
VLOG_NOTICE << "begin to process publish version. transaction_id=" << transaction_id;
+ PublishStatistic stat;
+ MonotonicStopWatch watch;
+ watch.start();
+ SCOPED_CLEANUP({
+ if (watch.elapsed_time() / 1e9 > config::publish_task_trace_threshold) {
+ LOG(WARNING) << "Trace " << transaction_id << " publish stat(ns): get lock time: "
+ << stat.get_lock_time << ", save meta time: " << stat.save_meta_time;
+ }
+ });
+
+ size_t tablet_num = 0;
// each partition
for (auto& par_ver_info : _publish_version_req.partition_version_infos) {
int64_t partition_id = par_ver_info.partition_id;
@@ -59,6 +71,7 @@ OLAPStatus EnginePublishVersionTask::finish() {
Version version(par_ver_info.version, par_ver_info.version);
+ tablet_num += tablet_related_rs.size();
// each tablet
for (auto& tablet_rs : tablet_related_rs) {
OLAPStatus publish_status = OLAP_SUCCESS;
@@ -90,7 +103,7 @@ OLAPStatus EnginePublishVersionTask::finish() {
}
publish_status = StorageEngine::instance()->txn_manager()->publish_txn(
- partition_id, tablet, transaction_id, version);
+ partition_id, tablet, transaction_id, version, &stat);
if (publish_status != OLAP_SUCCESS) {
LOG(WARNING) << "failed to publish version. rowset_id=" << rowset->rowset_id()
<< ", tablet_id=" << tablet_info.tablet_id
@@ -155,8 +168,10 @@ OLAPStatus EnginePublishVersionTask::finish() {
}
LOG(INFO) << "finish to publish version on transaction."
- << "transaction_id=" << transaction_id << ", cost(us): " << watch.get_elapse_time_us()
- << ", error_tablet_size=" << _error_tablet_ids->size();
+ << "transaction_id=" << transaction_id << ", cost(us): " << watch.elapsed_time() / 1000
+ << ", error_tablet_size=" << _error_tablet_ids->size()
+ << ", partition num: " << _publish_version_req.partition_version_infos.size()
+ << ", tablet num: " << tablet_num;
return res;
}
diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp
index 799aaffbda..31079c292b 100644
--- a/be/src/olap/txn_manager.cpp
+++ b/be/src/olap/txn_manager.cpp
@@ -97,9 +97,10 @@ OLAPStatus TxnManager::commit_txn(TPartitionId partition_id, const TabletSharedP
}
OLAPStatus TxnManager::publish_txn(TPartitionId partition_id, const TabletSharedPtr& tablet,
- TTransactionId transaction_id, const Version& version) {
+ TTransactionId transaction_id, const Version& version,
+ PublishStatistic* stat) {
return publish_txn(tablet->data_dir()->get_meta(), partition_id, transaction_id,
- tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid(), version);
+ tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid(), version, stat);
}
// delete the txn from manager if it is not committed(not have a valid rowset)
@@ -257,13 +258,16 @@ OLAPStatus TxnManager::commit_txn(OlapMeta* meta, TPartitionId partition_id,
OLAPStatus TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
TTransactionId transaction_id, TTabletId tablet_id,
SchemaHash schema_hash, TabletUid tablet_uid,
- const Version& version) {
+ const Version& version, PublishStatistic* stat) {
+ MonotonicStopWatch watch;
+ watch.start();
pair<int64_t, int64_t> key(partition_id, transaction_id);
TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid);
RowsetSharedPtr rowset_ptr = nullptr;
MutexLock txn_lock(&_get_txn_lock(transaction_id));
{
ReadLock rlock(_get_txn_map_lock(transaction_id));
+ stat->get_lock_time += watch.elapsed_time();
txn_tablet_map_t& txn_tablet_map = _get_txn_tablet_map(transaction_id);
auto it = txn_tablet_map.find(key);
if (it != txn_tablet_map.end()) {
@@ -281,6 +285,7 @@ OLAPStatus TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
if (rowset_ptr != nullptr) {
// TODO(ygl): rowset is already set version here, memory is changed, if save failed
// it maybe a fatal error
+ watch.reset();
rowset_ptr->make_visible(version);
OLAPStatus save_status =
RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(),
@@ -291,6 +296,7 @@ OLAPStatus TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
<< ", txn id:" << transaction_id;
return OLAP_ERR_ROWSET_SAVE_FAILED;
}
+ stat->save_meta_time += watch.elapsed_time();
} else {
return OLAP_ERR_TRANSACTION_NOT_EXIST;
}
diff --git a/be/src/olap/txn_manager.h b/be/src/olap/txn_manager.h
index 24ab6fb7aa..ef4665ea86 100644
--- a/be/src/olap/txn_manager.h
+++ b/be/src/olap/txn_manager.h
@@ -79,7 +79,7 @@ public:
const RowsetSharedPtr& rowset_ptr, bool is_recovery);
OLAPStatus publish_txn(TPartitionId partition_id, const TabletSharedPtr& tablet,
- TTransactionId transaction_id, const Version& version);
+ TTransactionId transaction_id, const Version& version, PublishStatistic* stat = nullptr);
// delete the txn from manager if it is not committed(not have a valid rowset)
OLAPStatus rollback_txn(TPartitionId partition_id, const TabletSharedPtr& tablet,
@@ -103,7 +103,7 @@ public:
// not persist rowset meta because
OLAPStatus publish_txn(OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id,
TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid,
- const Version& version);
+ const Version& version, PublishStatistic* stat = nullptr);
// delete the txn from manager if it is not committed(not have a valid rowset)
OLAPStatus rollback_txn(TPartitionId partition_id, TTransactionId transaction_id,
diff --git a/be/test/olap/delta_writer_test.cpp b/be/test/olap/delta_writer_test.cpp
index f4fd29d44b..34a38655ed 100644
--- a/be/test/olap/delta_writer_test.cpp
+++ b/be/test/olap/delta_writer_test.cpp
@@ -488,11 +488,12 @@ TEST_F(TestDeltaWriter, write) {
std::map<TabletInfo, RowsetSharedPtr> tablet_related_rs;
StorageEngine::instance()->txn_manager()->get_txn_related_tablets(
write_req.txn_id, write_req.partition_id, &tablet_related_rs);
+ PublishStatistic stat;
for (auto& tablet_rs : tablet_related_rs) {
RowsetSharedPtr rowset = tablet_rs.second;
res = k_engine->txn_manager()->publish_txn(meta, write_req.partition_id, write_req.txn_id,
write_req.tablet_id, write_req.schema_hash,
- tablet_rs.first.tablet_uid, version);
+ tablet_rs.first.tablet_uid, version, &stat);
EXPECT_EQ(OLAP_SUCCESS, res);
res = tablet->add_inc_rowset(rowset);
EXPECT_EQ(OLAP_SUCCESS, res);
@@ -559,11 +560,12 @@ TEST_F(TestDeltaWriter, sequence_col) {
std::map<TabletInfo, RowsetSharedPtr> tablet_related_rs;
StorageEngine::instance()->txn_manager()->get_txn_related_tablets(
write_req.txn_id, write_req.partition_id, &tablet_related_rs);
+ PublishStatistic stat;
for (auto& tablet_rs : tablet_related_rs) {
RowsetSharedPtr rowset = tablet_rs.second;
res = k_engine->txn_manager()->publish_txn(meta, write_req.partition_id, write_req.txn_id,
write_req.tablet_id, write_req.schema_hash,
- tablet_rs.first.tablet_uid, version);
+ tablet_rs.first.tablet_uid, version, &stat);
EXPECT_EQ(OLAP_SUCCESS, res);
res = tablet->add_inc_rowset(rowset);
EXPECT_EQ(OLAP_SUCCESS, res);
diff --git a/be/test/olap/txn_manager_test.cpp b/be/test/olap/txn_manager_test.cpp
index a3213da90d..a9220b27d0 100644
--- a/be/test/olap/txn_manager_test.cpp
+++ b/be/test/olap/txn_manager_test.cpp
@@ -283,8 +283,9 @@ TEST_F(TxnManagerTest, PublishVersionSuccessful) {
_tablet_uid, load_id, _alpha_rowset, false);
EXPECT_TRUE(status == OLAP_SUCCESS);
Version new_version(10, 11);
+ PublishStatistic stat;
status = _txn_mgr->publish_txn(_meta, partition_id, transaction_id, tablet_id, schema_hash,
- _tablet_uid, new_version);
+ _tablet_uid, new_version, &stat);
EXPECT_TRUE(status == OLAP_SUCCESS);
RowsetMetaSharedPtr rowset_meta(new AlphaRowsetMeta());
@@ -299,8 +300,9 @@ TEST_F(TxnManagerTest, PublishVersionSuccessful) {
// 1. publish version failed if not found related txn and rowset
TEST_F(TxnManagerTest, PublishNotExistedTxn) {
Version new_version(10, 11);
+ PublishStatistic stat;
OLAPStatus status = _txn_mgr->publish_txn(_meta, partition_id, transaction_id, tablet_id,
- schema_hash, _tablet_uid, new_version);
+ schema_hash, _tablet_uid, new_version, &stat);
EXPECT_TRUE(status != OLAP_SUCCESS);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org