You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2023/06/19 07:29:50 UTC
[doris] branch master updated: [enhancement](pk) add bvar latency recorder for pk (#20942)
This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 87e3a79387 [enhancement](pk) add bvar latency recorder for pk (#20942)
87e3a79387 is described below
commit 87e3a79387ff3d7d67a758001dde4223131b72cf
Author: Yongqiang YANG <98...@users.noreply.github.com>
AuthorDate: Mon Jun 19 15:29:42 2023 +0800
[enhancement](pk) add bvar latency recorder for pk (#20942)
---
be/src/agent/task_worker_pool.cpp | 7 ++++-
be/src/olap/lru_cache.cpp | 7 +++++
be/src/olap/lru_cache.h | 5 ++++
.../rowset/segment_v2/indexed_column_reader.cpp | 21 ++++++++++++++-
be/src/olap/rowset/segment_v2/segment.cpp | 4 +++
be/src/olap/tablet.cpp | 14 ++++++++++
be/src/olap/task/engine_publish_version_task.cpp | 26 ++++++++++++++++++
be/src/olap/task/engine_publish_version_task.h | 2 ++
be/src/util/bvar_helper.h | 31 ++++++++++++++++++++++
9 files changed, 115 insertions(+), 2 deletions(-)
diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp
index 100ac2fd4a..06e4694ac7 100644
--- a/be/src/agent/task_worker_pool.cpp
+++ b/be/src/agent/task_worker_pool.cpp
@@ -96,6 +96,8 @@ std::atomic_ulong TaskWorkerPool::_s_report_version(time(nullptr) * 10000);
std::mutex TaskWorkerPool::_s_task_signatures_lock;
std::map<TTaskType::type, std::set<int64_t>> TaskWorkerPool::_s_task_signatures;
+static bvar::LatencyRecorder g_publish_version_latency("doris_pk", "publish_version");
+
TaskWorkerPool::TaskWorkerPool(const TaskWorkerType task_worker_type, ExecEnv* env,
const TMasterInfo& master_info, ThreadModel thread_model)
: _master_info(master_info),
@@ -1440,6 +1442,7 @@ void PublishVersionTaskPool::_publish_version_worker_thread_callback() {
agent_task_req = _tasks.front();
_tasks.pop_front();
}
+
const TPublishVersionRequest& publish_version_req = agent_task_req.publish_version_req;
DorisMetrics::instance()->publish_task_request_total->increment(1);
VLOG_NOTICE << "get publish version task. signature=" << agent_task_req.signature;
@@ -1518,11 +1521,13 @@ void PublishVersionTaskPool::_publish_version_worker_thread_callback() {
}
}
}
+ uint32_t cost_second = time(nullptr) - agent_task_req.recv_time;
+ g_publish_version_latency << cost_second;
LOG_INFO("successfully publish version")
.tag("signature", agent_task_req.signature)
.tag("transaction_id", publish_version_req.transaction_id)
.tag("tablets_num", succ_tablet_ids.size())
- .tag("cost(s)", time(nullptr) - agent_task_req.recv_time);
+ .tag("cost(s)", cost_second);
}
status.to_thrift(&finish_task_request.task_status);
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 000315f773..a79e6fbf96 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -548,6 +548,13 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity,
INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_lookup_count);
INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_hit_count);
INT_DOUBLE_METRIC_REGISTER(_entity, cache_hit_ratio);
+
+ _hit_count_bvar.reset(new bvar::Adder<uint64_t>("doris_cache", _name));
+ _hit_count_per_second.reset(new bvar::PerSecond<bvar::Adder<uint64_t>>(
+ "doris_cache", _name + "_persecond", _hit_count_bvar.get(), 60));
+ _lookup_count_bvar.reset(new bvar::Adder<uint64_t>("doris_cache", _name));
+ _lookup_count_per_second.reset(new bvar::PerSecond<bvar::Adder<uint64_t>>(
+ "doris_cache", _name + "_persecond", _lookup_count_bvar.get(), 60));
}
ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type,
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index f169fea434..4a9d45b07f 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -443,6 +443,11 @@ private:
IntAtomicCounter* cache_lookup_count = nullptr;
IntAtomicCounter* cache_hit_count = nullptr;
DoubleGauge* cache_hit_ratio = nullptr;
+ // bvars
+ std::unique_ptr<bvar::Adder<uint64_t>> _hit_count_bvar;
+ std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _hit_count_per_second;
+ std::unique_ptr<bvar::Adder<uint64_t>> _lookup_count_bvar;
+ std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _lookup_count_per_second;
};
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index fb36f85ccc..eaba30eba2 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -30,11 +30,25 @@
#include "olap/rowset/segment_v2/page_io.h"
#include "olap/types.h"
#include "util/block_compression.h"
+#include "util/bvar_helper.h"
namespace doris {
using namespace ErrorCode;
namespace segment_v2 {
+static bvar::Adder<uint64_t> g_index_reader_bytes("doris_pk", "index_reader_bytes");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_bytes_per_second(
+ "doris_pk", "index_reader_bytes_per_second", &g_index_reader_bytes, 60);
+static bvar::Adder<uint64_t> g_index_reader_pages("doris_pk", "index_reader_pages");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_pages_per_second(
+ "doris_pk", "index_reader_pages_per_second", &g_index_reader_pages, 60);
+static bvar::Adder<uint64_t> g_index_reader_seek_count("doris_pk", "index_reader_seek_count");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_seek_per_second(
+ "doris_pk", "index_reader_seek_per_second", &g_index_reader_seek_count, 60);
+static bvar::Adder<uint64_t> g_index_reader_pk_pages("doris_pk", "index_reader_pk_pages");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_pk_bytes_per_second(
+ "doris_pk", "index_reader_pk_pages_per_second", &g_index_reader_pk_pages, 60);
+
using strings::Substitute;
Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory) {
@@ -104,7 +118,10 @@ Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* handle,
opts.encoding_info = _encoding_info;
opts.pre_decode = pre_decode;
- return PageIO::read_and_decompress_page(opts, handle, body, footer);
+ auto st = PageIO::read_and_decompress_page(opts, handle, body, footer);
+ g_index_reader_bytes << footer->uncompressed_size();
+ g_index_reader_pages << 1;
+ return st;
}
///////////////////////////////////////////////////////////////////////////////
@@ -179,6 +196,8 @@ Status IndexedColumnIterator::seek_at_or_after(const void* key, bool* exact_matc
return Status::NotFound("value index is empty ");
}
+ g_index_reader_seek_count << 1;
+
bool load_data_page = false;
PagePointer data_page_pp;
if (_reader->_has_index_page) {
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index e046785bb0..50dc44ee73 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -52,6 +52,7 @@
#include "runtime/query_context.h"
#include "runtime/runtime_predicate.h"
#include "runtime/runtime_state.h"
+#include "util/bvar_helper.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/slice.h" // Slice
@@ -72,6 +73,8 @@ class InvertedIndexIterator;
using io::FileCacheManager;
+static bvar::LatencyRecorder g_segment_lookup_rowkey_latency("doris_pk", "segment_lookup_rowkey");
+
Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
const io::FileReaderOptions& reader_options,
@@ -355,6 +358,7 @@ Status Segment::new_inverted_index_iterator(const TabletColumn& tablet_column,
}
Status Segment::lookup_row_key(const Slice& key, bool with_seq_col, RowLocation* row_location) {
+ SCOPED_BVAR_LATENCY(g_segment_lookup_rowkey_latency);
RETURN_IF_ERROR(load_pk_index_and_bf());
bool has_seq_col = _tablet_schema->has_sequence_col();
size_t seq_col_length = 0;
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 79d0237931..41a02a5a1c 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -107,6 +107,7 @@
#include "olap/utils.h"
#include "segment_loader.h"
#include "service/point_query_executor.h"
+#include "util/bvar_helper.h"
#include "util/defer_op.h"
#include "util/doris_metrics.h"
#include "util/pretty_printer.h"
@@ -139,6 +140,15 @@ using std::string;
using std::vector;
using io::FileSystemSPtr;
+static bvar::LatencyRecorder g_tablet_lookup_rowkey_latency("doris_pk", "tablet_lookup_rowkey");
+static bvar::LatencyRecorder g_tablet_commit_phase_update_delete_bitmap_latency(
+ "doris_pk", "commit_phase_update_delete_bitmap");
+static bvar::LatencyRecorder g_tablet_update_delete_bitmap_latency("doris_pk",
+ "update_delete_bitmap");
+static bvar::Adder<uint64_t> g_tablet_pk_not_found("doris_pk", "lookup_not_found");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_tablet_pk_not_found_per_second(
+ "doris_pk", "lookup_not_found_per_second", &g_tablet_pk_not_found, 60);
+
const std::chrono::seconds TRACE_TABLET_LOCK_THRESHOLD = 10s;
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_bytes, MetricUnit::BYTES);
@@ -2695,6 +2705,7 @@ Status Tablet::lookup_row_key(
uint32_t version,
std::unordered_map<RowsetId, SegmentCacheHandle, HashOfRowsetId>& segment_caches,
RowsetSharedPtr* rowset) {
+ SCOPED_BVAR_LATENCY(g_tablet_lookup_rowkey_latency);
size_t seq_col_length = 0;
if (_schema->has_sequence_col() && with_seq_col) {
seq_col_length = _schema->column(_schema->sequence_col_idx()).length() + 1;
@@ -2755,6 +2766,7 @@ Status Tablet::lookup_row_key(
return s;
}
}
+ g_tablet_pk_not_found << 1;
return Status::NotFound("can't find key in all rowsets");
}
@@ -3170,6 +3182,7 @@ Status Tablet::commit_phase_update_delete_bitmap(
const RowsetSharedPtr& rowset, RowsetIdUnorderedSet& pre_rowset_ids,
DeleteBitmapPtr delete_bitmap, const std::vector<segment_v2::SegmentSharedPtr>& segments,
int64_t txn_id, RowsetWriter* rowset_writer) {
+ SCOPED_BVAR_LATENCY(g_tablet_commit_phase_update_delete_bitmap_latency);
RowsetIdUnorderedSet cur_rowset_ids;
RowsetIdUnorderedSet rowset_ids_to_add;
RowsetIdUnorderedSet rowset_ids_to_del;
@@ -3211,6 +3224,7 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset,
const RowsetIdUnorderedSet& pre_rowset_ids,
DeleteBitmapPtr delete_bitmap, int64_t txn_id,
RowsetWriter* rowset_writer) {
+ SCOPED_BVAR_LATENCY(g_tablet_update_delete_bitmap_latency);
RowsetIdUnorderedSet cur_rowset_ids;
RowsetIdUnorderedSet rowset_ids_to_add;
RowsetIdUnorderedSet rowset_ids_to_del;
diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp
index bca689960a..748706e421 100644
--- a/be/src/olap/task/engine_publish_version_task.cpp
+++ b/be/src/olap/task/engine_publish_version_task.cpp
@@ -36,6 +36,7 @@
#include "olap/tablet_meta.h"
#include "olap/txn_manager.h"
#include "olap/utils.h"
+#include "util/bvar_helper.h"
#include "util/threadpool.h"
namespace doris {
@@ -44,6 +45,29 @@ using namespace ErrorCode;
using std::map;
+static bvar::LatencyRecorder g_tablet_publish_latency("doris_pk", "tablet_publish");
+static bvar::LatencyRecorder g_tablet_publish_schedule_latency("doris_pk",
+ "tablet_publish_schedule");
+static bvar::LatencyRecorder g_tablet_publish_lock_wait_latency("doris_pk",
+ "tablet_publish_lock_wait");
+static bvar::LatencyRecorder g_tablet_publish_save_meta_latency("doris_pk",
+ "tablet_publish_save_meta");
+static bvar::LatencyRecorder g_tablet_publish_delete_bitmap_latency("doris_pk",
+ "tablet_publish_delete_bitmap");
+static bvar::LatencyRecorder g_tablet_publish_partial_update_latency(
+ "doris_pk", "tablet_publish_partial_update");
+static bvar::LatencyRecorder g_tablet_publish_add_inc_latency("doris_pk",
+ "tablet_publish_add_inc_rowset");
+
+void TabletPublishStatistics::record_in_bvar() {
+ g_tablet_publish_schedule_latency << schedule_time_us;
+ g_tablet_publish_lock_wait_latency << lock_wait_time_us;
+ g_tablet_publish_save_meta_latency << save_meta_time_us;
+ g_tablet_publish_delete_bitmap_latency << calc_delete_bitmap_time_us;
+ g_tablet_publish_partial_update_latency << partial_update_write_segment_us;
+ g_tablet_publish_add_inc_latency << add_inc_rowset_us;
+}
+
EnginePublishVersionTask::EnginePublishVersionTask(
const TPublishVersionRequest& publish_version_req, std::vector<TTabletId>* error_tablet_ids,
std::vector<TTabletId>* succ_tablet_ids)
@@ -255,6 +279,8 @@ void TabletPublishTxnTask::handle() {
_engine_publish_version_task->add_succ_tablet_id(_tablet_info.tablet_id);
int64_t cost_us = MonotonicMicros() - _stats.submit_time_us;
// print stats if publish cost > 500ms
+ g_tablet_publish_latency << cost_us;
+ _stats.record_in_bvar();
LOG(INFO) << "publish version successfully on tablet"
<< ", table_id=" << _tablet->table_id() << ", tablet=" << _tablet->full_name()
<< ", transaction_id=" << _transaction_id << ", version=" << _version.first
diff --git a/be/src/olap/task/engine_publish_version_task.h b/be/src/olap/task/engine_publish_version_task.h
index efc7c8a4cf..bd6907b913 100644
--- a/be/src/olap/task/engine_publish_version_task.h
+++ b/be/src/olap/task/engine_publish_version_task.h
@@ -55,6 +55,8 @@ struct TabletPublishStatistics {
schedule_time_us, lock_wait_time_us, save_meta_time_us, calc_delete_bitmap_time_us,
partial_update_write_segment_us, add_inc_rowset_us);
}
+
+ void record_in_bvar();
};
class TabletPublishTxnTask {
diff --git a/be/src/util/bvar_helper.h b/be/src/util/bvar_helper.h
new file mode 100644
index 0000000000..b2439f2121
--- /dev/null
+++ b/be/src/util/bvar_helper.h
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <bvar/latency_recorder.h>
+
+#include "defer_op.h"
+#include "stopwatch.hpp"
+
+namespace doris {
+
+#define SCOPED_BVAR_LATENCY(bvar_item) \
+ MonotonicStopWatch __watch; \
+ __watch.start(); \
+ Defer __record_bvar([&] { bvar_item << __watch.elapsed_time() / 1000; });
+
+} // end namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org