You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2023/06/19 07:29:50 UTC

[doris] branch master updated: [enhancement](pk) add bvar latency recorder for pk (#20942)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 87e3a79387 [enhancement](pk) add bvar latency recorder for pk (#20942)
87e3a79387 is described below

commit 87e3a79387ff3d7d67a758001dde4223131b72cf
Author: Yongqiang YANG <98...@users.noreply.github.com>
AuthorDate: Mon Jun 19 15:29:42 2023 +0800

    [enhancement](pk) add bvar latency recorder for pk (#20942)
---
 be/src/agent/task_worker_pool.cpp                  |  7 ++++-
 be/src/olap/lru_cache.cpp                          |  7 +++++
 be/src/olap/lru_cache.h                            |  5 ++++
 .../rowset/segment_v2/indexed_column_reader.cpp    | 21 ++++++++++++++-
 be/src/olap/rowset/segment_v2/segment.cpp          |  4 +++
 be/src/olap/tablet.cpp                             | 14 ++++++++++
 be/src/olap/task/engine_publish_version_task.cpp   | 26 ++++++++++++++++++
 be/src/olap/task/engine_publish_version_task.h     |  2 ++
 be/src/util/bvar_helper.h                          | 31 ++++++++++++++++++++++
 9 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp
index 100ac2fd4a..06e4694ac7 100644
--- a/be/src/agent/task_worker_pool.cpp
+++ b/be/src/agent/task_worker_pool.cpp
@@ -96,6 +96,8 @@ std::atomic_ulong TaskWorkerPool::_s_report_version(time(nullptr) * 10000);
 std::mutex TaskWorkerPool::_s_task_signatures_lock;
 std::map<TTaskType::type, std::set<int64_t>> TaskWorkerPool::_s_task_signatures;
 
+static bvar::LatencyRecorder g_publish_version_latency("doris_pk", "publish_version");
+
 TaskWorkerPool::TaskWorkerPool(const TaskWorkerType task_worker_type, ExecEnv* env,
                                const TMasterInfo& master_info, ThreadModel thread_model)
         : _master_info(master_info),
@@ -1440,6 +1442,7 @@ void PublishVersionTaskPool::_publish_version_worker_thread_callback() {
             agent_task_req = _tasks.front();
             _tasks.pop_front();
         }
+
         const TPublishVersionRequest& publish_version_req = agent_task_req.publish_version_req;
         DorisMetrics::instance()->publish_task_request_total->increment(1);
         VLOG_NOTICE << "get publish version task. signature=" << agent_task_req.signature;
@@ -1518,11 +1521,13 @@ void PublishVersionTaskPool::_publish_version_worker_thread_callback() {
                     }
                 }
             }
+            uint32_t cost_second = time(nullptr) - agent_task_req.recv_time;
+            g_publish_version_latency << cost_second;
             LOG_INFO("successfully publish version")
                     .tag("signature", agent_task_req.signature)
                     .tag("transaction_id", publish_version_req.transaction_id)
                     .tag("tablets_num", succ_tablet_ids.size())
-                    .tag("cost(s)", time(nullptr) - agent_task_req.recv_time);
+                    .tag("cost(s)", cost_second);
         }
 
         status.to_thrift(&finish_task_request.task_status);
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 000315f773..a79e6fbf96 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -548,6 +548,13 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity,
     INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_lookup_count);
     INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_hit_count);
     INT_DOUBLE_METRIC_REGISTER(_entity, cache_hit_ratio);
+
+    _hit_count_bvar.reset(new bvar::Adder<uint64_t>("doris_cache", _name));
+    _hit_count_per_second.reset(new bvar::PerSecond<bvar::Adder<uint64_t>>(
+            "doris_cache", _name + "_persecond", _hit_count_bvar.get(), 60));
+    _lookup_count_bvar.reset(new bvar::Adder<uint64_t>("doris_cache", _name));
+    _lookup_count_per_second.reset(new bvar::PerSecond<bvar::Adder<uint64_t>>(
+            "doris_cache", _name + "_persecond", _lookup_count_bvar.get(), 60));
 }
 
 ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type,
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index f169fea434..4a9d45b07f 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -443,6 +443,11 @@ private:
     IntAtomicCounter* cache_lookup_count = nullptr;
     IntAtomicCounter* cache_hit_count = nullptr;
     DoubleGauge* cache_hit_ratio = nullptr;
+    // bvars
+    std::unique_ptr<bvar::Adder<uint64_t>> _hit_count_bvar;
+    std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _hit_count_per_second;
+    std::unique_ptr<bvar::Adder<uint64_t>> _lookup_count_bvar;
+    std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _lookup_count_per_second;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index fb36f85ccc..eaba30eba2 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -30,11 +30,25 @@
 #include "olap/rowset/segment_v2/page_io.h"
 #include "olap/types.h"
 #include "util/block_compression.h"
+#include "util/bvar_helper.h"
 
 namespace doris {
 using namespace ErrorCode;
 namespace segment_v2 {
 
+static bvar::Adder<uint64_t> g_index_reader_bytes("doris_pk", "index_reader_bytes");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_bytes_per_second(
+        "doris_pk", "index_reader_bytes_per_second", &g_index_reader_bytes, 60);
+static bvar::Adder<uint64_t> g_index_reader_pages("doris_pk", "index_reader_pages");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_pages_per_second(
+        "doris_pk", "index_reader_pages_per_second", &g_index_reader_pages, 60);
+static bvar::Adder<uint64_t> g_index_reader_seek_count("doris_pk", "index_reader_seek_count");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_seek_per_second(
+        "doris_pk", "index_reader_seek_per_second", &g_index_reader_seek_count, 60);
+static bvar::Adder<uint64_t> g_index_reader_pk_pages("doris_pk", "index_reader_pk_pages");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_pk_bytes_per_second(
+        "doris_pk", "index_reader_pk_pages_per_second", &g_index_reader_pk_pages, 60);
+
 using strings::Substitute;
 
 Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory) {
@@ -104,7 +118,10 @@ Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* handle,
     opts.encoding_info = _encoding_info;
     opts.pre_decode = pre_decode;
 
-    return PageIO::read_and_decompress_page(opts, handle, body, footer);
+    auto st = PageIO::read_and_decompress_page(opts, handle, body, footer);
+    g_index_reader_bytes << footer->uncompressed_size();
+    g_index_reader_pages << 1;
+    return st;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -179,6 +196,8 @@ Status IndexedColumnIterator::seek_at_or_after(const void* key, bool* exact_matc
         return Status::NotFound("value index is empty ");
     }
 
+    g_index_reader_seek_count << 1;
+
     bool load_data_page = false;
     PagePointer data_page_pp;
     if (_reader->_has_index_page) {
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index e046785bb0..50dc44ee73 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -52,6 +52,7 @@
 #include "runtime/query_context.h"
 #include "runtime/runtime_predicate.h"
 #include "runtime/runtime_state.h"
+#include "util/bvar_helper.h"
 #include "util/coding.h"
 #include "util/crc32c.h"
 #include "util/slice.h" // Slice
@@ -72,6 +73,8 @@ class InvertedIndexIterator;
 
 using io::FileCacheManager;
 
+static bvar::LatencyRecorder g_segment_lookup_rowkey_latency("doris_pk", "segment_lookup_rowkey");
+
 Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
                      RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
                      const io::FileReaderOptions& reader_options,
@@ -355,6 +358,7 @@ Status Segment::new_inverted_index_iterator(const TabletColumn& tablet_column,
 }
 
 Status Segment::lookup_row_key(const Slice& key, bool with_seq_col, RowLocation* row_location) {
+    SCOPED_BVAR_LATENCY(g_segment_lookup_rowkey_latency);
     RETURN_IF_ERROR(load_pk_index_and_bf());
     bool has_seq_col = _tablet_schema->has_sequence_col();
     size_t seq_col_length = 0;
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 79d0237931..41a02a5a1c 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -107,6 +107,7 @@
 #include "olap/utils.h"
 #include "segment_loader.h"
 #include "service/point_query_executor.h"
+#include "util/bvar_helper.h"
 #include "util/defer_op.h"
 #include "util/doris_metrics.h"
 #include "util/pretty_printer.h"
@@ -139,6 +140,15 @@ using std::string;
 using std::vector;
 using io::FileSystemSPtr;
 
+static bvar::LatencyRecorder g_tablet_lookup_rowkey_latency("doris_pk", "tablet_lookup_rowkey");
+static bvar::LatencyRecorder g_tablet_commit_phase_update_delete_bitmap_latency(
+        "doris_pk", "commit_phase_update_delete_bitmap");
+static bvar::LatencyRecorder g_tablet_update_delete_bitmap_latency("doris_pk",
+                                                                   "update_delete_bitmap");
+static bvar::Adder<uint64_t> g_tablet_pk_not_found("doris_pk", "lookup_not_found");
+static bvar::PerSecond<bvar::Adder<uint64_t>> g_tablet_pk_not_found_per_second(
+        "doris_pk", "lookup_not_found_per_second", &g_tablet_pk_not_found, 60);
+
 const std::chrono::seconds TRACE_TABLET_LOCK_THRESHOLD = 10s;
 
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_bytes, MetricUnit::BYTES);
@@ -2695,6 +2705,7 @@ Status Tablet::lookup_row_key(
         uint32_t version,
         std::unordered_map<RowsetId, SegmentCacheHandle, HashOfRowsetId>& segment_caches,
         RowsetSharedPtr* rowset) {
+    SCOPED_BVAR_LATENCY(g_tablet_lookup_rowkey_latency);
     size_t seq_col_length = 0;
     if (_schema->has_sequence_col() && with_seq_col) {
         seq_col_length = _schema->column(_schema->sequence_col_idx()).length() + 1;
@@ -2755,6 +2766,7 @@ Status Tablet::lookup_row_key(
             return s;
         }
     }
+    g_tablet_pk_not_found << 1;
     return Status::NotFound("can't find key in all rowsets");
 }
 
@@ -3170,6 +3182,7 @@ Status Tablet::commit_phase_update_delete_bitmap(
         const RowsetSharedPtr& rowset, RowsetIdUnorderedSet& pre_rowset_ids,
         DeleteBitmapPtr delete_bitmap, const std::vector<segment_v2::SegmentSharedPtr>& segments,
         int64_t txn_id, RowsetWriter* rowset_writer) {
+    SCOPED_BVAR_LATENCY(g_tablet_commit_phase_update_delete_bitmap_latency);
     RowsetIdUnorderedSet cur_rowset_ids;
     RowsetIdUnorderedSet rowset_ids_to_add;
     RowsetIdUnorderedSet rowset_ids_to_del;
@@ -3211,6 +3224,7 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset,
                                     const RowsetIdUnorderedSet& pre_rowset_ids,
                                     DeleteBitmapPtr delete_bitmap, int64_t txn_id,
                                     RowsetWriter* rowset_writer) {
+    SCOPED_BVAR_LATENCY(g_tablet_update_delete_bitmap_latency);
     RowsetIdUnorderedSet cur_rowset_ids;
     RowsetIdUnorderedSet rowset_ids_to_add;
     RowsetIdUnorderedSet rowset_ids_to_del;
diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp
index bca689960a..748706e421 100644
--- a/be/src/olap/task/engine_publish_version_task.cpp
+++ b/be/src/olap/task/engine_publish_version_task.cpp
@@ -36,6 +36,7 @@
 #include "olap/tablet_meta.h"
 #include "olap/txn_manager.h"
 #include "olap/utils.h"
+#include "util/bvar_helper.h"
 #include "util/threadpool.h"
 
 namespace doris {
@@ -44,6 +45,29 @@ using namespace ErrorCode;
 
 using std::map;
 
+static bvar::LatencyRecorder g_tablet_publish_latency("doris_pk", "tablet_publish");
+static bvar::LatencyRecorder g_tablet_publish_schedule_latency("doris_pk",
+                                                               "tablet_publish_schedule");
+static bvar::LatencyRecorder g_tablet_publish_lock_wait_latency("doris_pk",
+                                                                "tablet_publish_lock_wait");
+static bvar::LatencyRecorder g_tablet_publish_save_meta_latency("doris_pk",
+                                                                "tablet_publish_save_meta");
+static bvar::LatencyRecorder g_tablet_publish_delete_bitmap_latency("doris_pk",
+                                                                    "tablet_publish_delete_bitmap");
+static bvar::LatencyRecorder g_tablet_publish_partial_update_latency(
+        "doris_pk", "tablet_publish_partial_update");
+static bvar::LatencyRecorder g_tablet_publish_add_inc_latency("doris_pk",
+                                                              "tablet_publish_add_inc_rowset");
+
+void TabletPublishStatistics::record_in_bvar() {
+    g_tablet_publish_schedule_latency << schedule_time_us;
+    g_tablet_publish_lock_wait_latency << lock_wait_time_us;
+    g_tablet_publish_save_meta_latency << save_meta_time_us;
+    g_tablet_publish_delete_bitmap_latency << calc_delete_bitmap_time_us;
+    g_tablet_publish_partial_update_latency << partial_update_write_segment_us;
+    g_tablet_publish_add_inc_latency << add_inc_rowset_us;
+}
+
 EnginePublishVersionTask::EnginePublishVersionTask(
         const TPublishVersionRequest& publish_version_req, std::vector<TTabletId>* error_tablet_ids,
         std::vector<TTabletId>* succ_tablet_ids)
@@ -255,6 +279,8 @@ void TabletPublishTxnTask::handle() {
     _engine_publish_version_task->add_succ_tablet_id(_tablet_info.tablet_id);
     int64_t cost_us = MonotonicMicros() - _stats.submit_time_us;
     // print stats if publish cost > 500ms
+    g_tablet_publish_latency << cost_us;
+    _stats.record_in_bvar();
     LOG(INFO) << "publish version successfully on tablet"
               << ", table_id=" << _tablet->table_id() << ", tablet=" << _tablet->full_name()
               << ", transaction_id=" << _transaction_id << ", version=" << _version.first
diff --git a/be/src/olap/task/engine_publish_version_task.h b/be/src/olap/task/engine_publish_version_task.h
index efc7c8a4cf..bd6907b913 100644
--- a/be/src/olap/task/engine_publish_version_task.h
+++ b/be/src/olap/task/engine_publish_version_task.h
@@ -55,6 +55,8 @@ struct TabletPublishStatistics {
                 schedule_time_us, lock_wait_time_us, save_meta_time_us, calc_delete_bitmap_time_us,
                 partial_update_write_segment_us, add_inc_rowset_us);
     }
+
+    void record_in_bvar();
 };
 
 class TabletPublishTxnTask {
diff --git a/be/src/util/bvar_helper.h b/be/src/util/bvar_helper.h
new file mode 100644
index 0000000000..b2439f2121
--- /dev/null
+++ b/be/src/util/bvar_helper.h
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <bvar/latency_recorder.h>
+
+#include "defer_op.h"
+#include "stopwatch.hpp"
+
+namespace doris {
+
+#define SCOPED_BVAR_LATENCY(bvar_item) \
+    MonotonicStopWatch __watch;        \
+    __watch.start();                   \
+    Defer __record_bvar([&] { bvar_item << __watch.elapsed_time() / 1000; });
+
+} // end namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org