You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2021/11/10 03:01:58 UTC

[GitHub] [incubator-doris] morningman commented on a change in pull request #7030: [Feature] Print peak memory use of all backend after query in audit log

morningman commented on a change in pull request #7030:
URL: https://github.com/apache/incubator-doris/pull/7030#discussion_r746202859



##########
File path: be/src/runtime/query_statistics.h
##########
@@ -27,25 +27,49 @@ namespace doris {
 
 class QueryStatisticsRecvr;
 
+class NodeStatistics {
+public:
+    NodeStatistics() : peak_memory_bytes(0) {};
+
+    void add_peak_memory(int64_t peak_memory) { this->peak_memory_bytes += peak_memory; }
+
+    void merge(const NodeStatistics& other);
+
+    void to_pb(PNodeStatistics* nodeStatistics);
+
+    void from_pb(const PNodeStatistics& nodeStatistics);
+
+private:
+    int64_t peak_memory_bytes;
+};
+
 // This is responsible for collecting query statistics, usually it consists of
 // two parts, one is current fragment or plan's statistics, the other is sub fragment
 // or plan's statistics and QueryStatisticsRecvr is responsible for collecting it.
 class QueryStatistics {
 public:
     QueryStatistics() : scan_rows(0), scan_bytes(0), cpu_ms(0), returned_rows(0) {}
 
-    void merge(const QueryStatistics& other) {
-        scan_rows += other.scan_rows;
-        scan_bytes += other.scan_bytes;
-        cpu_ms += other.cpu_ms;
-    }
+    void merge(const QueryStatistics& other);
 
     void add_scan_rows(int64_t scan_rows) { this->scan_rows += scan_rows; }
 
     void add_scan_bytes(int64_t scan_bytes) { this->scan_bytes += scan_bytes; }
 
     void add_cpu_ms(int64_t cpu_ms) { this->cpu_ms += cpu_ms; }
 
+    NodeStatistics* add_nodes_statistics(int64_t node_id) {
+        NodeStatistics* nodeStatistics = nullptr;
+        auto iter = nodes_statistics_map.find(node_id);
+        if (iter == nodes_statistics_map.end()) {
+            nodeStatistics = new NodeStatistics;

Review comment:
       You need to add `~QueryStatistics()` to clear the objects in `nodes_statistics_map`, or there will be memory leak.

##########
File path: be/src/runtime/query_statistics.h
##########
@@ -78,6 +93,10 @@ class QueryStatistics {
     // number rows returned by query.
     // only set once by result sink when closing.
     int64_t returned_rows;
+
+    // The statistics of the query on each backend.
+    typedef std::unordered_map<int64_t, NodeStatistics*> NodeStatisticsMap;
+    NodeStatisticsMap nodes_statistics_map;

Review comment:
       ```suggestion
       NodeStatisticsMap _nodes_statistics_map;
   ```

##########
File path: be/src/runtime/plan_fragment_executor.cpp
##########
@@ -81,6 +81,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request,
 
     RETURN_IF_ERROR(_runtime_state->init_mem_trackers(_query_id));
     _runtime_state->set_be_number(request.backend_num);
+    _runtime_state->set_backend_id(request.backend_id);

Review comment:
       Need to check if `backend_Id` is set in `request`, for forward compatibility. Or set a default value for `backend_id` in proto file.

##########
File path: fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java
##########
@@ -123,6 +124,15 @@ private void auditAfterExec(String origStmt, StatementBase parsedStmt, Data.PQue
             .setStmtId(ctx.getStmtId())
             .setQueryId(ctx.queryId() == null ? "NaN" : DebugUtil.printId(ctx.queryId()));
 
+        if (statistics != null && statistics.getNodesStatisticsCount() != 0) {

Review comment:
       I think this can be merge on BE side? Or FE will become the bottleneck

##########
File path: be/src/runtime/query_statistics.cpp
##########
@@ -19,6 +19,54 @@
 
 namespace doris {
 
+void NodeStatistics::merge(const NodeStatistics& other) {
+    peak_memory_bytes += other.peak_memory_bytes;
+}
+
+void NodeStatistics::to_pb(PNodeStatistics* nodeStatistics) {

Review comment:
       ```suggestion
   void NodeStatistics::to_pb(PNodeStatistics* node_statistics) {
   ```

##########
File path: fe/fe-core/src/main/java/org/apache/doris/plugin/AuditEvent.java
##########
@@ -79,6 +83,8 @@
     public String stmt = "";
     @AuditField(value = "CpuTimeMS")
     public long cpuTimeMs = -1;
+    @AuditField(value = "PeakMemory")

Review comment:
       Add to the end, for compatibility.
   And better use `long` instead of string, so that it can be easily analyzed by application

##########
File path: be/src/runtime/query_statistics.h
##########
@@ -55,21 +79,12 @@ class QueryStatistics {
         scan_bytes = 0;
         cpu_ms = 0;
         returned_rows = 0;
+        nodes_statistics_map.clear();

Review comment:
       This clear() will not release the memory allocated for `NodeStatistics` in `nodes_statistics_map`

##########
File path: be/src/runtime/query_statistics.cpp
##########
@@ -19,6 +19,54 @@
 
 namespace doris {
 
+void NodeStatistics::merge(const NodeStatistics& other) {
+    peak_memory_bytes += other.peak_memory_bytes;
+}
+
+void NodeStatistics::to_pb(PNodeStatistics* nodeStatistics) {
+    DCHECK(nodeStatistics != nullptr);
+    nodeStatistics->set_peak_memory_bytes(peak_memory_bytes);
+}
+
+void NodeStatistics::from_pb(const PNodeStatistics& nodeStatistics) {

Review comment:
       ```suggestion
   void NodeStatistics::from_pb(const PNodeStatistics& node_statistics) {
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org