You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by "KKcorps (via GitHub)" <gi...@apache.org> on 2023/02/24 13:59:08 UTC

[GitHub] [pinot] KKcorps opened a new pull request, #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

KKcorps opened a new pull request, #10337:
URL: https://github.com/apache/pinot/pull/10337

   Currently, we only return aggregated statistics in the broker response. This already helps out in debugging performance  issues. However,  stats aggregated at stage level will be much more useful because the operations done by leaf stage vs intermediate stage differ a lot.
   
   The PR contains the following changes:
   
   * Add a new BrokerResponseNativeV2 class that contains Stage level stats
   * Add support for BrokerMetrics by passing down the table name in OperatorStats
   
   
   Example
   
   ```json
   {
      "resultTable":{
         "dataSchema":{
            "columnNames":[
               "playerName",
               "teamID",
               "teamName"
            ],
            "columnDataTypes":[
               "STRING",
               "STRING",
               "STRING"
            ]
         },
         "rows":[
            [
               "Fausto Andres",
               "CIN",
               "Cincinnati Reds/Red Stockings"
            ],
            [
               "Fernando Antonio",
               "HOU",
               "Houston Astros/Colt .45s"
            ],
            [
               "Fernando Antonio",
               "HOU",
               "Houston Astros/Colt .45s"
            ],
            [
               "Fernando Antonio",
               "HOU",
               "Houston Astros/Colt .45s"
            ],
            [
               "Harry Frederick",
               "CLE",
               "Cleveland Indians/Naps/Broncos/Bluebirds/Lake Shores"
            ],
            [
               "Harry Frederick",
               "CLE",
               "Cleveland Indians/Naps/Broncos/Bluebirds/Lake Shores"
            ],
            [
               "William Glenn",
               "DET",
               "Detroit Tigers"
            ],
            [
               "William Glenn",
               "DET",
               "Detroit Tigers"
            ],
            [
               "Paul David",
               "CLE",
               "Cleveland Indians/Naps/Broncos/Bluebirds/Lake Shores"
            ],
            [
               "Albert Julius",
               "CLE",
               "Cleveland Indians/Naps/Broncos/Bluebirds/Lake Shores"
            ]
         ]
      },
      "stageStats":{
         "1":{
            "numBlocks":19,
            "numRows":30,
            "threadExecutionTime":642,
            "operatorIds":[
               "MAILBOX_RECEIVE_3_1_0@127.0.0.1:8421",
               "SORT_3_1_0@127.0.0.1:8444",
               "SORT_3_1_0@127.0.0.1:8442",
               "SORT_3_1_0@127.0.0.1:8443",
               "TRANSFORM_3_1_0@127.0.0.1:8444",
               "TRANSFORM_3_1_0@127.0.0.1:8443",
               "TRANSFORM_3_1_0@127.0.0.1:8442"
            ]
         },
         "2":{
            "numBlocks":40,
            "numRows":47795,
            "threadExecutionTime":630,
            "operatorIds":[
               "MAILBOX_RECEIVE_3_2_0@127.0.0.1:8444",
               "MAILBOX_RECEIVE_3_2_0@127.0.0.1:8443",
               "MAILBOX_RECEIVE_3_2_0@127.0.0.1:8442",
               "SORT_3_2_0@127.0.0.1:8442",
               "HASH_JOIN_3_2_0@127.0.0.1:8444",
               "HASH_JOIN_3_2_0@127.0.0.1:8443",
               "SORT_3_2_0@127.0.0.1:8444",
               "HASH_JOIN_3_2_0@127.0.0.1:8442",
               "SORT_3_2_0@127.0.0.1:8443"
            ]
         },
         "3":{
            "numBlocks":12,
            "numRows":195778,
            "threadExecutionTime":75,
            "numSegmentsQueried":1,
            "numSegmentsProcessed":1,
            "numSegmentsMatched":1,
            "numDocsScanned":97889,
            "numEntriesScannedPostFilter":195778,
            "totalDocs":97889,
            "operatorIds":[
               "MAILBOX_RECEIVE_3_3_0@127.0.0.1:8444",
               "MAILBOX_RECEIVE_3_3_0@127.0.0.1:8443",
               "MAILBOX_RECEIVE_3_3_0@127.0.0.1:8442",
               "LEAF_STAGE_TRANSFER_OPERATOR_3_3_0@127.0.0.1:8443"
            ]
         },
         "4":{
            "numBlocks":13,
            "numRows":102,
            "threadExecutionTime":21,
            "numSegmentsQueried":1,
            "numSegmentsProcessed":1,
            "numSegmentsMatched":1,
            "numDocsScanned":51,
            "numEntriesScannedPostFilter":102,
            "totalDocs":51,
            "operatorIds":[
               "MAILBOX_RECEIVE_3_4_0@127.0.0.1:8442",
               "MAILBOX_RECEIVE_3_4_0@127.0.0.1:8444",
               "MAILBOX_RECEIVE_3_4_0@127.0.0.1:8443",
               "LEAF_STAGE_TRANSFER_OPERATOR_3_4_0@127.0.0.1:8442"
            ]
         }
      },
      "exceptions":[
         
      ],
      "numServersQueried":0,
      "numServersResponded":0,
      "numSegmentsQueried":2,
      "numSegmentsProcessed":2,
      "numSegmentsMatched":2,
      "numConsumingSegmentsQueried":0,
      "numConsumingSegmentsProcessed":0,
      "numConsumingSegmentsMatched":0,
      "numDocsScanned":97940,
      "numEntriesScannedInFilter":0,
      "numEntriesScannedPostFilter":195880,
      "numGroupsLimitReached":false,
      "totalDocs":97940,
      "timeUsedMs":108,
      "offlineThreadCpuTimeNs":0,
      "realtimeThreadCpuTimeNs":0,
      "offlineSystemActivitiesCpuTimeNs":0,
      "realtimeSystemActivitiesCpuTimeNs":0,
      "offlineResponseSerializationCpuTimeNs":0,
      "realtimeResponseSerializationCpuTimeNs":0,
      "offlineTotalCpuTimeNs":0,
      "realtimeTotalCpuTimeNs":0,
      "segmentStatistics":[
         
      ],
      "traceInfo":{
         
      },
      "minConsumingFreshnessTimeMs":0,
      "numSegmentsPrunedByBroker":0,
      "numSegmentsPrunedByServer":0,
      "numSegmentsPrunedInvalid":0,
      "numSegmentsPrunedByLimit":0,
      "numSegmentsPrunedByValue":0,
      "explainPlanNumEmptyFilterSegments":0,
      "explainPlanNumMatchAllFilterSegments":0,
      "numRowsResultSet":10
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] KKcorps commented on a diff in pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "KKcorps (via GitHub)" <gi...@apache.org>.
KKcorps commented on code in PR #10337:
URL: https://github.com/apache/pinot/pull/10337#discussion_r1119614693


##########
pinot-common/src/main/java/org/apache/pinot/common/datatable/DataTable.java:
##########
@@ -102,16 +102,14 @@ enum MetadataValueType {
    */
   enum MetadataKey {
     UNKNOWN(0, "unknown", MetadataValueType.STRING),
-    TABLE(1, "table", MetadataValueType.STRING), // NOTE: this key is only used in PrioritySchedulerTest
+    TABLE(1, "table", MetadataValueType.STRING),

Review Comment:
   This is dependent on `ExecutionStatsAggregator` not being dependent on field names, plus InstanceResponseBlock also not returning the field names. Both of these need to be changed first, for this to work.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] walterddr commented on a diff in pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "walterddr (via GitHub)" <gi...@apache.org>.
walterddr commented on code in PR #10337:
URL: https://github.com/apache/pinot/pull/10337#discussion_r1117955575


##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/OperatorStats.java:
##########
@@ -65,14 +66,18 @@ public void recordRow(int numBlock, int numRows) {
     _numRows += numRows;
   }
 
+  public void recordSingleStat(String key, String stat) {
+    _executionStats.put(key, stat);
+  }
+
   public void recordExecutionStats(Map<String, String> executionStats) {
-    _executionStats = executionStats;
+    _executionStats.putAll(executionStats);
   }
 
   public Map<String, String> getExecutionStats() {
-    _executionStats.put(OperatorUtils.NUM_BLOCKS, String.valueOf(_numBlock));
-    _executionStats.put(OperatorUtils.NUM_ROWS, String.valueOf(_numRows));
-    _executionStats.put(OperatorUtils.THREAD_EXECUTION_TIME,
+    _executionStats.putIfAbsent(DataTable.MetadataKey.NUM_BLOCKS.getName(), String.valueOf(_numBlock));
+    _executionStats.putIfAbsent(DataTable.MetadataKey.NUM_ROWS.getName(), String.valueOf(_numRows));
+    _executionStats.putIfAbsent(DataTable.MetadataKey.OPERATOR_EXECUTION_TIME_MS.getName(),

Review Comment:
   what's the reason for putIfAbsent?



##########
pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseNativeV2.java:
##########
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.response.broker;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.response.ProcessingException;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.spi.utils.JsonUtils;
+
+
+/**
+ * This class implements pinot-broker's response format for any given query.
+ * All fields either primitive data types, or native objects (as opposed to JSONObjects).
+ *
+ * Supports serialization via JSON.
+ */
+@JsonPropertyOrder({
+    "resultTable", "stageStats", "exceptions", "numServersQueried", "numServersResponded", "numSegmentsQueried",
+    "numSegmentsProcessed", "numSegmentsMatched", "numConsumingSegmentsQueried", "numConsumingSegmentsProcessed",
+    "numConsumingSegmentsMatched", "numDocsScanned", "numEntriesScannedInFilter", "numEntriesScannedPostFilter",
+    "numGroupsLimitReached", "totalDocs", "timeUsedMs", "offlineThreadCpuTimeNs", "realtimeThreadCpuTimeNs",
+    "offlineSystemActivitiesCpuTimeNs", "realtimeSystemActivitiesCpuTimeNs", "offlineResponseSerializationCpuTimeNs",
+    "realtimeResponseSerializationCpuTimeNs", "offlineTotalCpuTimeNs", "realtimeTotalCpuTimeNs", "segmentStatistics",
+    "traceInfo"
+})
+public class BrokerResponseNativeV2 extends BrokerResponseNative {

Review Comment:
   can we add a test for V1 and V2 native response type?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] KKcorps commented on pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "KKcorps (via GitHub)" <gi...@apache.org>.
KKcorps commented on PR #10337:
URL: https://github.com/apache/pinot/pull/10337#issuecomment-1444629390

   > Excited to see this. @KKcorps : Can you confirm if this also uses `OpChainStats` in any way? I was wondering if we could also track queued time in these stats later.
   
   Not right now. But I will include those in the next PR along with some other missing stats.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] ankitsultana commented on pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "ankitsultana (via GitHub)" <gi...@apache.org>.
ankitsultana commented on PR #10337:
URL: https://github.com/apache/pinot/pull/10337#issuecomment-1444610934

   Excited to see this. @KKcorps : Can you confirm if this also uses `OpChainStats` in any way? I was wondering if we could also track queued time in these stats later.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] codecov-commenter commented on pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "codecov-commenter (via GitHub)" <gi...@apache.org>.
codecov-commenter commented on PR #10337:
URL: https://github.com/apache/pinot/pull/10337#issuecomment-1443768255

   # [Codecov](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
   > Merging [#10337](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (89c2362) into [master](https://codecov.io/gh/apache/pinot/commit/1fce07ed77ff31af6d3b85a6b03a4b76632ed133?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (1fce07e) will **decrease** coverage by `55.10%`.
   > The diff coverage is `0.00%`.
   
   ```diff
   @@              Coverage Diff              @@
   ##             master   #10337       +/-   ##
   =============================================
   - Coverage     68.82%   13.73%   -55.10%     
   + Complexity     5851      221     -5630     
   =============================================
     Files          2027     1975       -52     
     Lines        109891   107763     -2128     
     Branches      16685    16433      -252     
   =============================================
   - Hits          75637    14797    -60840     
   - Misses        28897    91788    +62891     
   + Partials       5357     1178     -4179     
   ```
   
   | Flag | Coverage Δ | |
   |---|---|---|
   | integration2 | `?` | |
   | unittests1 | `?` | |
   | unittests2 | `13.73% <0.00%> (-0.05%)` | :arrow_down: |
   
   Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
   
   | [Impacted Files](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
   |---|---|---|
   | [...requesthandler/MultiStageBrokerRequestHandler.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtYnJva2VyL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9icm9rZXIvcmVxdWVzdGhhbmRsZXIvTXVsdGlTdGFnZUJyb2tlclJlcXVlc3RIYW5kbGVyLmphdmE=) | `0.00% <0.00%> (ø)` | |
   | [...common/response/broker/BrokerResponseNativeV2.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vcmVzcG9uc2UvYnJva2VyL0Jyb2tlclJlc3BvbnNlTmF0aXZlVjIuamF2YQ==) | `0.00% <0.00%> (ø)` | |
   | [...ot/common/response/broker/BrokerResponseStats.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vcmVzcG9uc2UvYnJva2VyL0Jyb2tlclJlc3BvbnNlU3RhdHMuamF2YQ==) | `0.00% <0.00%> (ø)` | |
   | [...ot/core/query/reduce/ExecutionStatsAggregator.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9xdWVyeS9yZWR1Y2UvRXhlY3V0aW9uU3RhdHNBZ2dyZWdhdG9yLmphdmE=) | `0.00% <0.00%> (-96.45%)` | :arrow_down: |
   | [...not/query/runtime/operator/MultiStageOperator.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtcXVlcnktcnVudGltZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvcXVlcnkvcnVudGltZS9vcGVyYXRvci9NdWx0aVN0YWdlT3BlcmF0b3IuamF2YQ==) | `0.00% <0.00%> (-67.45%)` | :arrow_down: |
   | [...he/pinot/query/runtime/operator/OperatorStats.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtcXVlcnktcnVudGltZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvcXVlcnkvcnVudGltZS9vcGVyYXRvci9PcGVyYXRvclN0YXRzLmphdmE=) | `0.00% <0.00%> (-90.33%)` | :arrow_down: |
   | [...ot/query/runtime/operator/utils/OperatorUtils.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtcXVlcnktcnVudGltZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvcXVlcnkvcnVudGltZS9vcGVyYXRvci91dGlscy9PcGVyYXRvclV0aWxzLmphdmE=) | `0.00% <ø> (-82.46%)` | :arrow_down: |
   | [...rg/apache/pinot/query/service/QueryDispatcher.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtcXVlcnktcnVudGltZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvcXVlcnkvc2VydmljZS9RdWVyeURpc3BhdGNoZXIuamF2YQ==) | `0.00% <0.00%> (-79.25%)` | :arrow_down: |
   | [...src/main/java/org/apache/pinot/sql/FilterKind.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9zcWwvRmlsdGVyS2luZC5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | [...ain/java/org/apache/pinot/core/data/table/Key.java](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9kYXRhL3RhYmxlL0tleS5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | ... and [1568 more](https://codecov.io/gh/apache/pinot/pull/10337?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
   
   :mega: We’re building smart automated test selection to slash your CI/CD build times. [Learn more](https://about.codecov.io/iterative-testing/?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] KKcorps commented on a diff in pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "KKcorps (via GitHub)" <gi...@apache.org>.
KKcorps commented on code in PR #10337:
URL: https://github.com/apache/pinot/pull/10337#discussion_r1117956075


##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/OperatorStats.java:
##########
@@ -65,14 +66,18 @@ public void recordRow(int numBlock, int numRows) {
     _numRows += numRows;
   }
 
+  public void recordSingleStat(String key, String stat) {
+    _executionStats.put(key, stat);
+  }
+
   public void recordExecutionStats(Map<String, String> executionStats) {
-    _executionStats = executionStats;
+    _executionStats.putAll(executionStats);
   }
 
   public Map<String, String> getExecutionStats() {
-    _executionStats.put(OperatorUtils.NUM_BLOCKS, String.valueOf(_numBlock));
-    _executionStats.put(OperatorUtils.NUM_ROWS, String.valueOf(_numRows));
-    _executionStats.put(OperatorUtils.THREAD_EXECUTION_TIME,
+    _executionStats.putIfAbsent(DataTable.MetadataKey.NUM_BLOCKS.getName(), String.valueOf(_numBlock));
+    _executionStats.putIfAbsent(DataTable.MetadataKey.NUM_ROWS.getName(), String.valueOf(_numRows));
+    _executionStats.putIfAbsent(DataTable.MetadataKey.OPERATOR_EXECUTION_TIME_MS.getName(),

Review Comment:
   .getExecutionStats is called multiple times but recording stats in the map only once is enough.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] KKcorps merged pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "KKcorps (via GitHub)" <gi...@apache.org>.
KKcorps merged PR #10337:
URL: https://github.com/apache/pinot/pull/10337


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] KKcorps commented on pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "KKcorps (via GitHub)" <gi...@apache.org>.
KKcorps commented on PR #10337:
URL: https://github.com/apache/pinot/pull/10337#issuecomment-1444568672

   I think **BrokerResponseStats** class can be made an extension of **BrokerResponseNative** class as well. Only differrence will be not having a resultTable plus having additional stage level stats.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] walterddr commented on a diff in pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "walterddr (via GitHub)" <gi...@apache.org>.
walterddr commented on code in PR #10337:
URL: https://github.com/apache/pinot/pull/10337#discussion_r1119132865


##########
pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseStats.java:
##########
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.response.broker;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.pinot.spi.utils.JsonUtils;
+
+
+@JsonPropertyOrder({"exceptions", "numBlocks", "numRows", "stageExecutionTimeMs", "numServersQueried",
+    "numServersResponded", "numSegmentsQueried", "numSegmentsProcessed", "numSegmentsMatched",
+    "numConsumingSegmentsQueried", "numConsumingSegmentsProcessed", "numConsumingSegmentsMatched",
+    "numDocsScanned", "numEntriesScannedInFilter", "numEntriesScannedPostFilter", "numGroupsLimitReached",
+    "totalDocs", "timeUsedMs", "offlineThreadCpuTimeNs", "realtimeThreadCpuTimeNs",
+    "offlineSystemActivitiesCpuTimeNs", "realtimeSystemActivitiesCpuTimeNs", "offlineResponseSerializationCpuTimeNs",
+    "realtimeResponseSerializationCpuTimeNs", "offlineTotalCpuTimeNs", "realtimeTotalCpuTimeNs",
+    "traceInfo", "operatorIds", "tableNames"})
+@JsonInclude(JsonInclude.Include.NON_DEFAULT)
+public class BrokerResponseStats extends BrokerResponseNative {

Review Comment:
   add a TODO: let's think of a way to 
   1. cleanly decouple the v1 and v2 response native format.
       - decouple the execution stats aggregator logic and make it into a util that can aggregate 2 values with the same metadataKey (not tied with member variable based implementation)
   2. v2 response stats format should not store information that's not relevant 
       - for example intermediate stage stats should not kept any metadatakey related to leaf stages
       - suggest use a Map<MetadataKey, Object> 
   3. v2 response stats should be configurable to any level (global/stage/operator) 
       -they should reuse the same BrokerResponseStats (or just QueryStats) class and can be configured with Member variable field SubStats --> global will have stage subStats; and stage will have operator subStats
   
   
   Follow up on separate PR. this one looks good for now. 
   



##########
pinot-core/src/main/java/org/apache/pinot/core/query/reduce/ExecutionStatsAggregator.java:
##########
@@ -82,6 +93,32 @@ public synchronized void aggregate(@Nullable ServerRoutingInstance routingInstan
       _traceInfo.put(routingInstance.getShortName(), metadata.get(DataTable.MetadataKey.TRACE_INFO.getName()));
     }
 
+    String tableNamesStr = metadata.get(DataTable.MetadataKey.TABLE.getName());
+    String tableName = null;
+
+    if (tableNamesStr != null) {
+      List<String> tableNames = Arrays.stream(tableNamesStr.split("::")).collect(Collectors.toList());
+      _tableNames.addAll(tableNames);
+
+      //TODO: Decide a strategy to split stageLevel stats across tables for brokerMetrics
+      // assigning everything to the first table only for now
+      tableName = tableNames.get(0);
+    }

Review Comment:
   great catch and let's follow up in separate PR.



##########
pinot-common/src/main/java/org/apache/pinot/common/response/broker/BrokerResponseStats.java:
##########
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.response.broker;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.pinot.spi.utils.JsonUtils;
+
+
+@JsonPropertyOrder({"exceptions", "numBlocks", "numRows", "stageExecutionTimeMs", "numServersQueried",

Review Comment:
   TODO: follow up to not encode the metadata key as string, (to avoid ser/de overhead)



##########
pinot-common/src/main/java/org/apache/pinot/common/datatable/DataTable.java:
##########
@@ -102,16 +102,14 @@ enum MetadataValueType {
    */
   enum MetadataKey {
     UNKNOWN(0, "unknown", MetadataValueType.STRING),
-    TABLE(1, "table", MetadataValueType.STRING), // NOTE: this key is only used in PrioritySchedulerTest
+    TABLE(1, "table", MetadataValueType.STRING),

Review Comment:
   chatted offline. we are going to use MetadataKey going forward for compact metadata index perspective (so all the stats map on stage/operator/global level they all will be indexed/keyed by the integer)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] walterddr commented on a diff in pull request #10337: Add Statistics grouped at Stage ID level in the V2 Engine Response

Posted by "walterddr (via GitHub)" <gi...@apache.org>.
walterddr commented on code in PR #10337:
URL: https://github.com/apache/pinot/pull/10337#discussion_r1119548603


##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/service/QueryDispatcher.java:
##########
@@ -147,11 +149,22 @@ public static List<DataBlock> reduceMailboxReceive(MailboxReceiveOperator mailbo
       if (transferableBlock.isNoOpBlock()) {
         continue;
       } else if (transferableBlock.isEndOfStreamBlock()) {
-        if (executionStatsAggregator != null) {
+        if (executionStatsAggregatorMap != null) {
           for (Map.Entry<String, OperatorStats> entry : transferableBlock.getResultMetadata().entrySet()) {
             LOGGER.info("Broker Query Execution Stats - OperatorId: {}, OperatorStats: {}", entry.getKey(),
                 OperatorUtils.operatorStatsToJson(entry.getValue()));
-            executionStatsAggregator.aggregate(null, entry.getValue().getExecutionStats(), new HashMap<>());
+            OperatorStats operatorStats = entry.getValue();
+            ExecutionStatsAggregator rootStatsAggregator = executionStatsAggregatorMap.get(0);
+            ExecutionStatsAggregator stageStatsAggregator = executionStatsAggregatorMap.get(operatorStats.getStageId());
+            if (queryPlan != null) {
+              StageMetadata operatorStageMetadata = queryPlan.getStageMetadataMap().get(operatorStats.getStageId());
+              if (!operatorStageMetadata.getScannedTables().isEmpty()) {
+                operatorStats.recordSingleStat(DataTable.MetadataKey.TABLE.getName(),
+                    Joiner.on("::").join(operatorStageMetadata.getScannedTables()));
+              }
+            }

Review Comment:
   follow up: refactor this part out as utils 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org