You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by mc...@apache.org on 2022/03/30 17:04:40 UTC

[pinot] branch master updated: Adjust comparison of numDocsScanned (#8433)

This is an automated email from the ASF dual-hosted git repository.

mcvsubbu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new deaccf7  Adjust comparison of numDocsScanned (#8433)
deaccf7 is described below

commit deaccf776845d7dc670f686b6d12bec5bf27f266
Author: Subbu Subramaniam <mc...@users.noreply.github.com>
AuthorDate: Wed Mar 30 10:03:46 2022 -0700

    Adjust comparison of numDocsScanned (#8433)
    
    * Adjust comparison of numDocsScanned
    
    Number of documents scanned can be lower than expected when comparing
    results of compatibility tests. We don't like it to be higher since that
    may indicate a performance regression.
    
    Changed the code to not expect numDocsScanned to be equal all the time
    
    * Fix style errors
    
    * Fixed log statements
---
 .../pinot/common/utils/SqlResultComparator.java    | 66 ++++++++++++----------
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/SqlResultComparator.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/SqlResultComparator.java
index d6357a7..c9f2481 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/SqlResultComparator.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/SqlResultComparator.java
@@ -126,29 +126,35 @@ public class SqlResultComparator {
      */
     if (expected.has(FIELD_IS_SUPERSET) && expected.get(FIELD_IS_SUPERSET).asBoolean(false)) {
       return areElementsSubset(actualElementsSerialized, expectedElementsSerialized);
-    } else {
-      if (!areLengthsEqual(actual, expected)) {
-        return false;
-      }
-      /*
-       * Pinot server do some early termination optimization (process in parallel and early return if get enough
-       * documents to fulfill the LIMIT and OFFSET requirement) for queries:
-       * - selection without order by
-       * - selection with order by (by sorting the segments on min-max value)
-       * - DISTINCT queries.
-       * numDocsScanned is non-deterministic for those queries so numDocsScanned comparison should be skipped.
-       *
-       * NOTE: DISTINCT queries are modeled as non-selection queries during query processing, but all DISTINCT
-       * queries are selection queries during Calcite parsing (DISTINCT queries are selection queries with
-       * selectNode.getModifierNode(SqlSelectKeyword.DISTINCT) != null).
-       */
-      if (!isSelectionQuery(query) && !areNumDocsScannedEqual(actual, expected)) {
+    }
+    if (!areLengthsEqual(actual, expected)) {
+      return false;
+    }
+    boolean areResultsEqual = isOrderByQuery(query) ? areOrderByQueryElementsEqual(actualRows, expectedRows,
+        actualElementsSerialized, expectedElementsSerialized, query)
+        : areNonOrderByQueryElementsEqual(actualElementsSerialized, expectedElementsSerialized);
+    /*
+     * Pinot servers implement early termination optimization (process in parallel and early return if we get enough
+     * documents to fulfill the LIMIT and OFFSET requirement) for queries for the following cases:
+     * - selection without order by
+     * - selection with order by (by sorting the segments on min-max value)
+     * - DISTINCT queries.
+     * numDocsScanned is non-deterministic for those queries so numDocsScanned comparison should be skipped.
+     *
+     * In other cases, we accept if numDocsScanned is better than the expected one, since that is indicative of
+     * performance improvement.
+     *
+     * NOTE: DISTINCT queries are modeled as non-selection queries during query processing, but all DISTINCT
+     * queries are selection queries during Calcite parsing (DISTINCT queries are selection queries with
+     * selectNode.getModifierNode(SqlSelectKeyword.DISTINCT) != null).
+     */
+    if (areResultsEqual) {
+      // Results are good, check for any metadata differences here.
+      if (!isSelectionQuery(query) && !isNumDocsScannedBetter(actual, expected)) {
         return false;
       }
-      return isOrderByQuery(query) ? areOrderByQueryElementsEqual(actualRows, expectedRows, actualElementsSerialized,
-          expectedElementsSerialized, query)
-          : areNonOrderByQueryElementsEqual(actualElementsSerialized, expectedElementsSerialized);
     }
+    return areResultsEqual;
   }
 
   private static boolean areOrderByQueryElementsEqual(ArrayNode actualElements, ArrayNode expectedElements,
@@ -275,7 +281,7 @@ public class SqlResultComparator {
     return true;
   }
 
-  public static boolean hasExceptions(JsonNode actual) {
+  private static boolean hasExceptions(JsonNode actual) {
     if (!actual.get(FIELD_EXCEPTIONS).isEmpty()) {
       LOGGER.error("Got exception: {} when querying!", actual.get(FIELD_EXCEPTIONS));
       return true;
@@ -369,34 +375,34 @@ public class SqlResultComparator {
     return true;
   }
 
-  private static boolean areNumEntriesScannedInFilterEqual(JsonNode actual, JsonNode expected) {
+  private static boolean isNumEntriesScannedInFilterBetter(JsonNode actual, JsonNode expected) {
     long actualNumEntriesScannedInFilter = actual.get(FIELD_NUM_ENTRIES_SCANNED_IN_FILTER).asLong();
     long expectedNumEntriesScannedInFilter = expected.get(FIELD_NUM_ENTRIES_SCANNED_IN_FILTER).asLong();
-    if (actualNumEntriesScannedInFilter != expectedNumEntriesScannedInFilter) {
+    if (actualNumEntriesScannedInFilter > expectedNumEntriesScannedInFilter) {
       LOGGER
-          .error("The numEntriesScannedInFilter don't match! Actual: {}, Expected: {}", actualNumEntriesScannedInFilter,
+          .error("The numEntriesScannedInFilter is worse. Actual: {}, Expected: {}", actualNumEntriesScannedInFilter,
               expectedNumEntriesScannedInFilter);
       return false;
     }
     return true;
   }
 
-  private static boolean areNumEntriesScannedPostFilterEqual(JsonNode actual, JsonNode expected) {
+  private static boolean isNumEntriesScannedPostFilterBetter(JsonNode actual, JsonNode expected) {
     long actualNumEntriesScannedPostFilter = actual.get(FIELD_NUM_ENTRIES_SCANNED_POST_FILTER).asLong();
     long expectedNumEntriesScannedPostFilter = expected.get(FIELD_NUM_ENTRIES_SCANNED_POST_FILTER).asLong();
-    if (actualNumEntriesScannedPostFilter != expectedNumEntriesScannedPostFilter) {
-      LOGGER.error("The numEntriesScannedPostFilter don't match! Actual: {}, Expected: {}",
+    if (actualNumEntriesScannedPostFilter > expectedNumEntriesScannedPostFilter) {
+      LOGGER.error("The numEntriesScannedPostFilter is worse. Actual: {}, Expected: {}",
           actualNumEntriesScannedPostFilter, expectedNumEntriesScannedPostFilter);
       return false;
     }
     return true;
   }
 
-  private static boolean areNumDocsScannedEqual(JsonNode actual, JsonNode expected) {
+  private static boolean isNumDocsScannedBetter(JsonNode actual, JsonNode expected) {
     int actualNumDocsScanned = actual.get(FIELD_NUM_DOCS_SCANNED).asInt();
     int expectedNumDocsScanned = expected.get(FIELD_NUM_DOCS_SCANNED).asInt();
-    if (actualNumDocsScanned != expectedNumDocsScanned) {
-      LOGGER.error("The numDocsScanned don't match! Actual: {}, Expected: {}", actualNumDocsScanned,
+    if (actualNumDocsScanned > expectedNumDocsScanned) {
+      LOGGER.error("The numDocsScanned is worse. Actual: {}, Expected: {}", actualNumDocsScanned,
           expectedNumDocsScanned);
       return false;
     }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org