You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2020/11/04 20:25:07 UTC
[GitHub] [incubator-pinot] jackjlli commented on a change in pull request #6224: Improve comparison coverage for selection SQL queries in ClusterInteg…

jackjlli commented on a change in pull request #6224:
URL: https://github.com/apache/incubator-pinot/pull/6224#discussion_r517605955



##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
##########
@@ -666,149 +668,14 @@ public static void testQuery(String pinotQuery, String queryFormat, String broke
       }
       Set<String> expectedValues = new HashSet<>();
       List<String> expectedOrderByValues = new ArrayList<>();
-      Map<String, String> reusableExpectedValueMap = new HashMap<>();
-      Map<String, List<String>> reusableMultiValuesMap = new HashMap<>();
-      List<String> reusableColumnOrder = new ArrayList<>();
-      int h2NumRows;
-      for (h2NumRows = 0; h2ResultSet.next() && h2NumRows < MAX_NUM_ROWS_TO_COMPARE; h2NumRows++) {
-        reusableExpectedValueMap.clear();
-        reusableMultiValuesMap.clear();
-        reusableColumnOrder.clear();
-
-        int numColumns = h2MetaData.getColumnCount();
-        for (int columnIndex = 1; columnIndex <= numColumns; columnIndex++) {
-          String columnName = h2MetaData.getColumnName(columnIndex);
-
-          // Handle null result and convert boolean value to lower case
-          String columnValue = h2ResultSet.getString(columnIndex);
-          if (columnValue == null) {
-            columnValue = "null";
-          } else {
-            columnValue = convertBooleanToLowerCase(columnValue);
-          }
 
-          // Handle multi-value columns
-          int length = columnName.length();
-          if (length > 5 && columnName.substring(length - 5, length - 1).equals("__MV")) {
-            // Multi-value column
-            String multiValueColumnName = columnName.substring(0, length - 5);
-            List<String> multiValue = reusableMultiValuesMap.get(multiValueColumnName);
-            if (multiValue == null) {
-              multiValue = new ArrayList<>();
-              reusableMultiValuesMap.put(multiValueColumnName, multiValue);
-              reusableColumnOrder.add(multiValueColumnName);
-            }
-            multiValue.add(columnValue);
-          } else {
-            // Single-value column
-            reusableExpectedValueMap.put(columnName, columnValue);
-            reusableColumnOrder.add(columnName);
-          }
-        }
-
-        // Add multi-value column results to the expected values
-        // The reason for this step is that Pinot does not maintain order of elements in multi-value columns
-        for (Map.Entry<String, List<String>> entry : reusableMultiValuesMap.entrySet()) {
-          List<String> multiValue = entry.getValue();
-          Collections.sort(multiValue);
-          reusableExpectedValueMap.put(entry.getKey(), multiValue.toString());
-        }
-
-        // Build expected value String
-        StringBuilder expectedValue = new StringBuilder();
-        StringBuilder expectedOrderByValue = new StringBuilder();
-        for (String column : reusableColumnOrder) {
-          expectedValue.append(column).append(':').append(reusableExpectedValueMap.get(column)).append(' ');
-          if (orderByColumns.contains(column)) {
-            expectedOrderByValue.append(column).append(':').append(reusableExpectedValueMap.get(column)).append(' ');
-          }
-        }
-        expectedValues.add(expectedValue.toString());
-        expectedOrderByValues.add(expectedOrderByValue.toString());
-      }
+      int h2NumRows = getH2ExpectedValues(expectedValues, expectedOrderByValues, h2ResultSet, h2MetaData, orderByColumns);

Review comment:
       If the result comparisons for PQL and SQL are done in two separate methods, do we still need to pass `queryFormat` into the `testQuery()` method?

##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
##########
@@ -975,6 +830,199 @@ private static boolean isSelectionQuery(BrokerRequest brokerRequest) {
     return false;
   }
 
+  private static Set<String> convertToUpperCase(Set<String> columns) {
+    Set<String> upperCaseColumns = new HashSet<>();
+    for (String column: columns) {
+      upperCaseColumns.add(column.toUpperCase());
+    }
+    return upperCaseColumns;
+  }
+
+  private static int getH2ExpectedValues(Set<String> expectedValues, List<String> expectedOrderByValues,
+      ResultSet h2ResultSet, ResultSetMetaData h2MetaData, Set<String> orderByColumns) throws SQLException {
+    Map<String, String> reusableExpectedValueMap = new HashMap<>();
+    Map<String, List<String>> reusableMultiValuesMap = new HashMap<>();
+    List<String> reusableColumnOrder = new ArrayList<>();
+    int h2NumRows;
+    int numColumns = h2MetaData.getColumnCount();
+
+    for (h2NumRows = 0; h2ResultSet.next() && h2NumRows < MAX_NUM_ROWS_TO_COMPARE; h2NumRows++) {
+      reusableExpectedValueMap.clear();
+      reusableMultiValuesMap.clear();
+      reusableColumnOrder.clear();
+
+      for (int columnIndex = 1; columnIndex <= numColumns; columnIndex++) {

Review comment:
       Can you add a comment explaining why the index starts at 1 instead of 0?

##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
##########
@@ -975,6 +830,199 @@ private static boolean isSelectionQuery(BrokerRequest brokerRequest) {
     return false;
   }
 
+  private static Set<String> convertToUpperCase(Set<String> columns) {
+    Set<String> upperCaseColumns = new HashSet<>();
+    for (String column: columns) {
+      upperCaseColumns.add(column.toUpperCase());
+    }
+    return upperCaseColumns;
+  }
+
+  private static int getH2ExpectedValues(Set<String> expectedValues, List<String> expectedOrderByValues,
+      ResultSet h2ResultSet, ResultSetMetaData h2MetaData, Set<String> orderByColumns) throws SQLException {
+    Map<String, String> reusableExpectedValueMap = new HashMap<>();
+    Map<String, List<String>> reusableMultiValuesMap = new HashMap<>();
+    List<String> reusableColumnOrder = new ArrayList<>();
+    int h2NumRows;
+    int numColumns = h2MetaData.getColumnCount();
+
+    for (h2NumRows = 0; h2ResultSet.next() && h2NumRows < MAX_NUM_ROWS_TO_COMPARE; h2NumRows++) {
+      reusableExpectedValueMap.clear();
+      reusableMultiValuesMap.clear();
+      reusableColumnOrder.clear();
+
+      for (int columnIndex = 1; columnIndex <= numColumns; columnIndex++) {
+        String columnName = h2MetaData.getColumnName(columnIndex);
+
+        // Handle null result and convert boolean value to lower case
+        String columnValue = h2ResultSet.getString(columnIndex);
+        if (columnValue == null) {
+          columnValue = "null";
+        } else {
+          columnValue = convertBooleanToLowerCase(columnValue);
+        }
+
+        // Handle multi-value columns
+        int length = columnName.length();
+        if (length > 5 && columnName.substring(length - 5, length - 1).equals("__MV")) {
+          // Multi-value column
+          String multiValueColumnName = columnName.substring(0, length - 5);
+          List<String> multiValue = reusableMultiValuesMap.get(multiValueColumnName);
+          if (multiValue == null) {
+            multiValue = new ArrayList<>();
+            reusableMultiValuesMap.put(multiValueColumnName, multiValue);
+            reusableColumnOrder.add(multiValueColumnName);
+          }
+          multiValue.add(columnValue);
+        } else {
+          // Single-value column
+          String columnDataType = h2MetaData.getColumnTypeName(columnIndex);
+          columnValue = removeTrailingZeroForNumber(columnValue, columnDataType);
+          reusableExpectedValueMap.put(columnName, columnValue);
+          reusableColumnOrder.add(columnName);
+        }
+      }
+
+      // Add multi-value column results to the expected values
+      // The reason for this step is that Pinot does not maintain order of elements in multi-value columns
+      for (Map.Entry<String, List<String>> entry : reusableMultiValuesMap.entrySet()) {
+        List<String> multiValue = entry.getValue();
+        Collections.sort(multiValue);
+        reusableExpectedValueMap.put(entry.getKey(), multiValue.toString());
+      }
+
+      // Build expected value String
+      StringBuilder expectedValue = new StringBuilder();
+      StringBuilder expectedOrderByValue = new StringBuilder();
+      for (String column : reusableColumnOrder) {
+        expectedValue.append(reusableExpectedValueMap.get(column)).append(' ');
+        if (orderByColumns.contains(column)) {
+          expectedOrderByValue.append(reusableExpectedValueMap.get(column)).append(' ');
+        }
+      }
+      expectedValues.add(expectedValue.toString());
+      expectedOrderByValues.add(expectedOrderByValue.toString());
+    }
+
+    return h2NumRows;
+  }
+
+  private static void comparePinotResultsWithExpectedValues(Set<String> expectedValues, List<String> expectedOrderByValues,
+      org.apache.pinot.client.ResultSet connectionResultSet, Set<String> orderByColumns, String pinotQuery, List<String> sqlQueries,
+      int h2NumRows, long pinotNumRecordsSelected) throws IOException, SQLException {
+
+    int pinotNumRows = connectionResultSet.getRowCount();
+    // No record selected in H2
+    if (h2NumRows== 0) {
+      if (pinotNumRows != 0) {
+        String failureMessage = "No record selected in H2 but number of records selected in Pinot: " + pinotNumRows;
+        failure(pinotQuery, sqlQueries, failureMessage);
+        return;
+      }
+
+      if (pinotNumRecordsSelected != 0) {
+        String failureMessage =
+            "No selection result returned in Pinot but number of records selected: " + pinotNumRecordsSelected;
+        failure(pinotQuery, sqlQueries, failureMessage);
+        return;
+      }
+
+      // Skip further comparison
+      return;
+    }
+
+    PinotQuery compiledQuery = CalciteSqlParser.compileToPinotQuery(pinotQuery);
+    boolean isLimitSet = compiledQuery.isSetLimit();
+    int limit = compiledQuery.getLimit();
+
+    // Only compare exhausted results
+    if (h2NumRows < MAX_NUM_ROWS_TO_COMPARE) {
+
+      for (int rowIndex = 0; rowIndex < pinotNumRows; rowIndex++) {
+        // Build actual value String.
+        StringBuilder actualValueBuilder = new StringBuilder();
+        StringBuilder actualOrderByValueBuilder = new StringBuilder();
+        for (int columnIndex = 0; columnIndex < connectionResultSet.getColumnCount(); columnIndex++) {
+          // Convert column name to all uppercase to make it compatible with H2
+          String columnName = connectionResultSet.getColumnName(columnIndex).toUpperCase();
+          String columnResult = connectionResultSet.getString(rowIndex, columnIndex);
+
+          String columnDataType = connectionResultSet.getColumnDataType(columnIndex);
+          columnResult = removeTrailingZeroForNumber(columnResult, columnDataType);
+          // TODO: Find a better way to identify multi-value column
+          if (columnResult.charAt(0) == '[') {
+            // Multi-value column
+            JsonNode columnValues = JsonUtils.stringToJsonNode(columnResult);
+            List<String> multiValue = new ArrayList<>();
+            int length = columnValues.size();
+            for (int elementIndex = 0; elementIndex < length; elementIndex++) {
+              multiValue.add(columnValues.get(elementIndex).asText());
+            }
+            for (int elementIndex = length; elementIndex < MAX_NUM_ELEMENTS_IN_MULTI_VALUE_TO_COMPARE; elementIndex++) {
+              multiValue.add("null");
+            }
+            Collections.sort(multiValue);
+            actualValueBuilder.append(multiValue.toString()).append(' ');
+            if (orderByColumns.contains(columnName)) {
+              actualOrderByValueBuilder.append(columnResult).append(' ');
+            }
+          } else {
+            // Single-value column
+            actualValueBuilder.append(columnResult).append(' ');
+            if (orderByColumns.contains(columnName)) {
+              actualOrderByValueBuilder.append(columnResult).append(' ');
+            }
+          }
+        }
+
+        String actualValue = actualValueBuilder.toString();
+        String actualOrderByValue = actualOrderByValueBuilder.toString();
+        // Check actual value in expected values set, skip comparison if query response is truncated by limit
+        if ((!isLimitSet || limit > h2NumRows) && !expectedValues.contains(actualValue)) {
+          String failureMessage = "Selection result returned in Pinot but not in H2: " + actualValue + ", " + expectedValues;
+          failure(pinotQuery, sqlQueries, failureMessage);
+          return;
+        }
+        if (!orderByColumns.isEmpty()) {
+          // Check actual group value is the same as expected group value in the same order.
+          if (!expectedOrderByValues.get(rowIndex).equals(actualOrderByValue)) {
+            String failureMessage = String.format(
+                "Selection Order by result at row index: %d in Pinot: [ %s ] is different than result in H2: [ %s ].",
+                rowIndex, actualOrderByValue, expectedOrderByValues.get(rowIndex));
+            failure(pinotQuery, sqlQueries, failureMessage);
+            return;
+          }
+        }
+      }
+    }
+  }
+
+  private static String removeTrailingZeroForNumber(String value, String type) {
+    // remove trailing zero after decimal point to compare decimal numbers
+    if (type == null || type.toUpperCase().equals("FLOAT") || type.toUpperCase().equals("DOUBLE") || type.toUpperCase().equals("BIGINT")) {
+      // remove trailing zero after decimal point to be consistent with h2 data
+      if (value.endsWith(".0")) {
+        return value.substring(0, value.length() - 2);
+      }
+    }
+    return value;
+  }
+
+  private static List<String> appendColumnsToSelectionRequests(Set<String> columns, List<String> requests) {

Review comment:
       Can you add a comment above this method explaining why we do it this way?

##########
File path: pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
##########
@@ -975,6 +830,199 @@ private static boolean isSelectionQuery(BrokerRequest brokerRequest) {
     return false;
   }
 
+  private static Set<String> convertToUpperCase(Set<String> columns) {
+    Set<String> upperCaseColumns = new HashSet<>();

Review comment:
       Is it possible to reuse the original set here?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org