You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2023/06/23 18:58:37 UTC

[pinot] branch master updated: Use _hasNull in the object single column distinct executors. (#10914)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 7e76cac586 Use _hasNull in the object single column distinct executors. (#10914)
7e76cac586 is described below

commit 7e76cac5866e5f40b734692fb082da683d2f4cab
Author: Shen Yu <sh...@startree.ai>
AuthorDate: Fri Jun 23 11:58:31 2023 -0700

    Use _hasNull in the object single column distinct executors. (#10914)
---
 ...eRawBigDecimalSingleColumnDistinctExecutor.java | 10 ++--
 .../BaseRawBytesSingleColumnDistinctExecutor.java  | 14 +++--
 .../BaseRawStringSingleColumnDistinctExecutor.java | 10 ++--
 ...DecimalSingleColumnDistinctOrderByExecutor.java |  9 +---
 .../RawBytesSingleColumnDistinctOnlyExecutor.java  |  4 +-
 ...awBytesSingleColumnDistinctOrderByExecutor.java | 12 ++---
 ...wStringSingleColumnDistinctOrderByExecutor.java |  9 +---
 .../pinot/queries/BigDecimalQueriesTest.java       |  5 +-
 .../queries/NullHandlingEnabledQueriesTest.java    | 60 ++++++++++++++++++++--
 9 files changed, 89 insertions(+), 44 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawBigDecimalSingleColumnDistinctExecutor.java b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawBigDecimalSingleColumnDistinctExecutor.java
index 88229cd389..b1ffe77602 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawBigDecimalSingleColumnDistinctExecutor.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawBigDecimalSingleColumnDistinctExecutor.java
@@ -45,6 +45,7 @@ public abstract class BaseRawBigDecimalSingleColumnDistinctExecutor implements D
   final boolean _nullHandlingEnabled;
 
   final ObjectSet<BigDecimal> _valueSet;
+  private boolean _hasNull;
 
   BaseRawBigDecimalSingleColumnDistinctExecutor(ExpressionContext expression, DataType dataType, int limit,
       boolean nullHandlingEnabled) {
@@ -64,6 +65,10 @@ public abstract class BaseRawBigDecimalSingleColumnDistinctExecutor implements D
     for (BigDecimal value : _valueSet) {
       records.add(new Record(new Object[]{value}));
     }
+    if (_hasNull) {
+      records.add(new Record(new Object[]{null}));
+    }
+    assert records.size() <= _limit + 1;
     return new DistinctTable(dataSchema, records, _nullHandlingEnabled);
   }
 
@@ -76,9 +81,8 @@ public abstract class BaseRawBigDecimalSingleColumnDistinctExecutor implements D
       RoaringBitmap nullBitmap = blockValueSet.getNullBitmap();
       for (int i = 0; i < numDocs; i++) {
         if (nullBitmap != null && nullBitmap.contains(i)) {
-          values[i] = null;
-        }
-        if (add(values[i])) {
+          _hasNull = true;
+        } else if (add(values[i])) {
           return true;
         }
       }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawBytesSingleColumnDistinctExecutor.java b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawBytesSingleColumnDistinctExecutor.java
index 69c893fec3..5babb84c41 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawBytesSingleColumnDistinctExecutor.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawBytesSingleColumnDistinctExecutor.java
@@ -45,6 +45,7 @@ abstract class BaseRawBytesSingleColumnDistinctExecutor implements DistinctExecu
   final boolean _nullHandlingEnabled;
 
   final ObjectSet<ByteArray> _valueSet;
+  private boolean _hasNull;
 
   BaseRawBytesSingleColumnDistinctExecutor(ExpressionContext expression, DataType dataType, int limit,
       boolean nullHandlingEnabled) {
@@ -64,6 +65,10 @@ abstract class BaseRawBytesSingleColumnDistinctExecutor implements DistinctExecu
     for (ByteArray value : _valueSet) {
       records.add(new Record(new Object[]{value}));
     }
+    if (_hasNull) {
+      records.add(new Record(new Object[]{null}));
+    }
+    assert records.size() <= _limit + 1;
     return new DistinctTable(dataSchema, records, _nullHandlingEnabled);
   }
 
@@ -76,15 +81,14 @@ abstract class BaseRawBytesSingleColumnDistinctExecutor implements DistinctExecu
       RoaringBitmap nullBitmap = blockValueSet.getNullBitmap();
       for (int i = 0; i < numDocs; i++) {
         if (nullBitmap != null && nullBitmap.contains(i)) {
-          values[i] = null;
-        }
-        if (add(values[i])) {
+          _hasNull = true;
+        } else if (add(new ByteArray(values[i]))) {
           return true;
         }
       }
     } else {
       for (int i = 0; i < numDocs; i++) {
-        if (add(values[i])) {
+        if (add(new ByteArray(values[i]))) {
           return true;
         }
       }
@@ -92,5 +96,5 @@ abstract class BaseRawBytesSingleColumnDistinctExecutor implements DistinctExecu
     return false;
   }
 
-  protected abstract boolean add(byte[] value);
+  protected abstract boolean add(ByteArray byteArray);
 }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawStringSingleColumnDistinctExecutor.java b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawStringSingleColumnDistinctExecutor.java
index bb5cd0bb82..7b85a9a648 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawStringSingleColumnDistinctExecutor.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/BaseRawStringSingleColumnDistinctExecutor.java
@@ -44,6 +44,7 @@ abstract class BaseRawStringSingleColumnDistinctExecutor implements DistinctExec
   final boolean _nullHandlingEnabled;
 
   final ObjectSet<String> _valueSet;
+  private boolean _hasNull;
 
   BaseRawStringSingleColumnDistinctExecutor(ExpressionContext expression, DataType dataType, int limit,
       boolean nullHandlingEnabled) {
@@ -63,6 +64,10 @@ abstract class BaseRawStringSingleColumnDistinctExecutor implements DistinctExec
     for (String value : _valueSet) {
       records.add(new Record(new Object[]{value}));
     }
+    if (_hasNull) {
+      records.add(new Record(new Object[]{null}));
+    }
+    assert records.size() <= _limit + 1;
     return new DistinctTable(dataSchema, records, _nullHandlingEnabled);
   }
 
@@ -76,9 +81,8 @@ abstract class BaseRawStringSingleColumnDistinctExecutor implements DistinctExec
         RoaringBitmap nullBitmap = blockValueSet.getNullBitmap();
         for (int i = 0; i < numDocs; i++) {
           if (nullBitmap != null && nullBitmap.contains(i)) {
-            values[i] = null;
-          }
-          if (add(values[i])) {
+            _hasNull = true;
+          } else if (add(values[i])) {
             return true;
           }
         }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBigDecimalSingleColumnDistinctOrderByExecutor.java b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBigDecimalSingleColumnDistinctOrderByExecutor.java
index 99fa4d12a7..e0673f068a 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBigDecimalSingleColumnDistinctOrderByExecutor.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBigDecimalSingleColumnDistinctOrderByExecutor.java
@@ -39,13 +39,8 @@ public class RawBigDecimalSingleColumnDistinctOrderByExecutor extends BaseRawBig
 
     assert orderByExpression.getExpression().equals(expression);
     int comparisonFactor = orderByExpression.isAsc() ? -1 : 1;
-    if (nullHandlingEnabled) {
-      _priorityQueue = new ObjectHeapPriorityQueue<>(Math.min(limit, MAX_INITIAL_CAPACITY),
-          (b1, b2) -> b1 == null ? (b2 == null ? 0 : 1) : (b2 == null ? -1 : b1.compareTo(b2)) * comparisonFactor);
-    } else {
-      _priorityQueue = new ObjectHeapPriorityQueue<>(Math.min(limit, MAX_INITIAL_CAPACITY),
-          (b1, b2) -> b1.compareTo(b2) * comparisonFactor);
-    }
+    _priorityQueue = new ObjectHeapPriorityQueue<>(Math.min(limit, MAX_INITIAL_CAPACITY),
+            (b1, b2) -> b1.compareTo(b2) * comparisonFactor);
   }
 
   @Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBytesSingleColumnDistinctOnlyExecutor.java b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBytesSingleColumnDistinctOnlyExecutor.java
index 7490f8f1df..fa66679882 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBytesSingleColumnDistinctOnlyExecutor.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBytesSingleColumnDistinctOnlyExecutor.java
@@ -35,8 +35,8 @@ public class RawBytesSingleColumnDistinctOnlyExecutor extends BaseRawBytesSingle
   }
 
   @Override
-  protected boolean add(byte[] value) {
-    _valueSet.add(new ByteArray(value));
+  protected boolean add(ByteArray byteArray) {
+    _valueSet.add(byteArray);
     return _valueSet.size() >= _limit;
   }
 }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBytesSingleColumnDistinctOrderByExecutor.java b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBytesSingleColumnDistinctOrderByExecutor.java
index e064420252..03e3b26b3f 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBytesSingleColumnDistinctOrderByExecutor.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawBytesSingleColumnDistinctOrderByExecutor.java
@@ -39,18 +39,12 @@ public class RawBytesSingleColumnDistinctOrderByExecutor extends BaseRawBytesSin
 
     assert orderByExpression.getExpression().equals(expression);
     int comparisonFactor = orderByExpression.isAsc() ? -1 : 1;
-    if (nullHandlingEnabled) {
-      _priorityQueue = new ObjectHeapPriorityQueue<>(Math.min(limit, MAX_INITIAL_CAPACITY),
-          (b1, b2) -> b1 == null ? (b2 == null ? 0 : 1) : (b2 == null ? -1 : b1.compareTo(b2)) * comparisonFactor);
-    } else {
-      _priorityQueue = new ObjectHeapPriorityQueue<>(Math.min(limit, MAX_INITIAL_CAPACITY),
-          (b1, b2) -> b1.compareTo(b2) * comparisonFactor);
-    }
+    _priorityQueue = new ObjectHeapPriorityQueue<>(Math.min(limit, MAX_INITIAL_CAPACITY),
+            (b1, b2) -> b1.compareTo(b2) * comparisonFactor);
   }
 
   @Override
-  protected boolean add(byte[] value) {
-    ByteArray byteArray = new ByteArray(value);
+  protected boolean add(ByteArray byteArray) {
     if (!_valueSet.contains(byteArray)) {
       if (_valueSet.size() < _limit) {
         _valueSet.add(byteArray);
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawStringSingleColumnDistinctOrderByExecutor.java b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawStringSingleColumnDistinctOrderByExecutor.java
index 8471c60bb3..d86bad4e90 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawStringSingleColumnDistinctOrderByExecutor.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/distinct/raw/RawStringSingleColumnDistinctOrderByExecutor.java
@@ -38,13 +38,8 @@ public class RawStringSingleColumnDistinctOrderByExecutor extends BaseRawStringS
 
     assert orderByExpression.getExpression().equals(expression);
     int comparisonFactor = orderByExpression.isAsc() ? -1 : 1;
-    if (nullHandlingEnabled) {
-      _priorityQueue = new ObjectHeapPriorityQueue<>(Math.min(limit, MAX_INITIAL_CAPACITY),
-          (s1, s2) -> s1 == null ? (s2 == null ? 0 : 1) : (s2 == null ? -1 : s1.compareTo(s2)) * comparisonFactor);
-    } else {
-      _priorityQueue = new ObjectHeapPriorityQueue<>(Math.min(limit, MAX_INITIAL_CAPACITY),
-          (s1, s2) -> s1.compareTo(s2) * comparisonFactor);
-    }
+    _priorityQueue = new ObjectHeapPriorityQueue<>(Math.min(limit, MAX_INITIAL_CAPACITY),
+        (s1, s2) -> s1.compareTo(s2) * comparisonFactor);
   }
 
   @Override
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/BigDecimalQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/BigDecimalQueriesTest.java
index dc7340dd41..8be7131e5d 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/BigDecimalQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/BigDecimalQueriesTest.java
@@ -211,7 +211,7 @@ public class BigDecimalQueriesTest extends BaseQueriesTest {
       }
     }
     {
-      String query = String.format("SELECT DISTINCT %s FROM testTable ORDER BY %s", BIG_DECIMAL_COLUMN,
+      String query = String.format("SELECT DISTINCT %s FROM testTable ORDER BY %s LIMIT 4000", BIG_DECIMAL_COLUMN,
           BIG_DECIMAL_COLUMN);
       BrokerResponseNative brokerResponse = getBrokerResponse(query, queryOptions);
       ResultTable resultTable = brokerResponse.getResultTable();
@@ -219,7 +219,6 @@ public class BigDecimalQueriesTest extends BaseQueriesTest {
       assertEquals(dataSchema,
           new DataSchema(new String[]{BIG_DECIMAL_COLUMN}, new ColumnDataType[]{ColumnDataType.BIG_DECIMAL}));
       List<Object[]> rows = resultTable.getRows();
-      assertEquals(rows.size(), 10);
       int i = 0;
       for (int index = 0; index < rows.size() - 1; index++) {
         Object[] row = rows.get(index);
@@ -260,8 +259,6 @@ public class BigDecimalQueriesTest extends BaseQueriesTest {
         i++;
         index++;
       }
-      // The default null ordering is 'NULLS LAST'. Therefore, null will appear as the last record.
-      assertNull(rows.get(rows.size() - 1)[0]);
     }
     {
       // This test case was added to validate path-code for distinct w/o order by. See:
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/NullHandlingEnabledQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/NullHandlingEnabledQueriesTest.java
index dc2515c222..56484ca97e 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/NullHandlingEnabledQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/NullHandlingEnabledQueriesTest.java
@@ -225,14 +225,14 @@ public class NullHandlingEnabledQueriesTest extends BaseQueriesTest {
     assertEquals(resultTable.getRows().get(3), new Object[]{null, null});
   }
 
-  @DataProvider(name = "DataTypes")
-  public static Object[][] getDataTypes() {
+  @DataProvider(name = "NumberTypes")
+  public static Object[][] getPrimitiveDataTypes() {
     return new Object[][]{
         {FieldSpec.DataType.INT}, {FieldSpec.DataType.LONG}, {FieldSpec.DataType.DOUBLE}, {FieldSpec.DataType.FLOAT}
     };
   }
 
-  @Test(dataProvider = "DataTypes")
+  @Test(dataProvider = "NumberTypes")
   public void testSelectDistinctWithLimit(FieldSpec.DataType dataType)
       throws Exception {
     _rows = new ArrayList<>();
@@ -253,7 +253,7 @@ public class NullHandlingEnabledQueriesTest extends BaseQueriesTest {
     assertEquals(resultTable.getRows().size(), 3);
   }
 
-  @Test(dataProvider = "DataTypes")
+  @Test(dataProvider = "NumberTypes")
   public void testSelectDistinctOrderByWithLimit(FieldSpec.DataType dataType)
       throws Exception {
     double delta = 0.01;
@@ -278,6 +278,58 @@ public class NullHandlingEnabledQueriesTest extends BaseQueriesTest {
     assertTrue(Math.abs(((Number) resultTable.getRows().get(2)[0]).doubleValue() - 3.0) < delta);
   }
 
+
+  @DataProvider(name = "ObjectTypes")
+  public static Object[][] getObjectDataTypes() {
+    return new Object[][]{
+        {FieldSpec.DataType.STRING, "a"}, {
+        FieldSpec.DataType.BIG_DECIMAL, 1
+    }, {
+        FieldSpec.DataType.BYTES, "a string".getBytes()
+    }
+    };
+  }
+
+  @Test(dataProvider = "ObjectTypes")
+  public void testObjectSingleColumnDistinctOrderByNullsFirst(FieldSpec.DataType dataType, Object value)
+      throws Exception {
+    _rows = new ArrayList<>();
+    insertRow(null);
+    insertRow(value);
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).build();
+    Schema schema = new Schema.SchemaBuilder().addSingleValueDimension(COLUMN1, dataType).build();
+    setUpSegments(tableConfig, schema, _rows);
+    Map<String, String> queryOptions = new HashMap<>();
+    queryOptions.put("enableNullHandling", "true");
+    String query = String.format("SELECT DISTINCT %s FROM testTable ORDER BY %s NULLS FIRST LIMIT 1", COLUMN1, COLUMN1);
+
+    BrokerResponseNative brokerResponse = getBrokerResponse(query, queryOptions);
+
+    ResultTable resultTable = brokerResponse.getResultTable();
+    assertEquals(resultTable.getRows().size(), 1);
+    assertNull(resultTable.getRows().get(0)[0]);
+  }
+
+  @Test(dataProvider = "ObjectTypes")
+  public void testObjectSingleColumnDistinctOrderByNullsLast(FieldSpec.DataType dataType, Object value)
+      throws Exception {
+    _rows = new ArrayList<>();
+    insertRow(null);
+    insertRow(value);
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).build();
+    Schema schema = new Schema.SchemaBuilder().addSingleValueDimension(COLUMN1, dataType).build();
+    setUpSegments(tableConfig, schema, _rows);
+    Map<String, String> queryOptions = new HashMap<>();
+    queryOptions.put("enableNullHandling", "true");
+    String query = String.format("SELECT DISTINCT %s FROM testTable ORDER BY %s NULLS LAST LIMIT 1", COLUMN1, COLUMN1);
+
+    BrokerResponseNative brokerResponse = getBrokerResponse(query, queryOptions);
+
+    ResultTable resultTable = brokerResponse.getResultTable();
+    assertEquals(resultTable.getRows().size(), 1);
+    assertNotNull(resultTable.getRows().get(0)[0]);
+  }
+
   @Test
   public void testTransformBlockValSetGetNullBitmap()
       throws Exception {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org