You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2022/02/15 00:51:19 UTC

[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #8195: No dictionary group by perf

Jackie-Jiang commented on a change in pull request #8195:
URL: https://github.com/apache/pinot/pull/8195#discussion_r806355760



##########
File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java
##########
@@ -306,7 +325,7 @@ private int getGroupIdForKey(FixedIntArray keyList) {
     if (groupId == INVALID_ID) {
       if (_numGroups < _globalGroupIdUpperBound) {
         groupId = _numGroups;
-        _groupKeyMap.put(keyList, _numGroups++);
+        _groupKeyMap.put(keyList.clone(), _numGroups++);

Review comment:
       Should this be reverted?

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java
##########
@@ -92,60 +92,62 @@ public int getGlobalGroupKeyUpperBound() {
   @Override
   public void generateKeysForBlock(TransformBlock transformBlock, int[] groupKeys) {
     int numDocs = transformBlock.getNumDocs();
-    int[][] keys = new int[numDocs][_numGroupByExpressions];
+    Object[] values = new Object[_numGroupByExpressions];
     for (int i = 0; i < _numGroupByExpressions; i++) {
       BlockValSet blockValSet = transformBlock.getBlockValueSet(_groupByExpressions[i]);
       if (_dictionaries[i] != null) {
-        int[] dictIds = blockValSet.getDictionaryIdsSV();
-        for (int j = 0; j < numDocs; j++) {
-          keys[j][i] = dictIds[j];
-        }
+        values[i] = blockValSet.getDictionaryIdsSV();
       } else {
-        ValueToIdMap onTheFlyDictionary = _onTheFlyDictionaries[i];
         switch (_storedTypes[i]) {
           case INT:
-            int[] intValues = blockValSet.getIntValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(intValues[j]);
-            }
+            values[i] = blockValSet.getIntValuesSV();
             break;
           case LONG:
-            long[] longValues = blockValSet.getLongValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(longValues[j]);
-            }
+            values[i] = blockValSet.getLongValuesSV();
             break;
           case FLOAT:
-            float[] floatValues = blockValSet.getFloatValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(floatValues[j]);
-            }
+            values[i] = blockValSet.getFloatValuesSV();
             break;
           case DOUBLE:
-            double[] doubleValues = blockValSet.getDoubleValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(doubleValues[j]);
-            }
+            values[i] = blockValSet.getDoubleValuesSV();
             break;
           case STRING:
-            String[] stringValues = blockValSet.getStringValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(stringValues[j]);
-            }
+            values[i] = blockValSet.getStringValuesSV();
             break;
           case BYTES:
-            byte[][] bytesValues = blockValSet.getBytesValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(new ByteArray(bytesValues[j]));
-            }
+            values[i] = blockValSet.getBytesValuesSV();
             break;
           default:
             throw new IllegalArgumentException("Illegal data type for no-dictionary key generator: " + _storedTypes[i]);
         }
       }
     }
-    for (int i = 0; i < numDocs; i++) {
-      groupKeys[i] = getGroupIdForKey(new FixedIntArray(keys[i]));
+    int[] keyValues = new int[_numGroupByExpressions];
+    // note that we are mutating its backing array for memory efficiency
+    FixedIntArray flyweightKey = new FixedIntArray(keyValues);
+    for (int row = 0; row < numDocs; row++) {

Review comment:
       Is this faster than the original way of processing values? With the new approach we need to do a lot of `if` checks on a per-value basis, which can potentially hurt performance. If we want to avoid allocating `keys` multiple times, we can simply reuse it.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org