Posted to commits@asterixdb.apache.org by mb...@apache.org on 2022/02/26 00:00:42 UTC

[asterixdb] 06/10: [ASTERIXDB-3016][RT] Fix failure in hash groupby

This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 3d79c9f39392d6e2e5127b716788e4335014606b
Author: Dmitry Lychagin <dm...@couchbase.com>
AuthorDate: Wed Feb 16 16:47:08 2022 -0800

    [ASTERIXDB-3016][RT] Fix failure in hash groupby
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - Modify hash group by to force garbage collection on the
      hash table if a tuple could not be inserted into it
    - Make hash group by clean up its run files in case
      of an error
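    
    A minimal sketch of the new insert path described above (simplified,
    hypothetical Java types standing in for the Hyracks interfaces; the
    actual change is in HashSpillableTableFactory below): on a failed
    hash table insert, force one garbage-collection pass and retry, and
    only if that also fails undo the buffer insertion and report failure.
    
        // Simplified stand-ins for the Hyracks hash table and buffer manager.
        interface HashTable {
            boolean insert(int entry, long pointer);
            boolean isGarbageCollectionNeeded();
            int collectGarbage(); // reclaimed frame count, or -1 if no compaction happened
        }
    
        interface BufferManager {
            void cancelInsertTuple(int partition);
        }
    
        final class GroupByInsertSketch {
            private final HashTable hashTable;
            private final BufferManager bufferManager;
    
            GroupByInsertSketch(HashTable hashTable, BufferManager bufferManager) {
                this.hashTable = hashTable;
                this.bufferManager = bufferManager;
            }
    
            // On a failed insert, force a garbage-collection pass and retry once;
            // if the retry also fails, undo the earlier buffer insertion so the
            // operation stays atomic and the caller can spill the partition.
            boolean insertEntry(int entry, long pointer, int partition) {
                if (hashTable.insert(entry, pointer)) {
                    return true;
                }
                if (collectGarbage(true) && hashTable.insert(entry, pointer)) {
                    return true;
                }
                bufferManager.cancelInsertTuple(partition);
                return false;
            }
    
            // force == true skips the isGarbageCollectionNeeded() check, mirroring
            // the new collectGarbageInHashTableForTuplePointer(boolean force) helper.
            private boolean collectGarbage(boolean force) {
                if (force || hashTable.isGarbageCollectionNeeded()) {
                    return hashTable.collectGarbage() != -1;
                }
                return false;
            }
        }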
    
    Change-Id: I7a133fa1d0555ebbcb7a9e3cb7445757716c9a2a
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15325
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Dmitry Lychagin <dm...@couchbase.com>
    Reviewed-by: Till Westmann <ti...@couchbase.com>
---
 .../query-ASTERIXDB-3016.1.ddl.sqlpp               | 28 ++++++++++++++++++++++
 .../query-ASTERIXDB-3016.2.update.sqlpp            | 26 ++++++++++++++++++++
 .../query-ASTERIXDB-3016.3.query.sqlpp             | 27 +++++++++++++++++++++
 .../query-ASTERIXDB-3016.3.adm                     |  1 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  5 ++++
 .../std/group/HashSpillableTableFactory.java       | 28 +++++++++++++++-------
 .../ExternalGroupWriteOperatorNodePushable.java    | 14 +++++++----
 .../std/structures/ISerializableTable.java         |  2 +-
 8 files changed, 117 insertions(+), 14 deletions(-)

diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.1.ddl.sqlpp
new file mode 100644
index 0000000..fdfee88
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.1.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse tpcds if exists;
+create dataverse tpcds;
+
+use tpcds;
+
+create dataset item(i_item_sk string not unknown) open type primary key i_item_sk;
+
+create dataset inventory(inv_date_sk string not unknown, inv_item_sk string not unknown,
+  inv_warehouse_sk string not unknown) open type primary key inv_date_sk, inv_item_sk, inv_warehouse_sk;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.2.update.sqlpp
new file mode 100644
index 0000000..f8fe178
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.2.update.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use tpcds;
+
+set `import-private-functions` `true`;
+
+insert into item (select value object_remove(t, "table_name") from tpcds_datagen("item", 0.5) t);
+
+insert into inventory (select value object_remove(t, "table_name") from tpcds_datagen("inventory", 0.5) t);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.query.sqlpp
new file mode 100644
index 0000000..01158b9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use tpcds;
+
+SELECT ROUND(AVG(inv.inv_quantity_on_hand), 1) qoh, i.i_product_name
+FROM  inventory inv, item i
+WHERE inv.inv_item_sk /*+hash-bcast*/ = i.i_item_sk
+/*+ hash */ GROUP BY i.i_product_name
+ORDER BY qoh, i.i_product_name
+LIMIT 1;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.adm
new file mode 100644
index 0000000..96494f4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.adm
@@ -0,0 +1 @@
+{ "qoh": 402.0, "i_product_name": "ableoughtn st" }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 8b6b7c0..81567ff 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -6099,6 +6099,11 @@
         <output-dir compare="Text">hash-group-by-decor</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="group-by">
+      <compilation-unit name="query-ASTERIXDB-3016">
+        <output-dir compare="Text">query-ASTERIXDB-3016</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
   <test-group name="index-join">
     <test-case FilePath="index-join">
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java
index 4f0c304..1e5c121 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java
@@ -177,16 +177,21 @@ public class HashSpillableTableFactory implements ISpillableTableFactory {
                 }
 
                 // Checks whether the garbage collection is required and conducts a garbage collection if so.
-                if (hashTableForTuplePointer.isGarbageCollectionNeeded()) {
+                collectGarbageInHashTableForTuplePointer(false);
+                bufferManager.clearPartition(partition);
+            }
+
+            private boolean collectGarbageInHashTableForTuplePointer(boolean force) throws HyracksDataException {
+                if (force || hashTableForTuplePointer.isGarbageCollectionNeeded()) {
                     int numberOfFramesReclaimed =
                             hashTableForTuplePointer.collectGarbage(bufferAccessor, tpcIntermediate);
                     if (LOGGER.isDebugEnabled()) {
                         LOGGER.debug("Garbage Collection on Hash table is done. Deallocated frames:"
                                 + numberOfFramesReclaimed);
                     }
+                    return numberOfFramesReclaimed != -1;
                 }
-
-                bufferManager.clearPartition(partition);
+                return false;
             }
 
             private int getPartition(int entryInHashTable) {
@@ -234,11 +239,18 @@ public class HashSpillableTableFactory implements ISpillableTableFactory {
                 }
 
                 // Insertion to the hash table
-                if (!hashTableForTuplePointer.insert(entryInHashTable, pointer)) {
-                    // To preserve the atomicity of this method, we need to undo the effect
-                    // of the above bufferManager.insertTuple() call since the given insertion has failed.
-                    bufferManager.cancelInsertTuple(pid);
-                    return false;
+                boolean inserted = hashTableForTuplePointer.insert(entryInHashTable, pointer);
+                if (!inserted) {
+                    // Force garbage collection on the hash table and attempt to insert again
+                    if (collectGarbageInHashTableForTuplePointer(true)) {
+                        inserted = hashTableForTuplePointer.insert(entryInHashTable, pointer);
+                    }
+                    if (!inserted) {
+                        // To preserve the atomicity of this method, we need to undo the effect
+                        // of the above bufferManager.insertTuple() call since the given insertion has failed.
+                        bufferManager.cancelInsertTuple(pid);
+                        return false;
+                    }
                 }
 
                 return true;
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java
index 1a6f4ef..8618528 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java
@@ -142,11 +142,15 @@ public class ExternalGroupWriteOperatorNodePushable extends AbstractUnaryOutputS
                         runs[i].getFileSize(), gbyFields, fdFields, groupByComparators, nmkComputer,
                         mergeAggregatorFactory, partialAggRecordDesc, outRecordDesc, frameLimit, level);
                 RunFileWriter[] runFileWriters = new RunFileWriter[partitionTable.getNumPartitions()];
-                int[] sizeInTuplesNextLevel =
-                        buildGroup(runs[i].createDeleteOnCloseReader(), partitionTable, runFileWriters);
-                for (int idFile = 0; idFile < runFileWriters.length; idFile++) {
-                    if (runFileWriters[idFile] != null) {
-                        generatedRuns.add(runFileWriters[idFile]);
+                int[] sizeInTuplesNextLevel;
+                try {
+                    sizeInTuplesNextLevel =
+                            buildGroup(runs[i].createDeleteOnCloseReader(), partitionTable, runFileWriters);
+                } finally {
+                    for (int idFile = 0; idFile < runFileWriters.length; idFile++) {
+                        if (runFileWriters[idFile] != null) {
+                            generatedRuns.add(runFileWriters[idFile]);
+                        }
                     }
                 }
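
The try/finally above ensures that every run file writer created before a failure
is still registered in generatedRuns, so the operator's error path can delete the
corresponding run files. A minimal sketch of the same pattern, with hypothetical
names (RunWriter, doBuild) standing in for RunFileWriter and the real spill work:

    import java.util.List;

    final class RunCleanupSketch {
        interface RunWriter { /* stands in for RunFileWriter */ }

        // Track every writer created by doBuild(), even when it throws, so the
        // caller can erase the corresponding run files during error cleanup.
        static int[] buildGroup(RunWriter[] writers, List<RunWriter> generatedRuns) throws Exception {
            try {
                return doBuild(writers); // may populate writers[] and then fail
            } finally {
                for (RunWriter w : writers) {
                    if (w != null) {
                        generatedRuns.add(w);
                    }
                }
            }
        }

        private static int[] doBuild(RunWriter[] writers) throws Exception {
            return new int[writers.length]; // placeholder for the real partition-and-spill work
        }
    }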
 
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java
index 51f9984..58ad213 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java
@@ -54,7 +54,7 @@ public interface ISerializableTable {
      *            required to access the real tuple to calculate the original hash value
      * @param tpc:
      *            hash function
-     * @return the number of frames that are reclaimed.
+     * @return the number of frames that are reclaimed. The value -1 is returned when no compaction happened.
      * @throws HyracksDataException
      */
     int collectGarbage(ITuplePointerAccessor bufferAccessor, ITuplePartitionComputer tpc) throws HyracksDataException;