You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2020/10/02 01:07:43 UTC

[asterixdb] 07/11: [ASTERIXDB-2782][RT] Incorrect result in left outer nested loop join

This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit d28b689de69fd4d739ee4207717fe4efa1cc5eab
Author: Ali Alsuliman <al...@gmail.com>
AuthorDate: Wed Sep 23 20:58:16 2020 -0700

    [ASTERIXDB-2782][RT] Incorrect result in left outer nested loop join
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    When the right side of a left outer nested loop join is empty, emit each
    left-side tuple with MISSING for the right-side fields, similar to hash join.
    
    Change-Id: I5f268477823f8739e1441d48f06d7cbd2bc5f7db
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/8065
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Ali Alsuliman <al...@gmail.com>
    Reviewed-by: Dmitry Lychagin <dm...@couchbase.com>
---
 .../empty-dataset/empty-dataset.01.ddl.sqlpp       | 27 ++++++++++++++++++
 .../empty-dataset/empty-dataset.02.update.sqlpp    | 22 +++++++++++++++
 .../empty-dataset/empty-dataset.03.query.sqlpp     | 23 ++++++++++++++++
 .../empty-dataset/empty-dataset.04.query.sqlpp     | 23 ++++++++++++++++
 .../empty-dataset/empty-dataset.05.query.sqlpp     | 23 ++++++++++++++++
 .../empty-dataset/empty-dataset.06.query.sqlpp     | 23 ++++++++++++++++
 .../empty-dataset/empty-dataset.03.adm             |  1 +
 .../empty-dataset/empty-dataset.04.adm             |  0
 .../empty-dataset/empty-dataset.05.adm             |  0
 .../empty-dataset/empty-dataset.06.adm             |  0
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  5 ++++
 .../hyracks/dataflow/std/join/NestedLoopJoin.java  | 32 +++++++++++++++++++---
 12 files changed, 175 insertions(+), 4 deletions(-)

diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.01.ddl.sqlpp
new file mode 100644
index 0000000..fbe4221
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.01.ddl.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE TYPE openType AS {id: string};
+CREATE DATASET ds1(openType) PRIMARY KEY id;
+CREATE DATASET ds2(openType) PRIMARY KEY id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.02.update.sqlpp
new file mode 100644
index 0000000..2a193cd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.02.update.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+INSERT INTO ds1 {"id": "1", "f": 3};
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.03.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.03.query.sqlpp
new file mode 100644
index 0000000..4a9afc2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.03.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds1 LEFT OUTER JOIN ds2 ON ds1.f > ds2.f
+SELECT ds1, ds2;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.04.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.04.query.sqlpp
new file mode 100644
index 0000000..954341c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.04.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds2 LEFT OUTER JOIN ds1 ON ds2.f > ds1.f
+SELECT ds1, ds2;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.05.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.05.query.sqlpp
new file mode 100644
index 0000000..e5ba174
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.05.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds1 JOIN ds2 ON ds1.f > ds2.f
+SELECT ds1, ds2;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.06.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.06.query.sqlpp
new file mode 100644
index 0000000..9e6c335
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/leftouterjoin/empty-dataset/empty-dataset.06.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds2 JOIN ds1 ON ds2.f > ds1.f
+SELECT ds1, ds2;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/leftouterjoin/empty-dataset/empty-dataset.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/leftouterjoin/empty-dataset/empty-dataset.03.adm
new file mode 100644
index 0000000..2cddb05
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/leftouterjoin/empty-dataset/empty-dataset.03.adm
@@ -0,0 +1 @@
+{ "ds1": { "id": "1", "f": 3 } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/leftouterjoin/empty-dataset/empty-dataset.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/leftouterjoin/empty-dataset/empty-dataset.04.adm
new file mode 100644
index 0000000..e69de29
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/leftouterjoin/empty-dataset/empty-dataset.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/leftouterjoin/empty-dataset/empty-dataset.05.adm
new file mode 100644
index 0000000..e69de29
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/leftouterjoin/empty-dataset/empty-dataset.06.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/leftouterjoin/empty-dataset/empty-dataset.06.adm
new file mode 100644
index 0000000..e69de29
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index be3e55c..ca9e269 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -11815,6 +11815,11 @@
         <output-dir compare="Text">query_issue849-2</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="leftouterjoin">
+      <compilation-unit name="empty-dataset">
+        <output-dir compare="Text">empty-dataset</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
   <test-group name="index-leftouterjoin">
     <test-case FilePath="index-leftouterjoin">
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/NestedLoopJoin.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/NestedLoopJoin.java
index 361d1ee..2eae25c 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/NestedLoopJoin.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/NestedLoopJoin.java
@@ -113,9 +113,15 @@ public class NestedLoopJoin {
             RunFileReader runFileReader = runFileWriter.createReader();
             try {
                 runFileReader.open();
-                while (runFileReader.nextFrame(innerBuffer)) {
+                if (runFileReader.nextFrame(innerBuffer)) {
+                    do {
+                        for (int i = 0; i < outerBufferMngr.getNumFrames(); i++) {
+                            blockJoin(outerBufferMngr.getFrame(i, tempInfo), innerBuffer.getBuffer(), writer);
+                        }
+                    } while (runFileReader.nextFrame(innerBuffer));
+                } else if (isLeftOuter) {
                     for (int i = 0; i < outerBufferMngr.getNumFrames(); i++) {
-                        blockJoin(outerBufferMngr.getFrame(i, tempInfo), innerBuffer.getBuffer(), writer);
+                        appendMissing(outerBufferMngr.getFrame(i, tempInfo), writer);
                     }
                 }
             } finally {
@@ -178,6 +184,18 @@ public class NestedLoopJoin {
         FrameUtils.appendConcatToWriter(writer, appender, accessor1, tupleId1, accessor2, tupleId2);
     }
 
+    private void appendMissing(BufferInfo outerBufferInfo, IFrameWriter writer) throws HyracksDataException {
+        accessorOuter.reset(outerBufferInfo.getBuffer(), outerBufferInfo.getStartOffset(), outerBufferInfo.getLength());
+        int tupleCount = accessorOuter.getTupleCount();
+        for (int i = 0; i < tupleCount; ++i) {
+            final int[] ntFieldEndOffsets = missingTupleBuilder.getFieldEndOffsets();
+            final byte[] ntByteArray = missingTupleBuilder.getByteArray();
+            final int ntSize = missingTupleBuilder.getSize();
+            FrameUtils.appendConcatToWriter(writer, appender, accessorOuter, i, ntFieldEndOffsets, ntByteArray, 0,
+                    ntSize);
+        }
+    }
+
     public void closeCache() throws HyracksDataException {
         if (runFileWriter != null) {
             runFileWriter.close();
@@ -188,9 +206,15 @@ public class NestedLoopJoin {
         RunFileReader runFileReader = runFileWriter.createDeleteOnCloseReader();
         try {
             runFileReader.open();
-            while (runFileReader.nextFrame(innerBuffer)) {
+            if (runFileReader.nextFrame(innerBuffer)) {
+                do {
+                    for (int i = 0; i < outerBufferMngr.getNumFrames(); i++) {
+                        blockJoin(outerBufferMngr.getFrame(i, tempInfo), innerBuffer.getBuffer(), writer);
+                    }
+                } while (runFileReader.nextFrame(innerBuffer));
+            } else if (isLeftOuter) {
                 for (int i = 0; i < outerBufferMngr.getNumFrames(); i++) {
-                    blockJoin(outerBufferMngr.getFrame(i, tempInfo), innerBuffer.getBuffer(), writer);
+                    appendMissing(outerBufferMngr.getFrame(i, tempInfo), writer);
                 }
             }
         } finally {