You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by dl...@apache.org on 2021/01/16 04:34:49 UTC

[asterixdb] 01/06: [ASTERIXDB-2815][COMP] DISTINCT in subquery gives wrong result

This is an automated email from the ASF dual-hosted git repository.

dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit b7d6ddccf8477c74913b1b52b2536e0dd8975dcf
Author: Dmitry Lychagin <dm...@couchbase.com>
AuthorDate: Wed Jan 13 16:05:09 2021 -0800

    [ASTERIXDB-2815][COMP] DISTINCT in subquery gives wrong result
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - Fix DISTINCT operator handling during subplan removal
    - Added testcases
    
    Change-Id: Idfe0aa09faba9bdad0c9c71e96d1facf07f401c0
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/9585
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Ali Alsuliman <al...@gmail.com>
---
 .../RemoveLeftOuterUnnestForLeftOuterJoinRule.java |  3 +-
 .../subplan/InlineAllNtsInSubplanVisitor.java      |  7 +-
 ...nlineLeftNtsInSubplanJoinFlatteningVisitor.java |  7 +-
 .../SubplanSpecialFlatteningCheckVisitor.java      |  2 +-
 .../queries/subquery/query-ASTERIXDB-2815.sqlpp    | 43 +++++++++++++
 .../results/subquery/query-ASTERIXDB-2815.plan     | 74 ++++++++++++++++++++++
 .../query-ASTERIXDB-2815.1.ddl.sqlpp               | 33 ++++++++++
 .../query-ASTERIXDB-2815.2.update.sqlpp            | 50 +++++++++++++++
 .../query-ASTERIXDB-2815.3.query.sqlpp             | 30 +++++++++
 .../query-ASTERIXDB-2815.3.adm                     |  2 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  5 ++
 .../operators/logical/DistinctOperator.java        | 15 ++---
 .../visitors/CardinalityInferenceVisitor.java      |  2 +
 13 files changed, 257 insertions(+), 16 deletions(-)

diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveLeftOuterUnnestForLeftOuterJoinRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveLeftOuterUnnestForLeftOuterJoinRule.java
index 364816b..775a1df 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveLeftOuterUnnestForLeftOuterJoinRule.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveLeftOuterUnnestForLeftOuterJoinRule.java
@@ -91,7 +91,8 @@ public class RemoveLeftOuterUnnestForLeftOuterJoinRule implements IAlgebraicRewr
         Triple<Boolean, ILogicalExpression, ILogicalExpression> checkGbyResult =
                 checkUnnestAndGby(outerUnnest, gbyOperator);
         // The argument for listify and not(is-missing(...)) check should be variables.
-        if (!isVariableReference(checkGbyResult.second) || !isVariableReference(checkGbyResult.third)) {
+        if (!checkGbyResult.first || checkGbyResult.second == null || !isVariableReference(checkGbyResult.second)
+                || checkGbyResult.third == null || !isVariableReference(checkGbyResult.third)) {
             return false;
         }
 
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/InlineAllNtsInSubplanVisitor.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/InlineAllNtsInSubplanVisitor.java
index 78c4c5e..f7cbb9d 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/InlineAllNtsInSubplanVisitor.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/InlineAllNtsInSubplanVisitor.java
@@ -626,10 +626,11 @@ class InlineAllNtsInSubplanVisitor implements IQueryOperatorVisitor<ILogicalOper
     @Override
     public ILogicalOperator visitDistinctOperator(DistinctOperator op, Void arg) throws AlgebricksException {
         visitSingleInputOperator(op);
-        List<LogicalVariable> distinctVarList = op.getDistinctByVarList();
         for (LogicalVariable keyVar : correlatedKeyVars) {
-            if (!distinctVarList.contains(keyVar)) {
-                distinctVarList.add(keyVar);
+            if (!op.isDistinctByVar(keyVar)) {
+                VariableReferenceExpression keyVarRef = new VariableReferenceExpression(keyVar);
+                keyVarRef.setSourceLocation(op.getSourceLocation());
+                op.getExpressions().add(new MutableObject<>(keyVarRef));
             }
         }
         context.computeAndSetTypeEnvironmentForOperator(op);
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/InlineLeftNtsInSubplanJoinFlatteningVisitor.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/InlineLeftNtsInSubplanJoinFlatteningVisitor.java
index 12596ff..8326f85 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/InlineLeftNtsInSubplanJoinFlatteningVisitor.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/InlineLeftNtsInSubplanJoinFlatteningVisitor.java
@@ -361,10 +361,11 @@ class InlineLeftNtsInSubplanJoinFlatteningVisitor implements IQueryOperatorVisit
         if (!rewritten || !underJoin) {
             return op;
         }
-        List<LogicalVariable> distinctVarList = op.getDistinctByVarList();
         for (LogicalVariable keyVar : liveVarsFromSubplanInput) {
-            if (!distinctVarList.contains(keyVar)) {
-                distinctVarList.add(keyVar);
+            if (!op.isDistinctByVar(keyVar)) {
+                VariableReferenceExpression keyVarRef = new VariableReferenceExpression(keyVar);
+                keyVarRef.setSourceLocation(op.getSourceLocation());
+                op.getExpressions().add(new MutableObject<>(keyVarRef));
             }
         }
         context.computeAndSetTypeEnvironmentForOperator(op);
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/SubplanSpecialFlatteningCheckVisitor.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/SubplanSpecialFlatteningCheckVisitor.java
index 288b01a..a0bb4b6 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/SubplanSpecialFlatteningCheckVisitor.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/subplan/SubplanSpecialFlatteningCheckVisitor.java
@@ -220,7 +220,7 @@ class SubplanSpecialFlatteningCheckVisitor implements IQueryOperatorVisitor<Bool
 
     @Override
     public Boolean visitDistinctOperator(DistinctOperator op, Void arg) throws AlgebricksException {
-        return visitInputs(op);
+        return visitTupleDiscardingOperator(op);
     }
 
     @Override
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/subquery/query-ASTERIXDB-2815.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/subquery/query-ASTERIXDB-2815.sqlpp
new file mode 100644
index 0000000..9fd4e02
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/subquery/query-ASTERIXDB-2815.sqlpp
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+create type t1 as {
+ _id: uuid
+};
+
+create dataset RawTweet(t1) primary key _id autogenerated;
+
+create dataset Evidence(t1) primary key _id autogenerated;
+
+create dataset Verification(t1) primary key _id autogenerated;
+
+select t.id, urls
+from RawTweet t
+let urls = (
+  select distinct value e.url
+  from Verification v, v.evidence ve, Evidence e
+  where t.id = v.tweet_id and ve = e.ev_id
+)
+where array_count(urls) > 2
+order by t.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/subquery/query-ASTERIXDB-2815.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/subquery/query-ASTERIXDB-2815.plan
new file mode 100644
index 0000000..363b2bd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/subquery/query-ASTERIXDB-2815.plan
@@ -0,0 +1,74 @@
+-- DISTRIBUTE_RESULT  |PARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+    -- STREAM_PROJECT  |PARTITIONED|
+      -- ASSIGN  |PARTITIONED|
+        -- SORT_MERGE_EXCHANGE [$$75(ASC) ]  |PARTITIONED|
+          -- STABLE_SORT [$$75(ASC)]  |PARTITIONED|
+            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+              -- STREAM_PROJECT  |PARTITIONED|
+                -- STREAM_SELECT  |PARTITIONED|
+                  -- STREAM_PROJECT  |PARTITIONED|
+                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                      -- PRE_CLUSTERED_GROUP_BY[$$70]  |PARTITIONED|
+                              {
+                                -- AGGREGATE  |LOCAL|
+                                  -- STREAM_SELECT  |LOCAL|
+                                    -- NESTED_TUPLE_SOURCE  |LOCAL|
+                              }
+                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                          -- STREAM_PROJECT  |PARTITIONED|
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              -- HYBRID_HASH_JOIN [$$70][$$82]  |PARTITIONED|
+                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                  -- STREAM_PROJECT  |PARTITIONED|
+                                    -- ASSIGN  |PARTITIONED|
+                                      -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                        -- REPLICATE  |PARTITIONED|
+                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                            -- STREAM_PROJECT  |PARTITIONED|
+                                              -- ASSIGN  |PARTITIONED|
+                                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                  -- DATASOURCE_SCAN (test.RawTweet)  |PARTITIONED|
+                                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                      -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                -- HASH_PARTITION_EXCHANGE [$$82]  |PARTITIONED|
+                                  -- ASSIGN  |PARTITIONED|
+                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                      -- PRE_SORTED_DISTINCT_BY  |PARTITIONED|
+                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                          -- STABLE_SORT [$$62(ASC), $$82(ASC)]  |PARTITIONED|
+                                            -- HASH_PARTITION_EXCHANGE [$$62, $$82]  |PARTITIONED|
+                                              -- STREAM_PROJECT  |PARTITIONED|
+                                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                  -- HYBRID_HASH_JOIN [$$ve][$$73]  |PARTITIONED|
+                                                    -- HASH_PARTITION_EXCHANGE [$$ve]  |PARTITIONED|
+                                                      -- STREAM_PROJECT  |PARTITIONED|
+                                                        -- UNNEST  |PARTITIONED|
+                                                          -- STREAM_PROJECT  |PARTITIONED|
+                                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                              -- HYBRID_HASH_JOIN [$$80][$$76]  |PARTITIONED|
+                                                                -- HASH_PARTITION_EXCHANGE [$$80]  |PARTITIONED|
+                                                                  -- REPLICATE  |PARTITIONED|
+                                                                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                      -- STREAM_PROJECT  |PARTITIONED|
+                                                                        -- ASSIGN  |PARTITIONED|
+                                                                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                            -- DATASOURCE_SCAN (test.RawTweet)  |PARTITIONED|
+                                                                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                                -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                                                -- HASH_PARTITION_EXCHANGE [$$76]  |PARTITIONED|
+                                                                  -- STREAM_PROJECT  |PARTITIONED|
+                                                                    -- ASSIGN  |PARTITIONED|
+                                                                      -- STREAM_PROJECT  |PARTITIONED|
+                                                                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                          -- DATASOURCE_SCAN (test.Verification)  |PARTITIONED|
+                                                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                                                    -- HASH_PARTITION_EXCHANGE [$$73]  |PARTITIONED|
+                                                      -- STREAM_PROJECT  |PARTITIONED|
+                                                        -- ASSIGN  |PARTITIONED|
+                                                          -- STREAM_PROJECT  |PARTITIONED|
+                                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                              -- DATASOURCE_SCAN (test.Evidence)  |PARTITIONED|
+                                                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.1.ddl.sqlpp
new file mode 100644
index 0000000..7de1e4e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.1.ddl.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+create type t1 as {
+ _id: uuid
+};
+
+create dataset RawTweet(t1) primary key _id autogenerated;
+
+create dataset Evidence(t1) primary key _id autogenerated;
+
+create dataset Verification(t1) primary key _id autogenerated;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.2.update.sqlpp
new file mode 100644
index 0000000..29a4ff6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.2.update.sqlpp
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+insert into RawTweet ([
+  { "id" : 1, "full_text": "text_1" }, -- total evidence : 2
+  { "id" : 2, "full_text": "text_2" }, -- total evidence : 4 (*) -- satisfies the query
+  { "id" : 3, "full_text": "text_3" }, -- total evidence : 0
+  { "id" : 4, "full_text": "text_4" }, -- total evidence : 6 (*) -- satisfies the query
+  { "id" : 5, "full_text": "text_5" }  -- total evidence : 1
+  ]);
+
+insert into Verification ([
+  { "ver_id" : 1001, "tweet_id": 1, "evidence": [ 2001, 2002 ] },
+  { "ver_id" : 1002, "tweet_id": 2, "evidence": [ 2003, 2004 ] },
+  { "ver_id" : 1003, "tweet_id": 2, "evidence": [ 2009, 2010 ] },
+  { "ver_id" : 1004, "tweet_id": 4, "evidence": [ 2004, 2005, 2006 ] },
+  { "ver_id" : 1005, "tweet_id": 4, "evidence": [ 2007, 2008, 2009 ] },
+  { "ver_id" : 1006, "tweet_id": 5, "evidence": [ 2001 ] }
+]);
+
+insert into Evidence ([
+  { "ev_id" : 2001, "url": "http://example.org/2001" },
+  { "ev_id" : 2002, "url": "http://example.org/2002" },
+  { "ev_id" : 2003, "url": "http://example.org/2003" },
+  { "ev_id" : 2004, "url": "http://example.org/2004" },
+  { "ev_id" : 2005, "url": "http://example.org/2005" },
+  { "ev_id" : 2006, "url": "http://example.org/2006" },
+  { "ev_id" : 2007, "url": "http://example.org/2007" },
+  { "ev_id" : 2008, "url": "http://example.org/2008" },
+  { "ev_id" : 2009, "url": "http://example.org/2009" },
+  { "ev_id" : 2010, "url": "http://example.org/2010" }
+]);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.3.query.sqlpp
new file mode 100644
index 0000000..9072bb7
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.3.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select t.id, array_sort(urls) urls
+from RawTweet t
+let urls = (
+  select distinct value e.url
+  from Verification v, v.evidence ve, Evidence e
+  where t.id = v.tweet_id and ve = e.ev_id
+)
+where array_count(urls) > 2
+order by t.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.3.adm
new file mode 100644
index 0000000..448de2d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/subquery/query-ASTERIXDB-2815/query-ASTERIXDB-2815.3.adm
@@ -0,0 +1,2 @@
+{ "urls": [ "http://example.org/2003", "http://example.org/2004", "http://example.org/2009", "http://example.org/2010" ], "id": 2 }
+{ "urls": [ "http://example.org/2004", "http://example.org/2005", "http://example.org/2006", "http://example.org/2007", "http://example.org/2008", "http://example.org/2009" ], "id": 4 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index bca453d..813af62 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -10400,6 +10400,11 @@
         <output-dir compare="Text">query-ASTERIXDB-1674</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="subquery">
+      <compilation-unit name="query-ASTERIXDB-2815">
+        <output-dir compare="Text">query-ASTERIXDB-2815</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
   <test-group name="subset-collection">
     <test-case FilePath="subset-collection">
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/DistinctOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/DistinctOperator.java
index 9ac6659..673d80d 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/DistinctOperator.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/DistinctOperator.java
@@ -52,8 +52,8 @@ public class DistinctOperator extends AbstractLogicalOperator {
 
     @Override
     public void recomputeSchema() {
-        schema = new ArrayList<LogicalVariable>();
-        schema.addAll(this.getDistinctByVarList());
+        schema = new ArrayList<>();
+        schema.addAll(getDistinctByVarList());
         List<LogicalVariable> inputSchema = inputs.get(0).getValue().getSchema();
         for (LogicalVariable var : inputSchema) {
             if (!schema.contains(var)) {
@@ -66,13 +66,12 @@ public class DistinctOperator extends AbstractLogicalOperator {
     public VariablePropagationPolicy getVariablePropagationPolicy() {
         return new VariablePropagationPolicy() {
             @Override
-            public void propagateVariables(IOperatorSchema target, IOperatorSchema... sources)
-                    throws AlgebricksException {
-                /** make sure distinct key vars laid-out first */
+            public void propagateVariables(IOperatorSchema target, IOperatorSchema... sources) {
+                /* make sure distinct key vars laid-out first */
                 for (LogicalVariable keyVar : getDistinctByVarList()) {
                     target.addVariable(keyVar);
                 }
-                /** add other source vars */
+                /* add other source vars */
                 for (IOperatorSchema srcSchema : sources) {
                     for (LogicalVariable srcVar : srcSchema)
                         if (target.findVariable(srcVar) < 0) {
@@ -105,7 +104,7 @@ public class DistinctOperator extends AbstractLogicalOperator {
     }
 
     public List<LogicalVariable> getDistinctByVarList() {
-        List<LogicalVariable> varList = new ArrayList<LogicalVariable>(expressions.size());
+        List<LogicalVariable> varList = new ArrayList<>(expressions.size());
         for (Mutable<ILogicalExpression> eRef : expressions) {
             ILogicalExpression e = eRef.getValue();
             if (e.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
@@ -121,7 +120,7 @@ public class DistinctOperator extends AbstractLogicalOperator {
             ILogicalExpression e = eRef.getValue();
             if (e.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
                 VariableReferenceExpression v = (VariableReferenceExpression) e;
-                if (v.getVariableReference() == var) {
+                if (v.getVariableReference().equals(var)) {
                     return true;
                 }
             }
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/CardinalityInferenceVisitor.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/CardinalityInferenceVisitor.java
index 54cbcd0..f68638d 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/CardinalityInferenceVisitor.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/CardinalityInferenceVisitor.java
@@ -244,6 +244,8 @@ public class CardinalityInferenceVisitor implements ILogicalOperatorVisitor<Long
 
     @Override
     public Long visitDistinctOperator(DistinctOperator op, Void arg) throws AlgebricksException {
+        // DISTINCT cannot reduce cardinality from ONE to ZERO_OR_ONE, or from ONE_GROUP to ZERO_OR_ONE_GROUP
+        // therefore we don't need to call adjustCardinalityForTupleReductionOperator() here.
         return op.getInputs().get(0).getValue().accept(this, arg);
     }