You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ti...@apache.org on 2018/06/14 04:11:21 UTC

[drill] 02/03: DRILL-6474: Don't use TopN when order by and offset are used without a limit specified.

This is an automated email from the ASF dual-hosted git repository.

timothyfarkas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 98dbc3a222990703aebe983883779763e0cdc1e9
Author: Timothy Farkas <ti...@apache.org>
AuthorDate: Wed Jun 6 12:04:39 2018 -0700

    DRILL-6474: Don't use TopN when order by and offset are used without a limit specified.
    
    closes #1313
---
 .../exec/planner/physical/PushLimitToTopN.java     | 21 +++++++++----
 .../physical/impl/limit/TestLimitOperator.java     | 23 ++++++++++++++
 .../physical/impl/limit/TestLimitPlanning.java     | 32 ++++++++++++++++++++
 .../org/apache/drill/test/DrillTestWrapper.java    | 24 ++++++++++++---
 .../java/org/apache/drill/test/TestBuilder.java    | 35 ++++++++++------------
 5 files changed, 105 insertions(+), 30 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
index 66126ec..1053941 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
@@ -32,19 +32,28 @@ public class PushLimitToTopN  extends Prule{
 
   @Override
   public boolean matches(RelOptRuleCall call) {
-    return PrelUtil.getPlannerSettings(call.getPlanner()).getOptions()
-      .getOption(PlannerSettings.TOPN.getOptionName()).bool_val;
+    boolean topNEnabled = PrelUtil.getPlannerSettings(call.getPlanner()).getOptions().getOption(PlannerSettings.TOPN.getOptionName()).bool_val;
+
+    if (!topNEnabled) {
+      return false;
+    } else {
+      // If no limit is defined it doesn't make sense to use TopN since it could use unbounded memory in this case.
+      // We should use the sort and limit operator in this case.
+      // This also fixes DRILL-6474
+      final LimitPrel limit = call.rel(0);
+      return limit.getFetch() != null;
+    }
   }
 
   @Override
   public void onMatch(RelOptRuleCall call) {
-    final LimitPrel limit = (LimitPrel) call.rel(0);
-    final SingleMergeExchangePrel smex = (SingleMergeExchangePrel) call.rel(1);
-    final SortPrel sort = (SortPrel) call.rel(2);
+    final LimitPrel limit = call.rel(0);
+    final SingleMergeExchangePrel smex = call.rel(1);
+    final SortPrel sort = call.rel(2);
 
     // First offset to include into results (inclusive). Null implies it is starting from offset 0
     int offset = limit.getOffset() != null ? Math.max(0, RexLiteral.intValue(limit.getOffset())) : 0;
-    int fetch = limit.getFetch() != null?  Math.max(0, RexLiteral.intValue(limit.getFetch())) : 0;
+    int fetch = Math.max(0, RexLiteral.intValue(limit.getFetch()));
 
     final TopNPrel topN = new TopNPrel(limit.getCluster(), sort.getTraitSet(), sort.getInput(), offset + fetch, sort.getCollation());
     final LimitPrel newLimit = new LimitPrel(limit.getCluster(), limit.getTraitSet(),
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
index 22c0013..7225edc 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
@@ -20,12 +20,35 @@ package org.apache.drill.exec.physical.impl.limit;
 import com.google.common.collect.Lists;
 import org.apache.drill.exec.physical.config.Limit;
 import org.apache.drill.exec.physical.unit.PhysicalOpUnitTestBase;
+import org.apache.drill.test.BaseDirTestWatcher;
+import org.apache.drill.test.ClientFixture;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterFixtureBuilder;
+import org.junit.Rule;
 import org.junit.Test;
 
 import java.util.List;
 
 public class TestLimitOperator extends PhysicalOpUnitTestBase {
 
+  @Rule
+  public BaseDirTestWatcher baseDirTestWatcher = new BaseDirTestWatcher();
+
+  // DRILL-6474
+  @Test
+  public void testLimitIntegrationTest() throws Exception {
+    final ClusterFixtureBuilder builder = new ClusterFixtureBuilder(baseDirTestWatcher);
+
+    try (ClusterFixture clusterFixture = builder.build();
+         ClientFixture clientFixture = clusterFixture.clientFixture()) {
+      clientFixture.testBuilder()
+        .sqlQuery("select name_s10 from `mock`.`employees_100000` order by name_s10 offset 100")
+        .expectsNumRecords(99900)
+        .build()
+        .run();
+    }
+  }
+
   @Test
   public void testLimitMoreRecords() {
     Limit limitConf = new Limit(null, 0, 10);
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitPlanning.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitPlanning.java
new file mode 100644
index 0000000..3f5fee2
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitPlanning.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.limit;
+
+import org.apache.drill.PlanTestBase;
+import org.junit.Test;
+
+public class TestLimitPlanning extends PlanTestBase {
+
+  // DRILL-6474
+  @Test
+  public void dontPushdownIntoTopNWhenNoLimit() throws Exception {
+    String query = "select full_name from cp.`employee.json` order by full_name offset 10";
+
+    PlanTestBase.testPlanMatchingPatterns(query, new String[]{".*Sort\\(.*"}, new String[]{".*TopN\\(.*"});
+  }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java b/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java
index 0dfc1f7..051f4b3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java
@@ -33,6 +33,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 
+import com.google.common.base.Preconditions;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
@@ -85,6 +86,7 @@ public class DrillTestWrapper {
 
   // Unit test doesn't expect any specific batch count
   public static final int EXPECTED_BATCH_COUNT_NOT_SET = -1;
+  public static final int EXPECTED_NUM_RECORDS_NOT_SET = - 1;
 
   // The motivation behind the TestBuilder was to provide a clean API for test writers. The model is mostly designed to
   // prepare all of the components necessary for running the tests, before the TestWrapper is initialized. There is however
@@ -119,11 +121,13 @@ public class DrillTestWrapper {
   private List<Map<String, Object>> baselineRecords;
 
   private int expectedNumBatches;
+  private int expectedNumRecords;
 
   public DrillTestWrapper(TestBuilder testBuilder, TestServices services, Object query, QueryType queryType,
       String baselineOptionSettingQueries, String testOptionSettingQueries,
       QueryType baselineQueryType, boolean ordered, boolean highPerformanceComparison,
-      String[] baselineColumns, List<Map<String, Object>> baselineRecords, int expectedNumBatches) {
+      String[] baselineColumns, List<Map<String, Object>> baselineRecords, int expectedNumBatches,
+      int expectedNumRecords) {
     this.testBuilder = testBuilder;
     this.services = services;
     this.query = query;
@@ -136,6 +140,13 @@ public class DrillTestWrapper {
     this.baselineColumns = baselineColumns;
     this.baselineRecords = baselineRecords;
     this.expectedNumBatches = expectedNumBatches;
+    this.expectedNumRecords = expectedNumRecords;
+
+    Preconditions.checkArgument(!(baselineRecords != null && !ordered && highPerformanceComparison));
+    Preconditions.checkArgument((baselineRecords != null && expectedNumRecords == DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) || baselineRecords == null,
+      "Cannot define both baselineRecords and the expectedNumRecords.");
+    Preconditions.checkArgument((baselineQueryType != null && expectedNumRecords == DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) || baselineQueryType == null,
+      "Cannot define both a baselineQueryType and the expectedNumRecords.");
   }
 
   public void run() throws Exception {
@@ -527,9 +538,14 @@ public class DrillTestWrapper {
       // If baseline data was not provided to the test builder directly, we must run a query for the baseline, this includes
       // the cases where the baseline is stored in a file.
       if (baselineRecords == null) {
-        test(baselineOptionSettingQueries);
-        expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
-        addToMaterializedResults(expectedRecords, expected, loader);
+        if (expectedNumRecords != DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) {
+          Assert.assertEquals(expectedNumRecords, actualRecords.size());
+          return;
+        } else {
+          test(baselineOptionSettingQueries);
+          expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
+          addToMaterializedResults(expectedRecords, expected, loader);
+        }
       } else {
         expectedRecords = baselineRecords;
       }
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
index e40f86d..98a0a9a 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
@@ -91,6 +91,7 @@ public class TestBuilder {
   private List<Map<String, Object>> baselineRecords;
 
   private int expectedNumBatches = DrillTestWrapper.EXPECTED_BATCH_COUNT_NOT_SET;
+  private int expectedNumRecords = DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET;
 
   public TestBuilder(TestServices services) {
     this.services = services;
@@ -127,12 +128,9 @@ public class TestBuilder {
     return this;
   }
 
-  public DrillTestWrapper build() throws Exception {
-    if ( ! ordered && highPerformanceComparison ) {
-      throw new Exception("High performance comparison only available for ordered checks, to enforce this restriction, ordered() must be called first.");
-    }
+  public DrillTestWrapper build() {
     return new DrillTestWrapper(this, services, query, queryType, baselineOptionSettingQueries, testOptionSettingQueries,
-        getValidationQueryType(), ordered, highPerformanceComparison, baselineColumns, baselineRecords, expectedNumBatches);
+        getValidationQueryType(), ordered, highPerformanceComparison, baselineColumns, baselineRecords, expectedNumBatches, expectedNumRecords);
   }
 
   public List<Pair<SchemaPath, TypeProtos.MajorType>> getExpectedSchema() {
@@ -248,17 +246,8 @@ public class TestBuilder {
     throw new RuntimeException("Must provide some kind of baseline, either a baseline file or another query");
   }
 
-  protected UserBitShared.QueryType getValidationQueryType() throws Exception {
-    if (singleExplicitBaselineRecord()) {
-      return null;
-    }
-
-    if (ordered) {
-      // If there are no base line records or no baseline query then we will check to make sure that the records are in ascending order
-      return null;
-    }
-
-    throw new RuntimeException("Must provide some kind of baseline, either a baseline file or another query");
+  protected UserBitShared.QueryType getValidationQueryType() {
+    return null;
   }
 
   public JSONTestBuilder jsonBaselineFile(String filePath) {
@@ -329,6 +318,12 @@ public class TestBuilder {
     return this;
   }
 
+  public TestBuilder expectsNumRecords(int expectedNumRecords) {
+    this.expectedNumRecords = expectedNumRecords;
+    this.ordered = false;
+    return this;
+  }
+
   /**
    * This method is used to pass in a simple list of values for a single record verification without
    * the need to create a CSV or JSON file to store the baseline.
@@ -544,7 +539,7 @@ public class TestBuilder {
     }
 
     @Override
-    protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+    protected UserBitShared.QueryType getValidationQueryType() {
       return UserBitShared.QueryType.SQL;
     }
   }
@@ -577,7 +572,7 @@ public class TestBuilder {
     }
 
     @Override
-    protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+    protected UserBitShared.QueryType getValidationQueryType() {
       return null;
     }
 
@@ -608,7 +603,7 @@ public class TestBuilder {
     }
 
     @Override
-    protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+    protected UserBitShared.QueryType getValidationQueryType() {
       return UserBitShared.QueryType.SQL;
     }
 
@@ -639,7 +634,7 @@ public class TestBuilder {
     }
 
     @Override
-    protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+    protected UserBitShared.QueryType getValidationQueryType() {
       return baselineQueryType;
     }
 

-- 
To stop receiving notification emails like this one, please contact
timothyfarkas@apache.org.