You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ti...@apache.org on 2018/06/14 04:11:21 UTC
[drill] 02/03: DRILL-6474: Don't use TopN when order by and offset
are used without a limit specified.
This is an automated email from the ASF dual-hosted git repository.
timothyfarkas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit 98dbc3a222990703aebe983883779763e0cdc1e9
Author: Timothy Farkas <ti...@apache.org>
AuthorDate: Wed Jun 6 12:04:39 2018 -0700
DRILL-6474: Don't use TopN when order by and offset are used without a limit specified.
closes #1313
---
.../exec/planner/physical/PushLimitToTopN.java | 21 +++++++++----
.../physical/impl/limit/TestLimitOperator.java | 23 ++++++++++++++
.../physical/impl/limit/TestLimitPlanning.java | 32 ++++++++++++++++++++
.../org/apache/drill/test/DrillTestWrapper.java | 24 ++++++++++++---
.../java/org/apache/drill/test/TestBuilder.java | 35 ++++++++++------------
5 files changed, 105 insertions(+), 30 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
index 66126ec..1053941 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
@@ -32,19 +32,28 @@ public class PushLimitToTopN extends Prule{
@Override
public boolean matches(RelOptRuleCall call) {
- return PrelUtil.getPlannerSettings(call.getPlanner()).getOptions()
- .getOption(PlannerSettings.TOPN.getOptionName()).bool_val;
+ boolean topNEnabled = PrelUtil.getPlannerSettings(call.getPlanner()).getOptions().getOption(PlannerSettings.TOPN.getOptionName()).bool_val;
+
+ if (!topNEnabled) {
+ return false;
+ } else {
+ // If no limit is defined it doesn't make sense to use TopN since it could use unbounded memory in this case.
+ // We should use the sort and limit operator in this case.
+ // This also fixes DRILL-6474
+ final LimitPrel limit = call.rel(0);
+ return limit.getFetch() != null;
+ }
}
@Override
public void onMatch(RelOptRuleCall call) {
- final LimitPrel limit = (LimitPrel) call.rel(0);
- final SingleMergeExchangePrel smex = (SingleMergeExchangePrel) call.rel(1);
- final SortPrel sort = (SortPrel) call.rel(2);
+ final LimitPrel limit = call.rel(0);
+ final SingleMergeExchangePrel smex = call.rel(1);
+ final SortPrel sort = call.rel(2);
// First offset to include into results (inclusive). Null implies it is starting from offset 0
int offset = limit.getOffset() != null ? Math.max(0, RexLiteral.intValue(limit.getOffset())) : 0;
- int fetch = limit.getFetch() != null? Math.max(0, RexLiteral.intValue(limit.getFetch())) : 0;
+ int fetch = Math.max(0, RexLiteral.intValue(limit.getFetch()));
final TopNPrel topN = new TopNPrel(limit.getCluster(), sort.getTraitSet(), sort.getInput(), offset + fetch, sort.getCollation());
final LimitPrel newLimit = new LimitPrel(limit.getCluster(), limit.getTraitSet(),
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
index 22c0013..7225edc 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
@@ -20,12 +20,35 @@ package org.apache.drill.exec.physical.impl.limit;
import com.google.common.collect.Lists;
import org.apache.drill.exec.physical.config.Limit;
import org.apache.drill.exec.physical.unit.PhysicalOpUnitTestBase;
+import org.apache.drill.test.BaseDirTestWatcher;
+import org.apache.drill.test.ClientFixture;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterFixtureBuilder;
+import org.junit.Rule;
import org.junit.Test;
import java.util.List;
public class TestLimitOperator extends PhysicalOpUnitTestBase {
+ @Rule
+ public BaseDirTestWatcher baseDirTestWatcher = new BaseDirTestWatcher();
+
+ // DRILL-6474
+ @Test
+ public void testLimitIntegrationTest() throws Exception {
+ final ClusterFixtureBuilder builder = new ClusterFixtureBuilder(baseDirTestWatcher);
+
+ try (ClusterFixture clusterFixture = builder.build();
+ ClientFixture clientFixture = clusterFixture.clientFixture()) {
+ clientFixture.testBuilder()
+ .sqlQuery("select name_s10 from `mock`.`employees_100000` order by name_s10 offset 100")
+ .expectsNumRecords(99900)
+ .build()
+ .run();
+ }
+ }
+
@Test
public void testLimitMoreRecords() {
Limit limitConf = new Limit(null, 0, 10);
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitPlanning.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitPlanning.java
new file mode 100644
index 0000000..3f5fee2
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitPlanning.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.limit;
+
+import org.apache.drill.PlanTestBase;
+import org.junit.Test;
+
+public class TestLimitPlanning extends PlanTestBase {
+
+ // DRILL-6474
+ @Test
+ public void dontPushdownIntoTopNWhenNoLimit() throws Exception {
+ String query = "select full_name from cp.`employee.json` order by full_name offset 10";
+
+ PlanTestBase.testPlanMatchingPatterns(query, new String[]{".*Sort\\(.*"}, new String[]{".*TopN\\(.*"});
+ }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java b/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java
index 0dfc1f7..051f4b3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java
@@ -33,6 +33,7 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
+import com.google.common.base.Preconditions;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos;
@@ -85,6 +86,7 @@ public class DrillTestWrapper {
// Unit test doesn't expect any specific batch count
public static final int EXPECTED_BATCH_COUNT_NOT_SET = -1;
+ public static final int EXPECTED_NUM_RECORDS_NOT_SET = - 1;
// The motivation behind the TestBuilder was to provide a clean API for test writers. The model is mostly designed to
// prepare all of the components necessary for running the tests, before the TestWrapper is initialized. There is however
@@ -119,11 +121,13 @@ public class DrillTestWrapper {
private List<Map<String, Object>> baselineRecords;
private int expectedNumBatches;
+ private int expectedNumRecords;
public DrillTestWrapper(TestBuilder testBuilder, TestServices services, Object query, QueryType queryType,
String baselineOptionSettingQueries, String testOptionSettingQueries,
QueryType baselineQueryType, boolean ordered, boolean highPerformanceComparison,
- String[] baselineColumns, List<Map<String, Object>> baselineRecords, int expectedNumBatches) {
+ String[] baselineColumns, List<Map<String, Object>> baselineRecords, int expectedNumBatches,
+ int expectedNumRecords) {
this.testBuilder = testBuilder;
this.services = services;
this.query = query;
@@ -136,6 +140,13 @@ public class DrillTestWrapper {
this.baselineColumns = baselineColumns;
this.baselineRecords = baselineRecords;
this.expectedNumBatches = expectedNumBatches;
+ this.expectedNumRecords = expectedNumRecords;
+
+ Preconditions.checkArgument(!(baselineRecords != null && !ordered && highPerformanceComparison));
+ Preconditions.checkArgument((baselineRecords != null && expectedNumRecords == DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) || baselineRecords == null,
+ "Cannot define both baselineRecords and the expectedNumRecords.");
+ Preconditions.checkArgument((baselineQueryType != null && expectedNumRecords == DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) || baselineQueryType == null,
+ "Cannot define both a baselineQueryType and the expectedNumRecords.");
}
public void run() throws Exception {
@@ -527,9 +538,14 @@ public class DrillTestWrapper {
// If baseline data was not provided to the test builder directly, we must run a query for the baseline, this includes
// the cases where the baseline is stored in a file.
if (baselineRecords == null) {
- test(baselineOptionSettingQueries);
- expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
- addToMaterializedResults(expectedRecords, expected, loader);
+ if (expectedNumRecords != DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) {
+ Assert.assertEquals(expectedNumRecords, actualRecords.size());
+ return;
+ } else {
+ test(baselineOptionSettingQueries);
+ expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
+ addToMaterializedResults(expectedRecords, expected, loader);
+ }
} else {
expectedRecords = baselineRecords;
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
index e40f86d..98a0a9a 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
@@ -91,6 +91,7 @@ public class TestBuilder {
private List<Map<String, Object>> baselineRecords;
private int expectedNumBatches = DrillTestWrapper.EXPECTED_BATCH_COUNT_NOT_SET;
+ private int expectedNumRecords = DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET;
public TestBuilder(TestServices services) {
this.services = services;
@@ -127,12 +128,9 @@ public class TestBuilder {
return this;
}
- public DrillTestWrapper build() throws Exception {
- if ( ! ordered && highPerformanceComparison ) {
- throw new Exception("High performance comparison only available for ordered checks, to enforce this restriction, ordered() must be called first.");
- }
+ public DrillTestWrapper build() {
return new DrillTestWrapper(this, services, query, queryType, baselineOptionSettingQueries, testOptionSettingQueries,
- getValidationQueryType(), ordered, highPerformanceComparison, baselineColumns, baselineRecords, expectedNumBatches);
+ getValidationQueryType(), ordered, highPerformanceComparison, baselineColumns, baselineRecords, expectedNumBatches, expectedNumRecords);
}
public List<Pair<SchemaPath, TypeProtos.MajorType>> getExpectedSchema() {
@@ -248,17 +246,8 @@ public class TestBuilder {
throw new RuntimeException("Must provide some kind of baseline, either a baseline file or another query");
}
- protected UserBitShared.QueryType getValidationQueryType() throws Exception {
- if (singleExplicitBaselineRecord()) {
- return null;
- }
-
- if (ordered) {
- // If there are no base line records or no baseline query then we will check to make sure that the records are in ascending order
- return null;
- }
-
- throw new RuntimeException("Must provide some kind of baseline, either a baseline file or another query");
+ protected UserBitShared.QueryType getValidationQueryType() {
+ return null;
}
public JSONTestBuilder jsonBaselineFile(String filePath) {
@@ -329,6 +318,12 @@ public class TestBuilder {
return this;
}
+ public TestBuilder expectsNumRecords(int expectedNumRecords) {
+ this.expectedNumRecords = expectedNumRecords;
+ this.ordered = false;
+ return this;
+ }
+
/**
* This method is used to pass in a simple list of values for a single record verification without
* the need to create a CSV or JSON file to store the baseline.
@@ -544,7 +539,7 @@ public class TestBuilder {
}
@Override
- protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+ protected UserBitShared.QueryType getValidationQueryType() {
return UserBitShared.QueryType.SQL;
}
}
@@ -577,7 +572,7 @@ public class TestBuilder {
}
@Override
- protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+ protected UserBitShared.QueryType getValidationQueryType() {
return null;
}
@@ -608,7 +603,7 @@ public class TestBuilder {
}
@Override
- protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+ protected UserBitShared.QueryType getValidationQueryType() {
return UserBitShared.QueryType.SQL;
}
@@ -639,7 +634,7 @@ public class TestBuilder {
}
@Override
- protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+ protected UserBitShared.QueryType getValidationQueryType() {
return baselineQueryType;
}
--
To stop receiving notification emails like this one, please contact
timothyfarkas@apache.org.