You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by gp...@apache.org on 2019/03/28 23:10:46 UTC
[drill] branch master updated: DRILL-7121: Use the NDV guess (same
as before) when statistics is disabled
This is an automated email from the ASF dual-hosted git repository.
gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 56d0a2e DRILL-7121: Use the NDV guess (same as before) when statistics is disabled
56d0a2e is described below
commit 56d0a2eb40c49d278dc4fbb099008371b9f4da9c
Author: Gautam Parai <gp...@maprtech.com>
AuthorDate: Mon Mar 25 12:08:31 2019 -0700
DRILL-7121: Use the NDV guess (same as before) when statistics is disabled
closes #1718
---
.../drill/exec/planner/common/DrillRelOptUtil.java | 21 +++-----
.../drill/exec/planner/common/DrillStatsTable.java | 3 +-
.../planner/cost/DrillRelMdDistinctRowCount.java | 61 +++++++++-------------
.../exec/planner/cost/DrillRelMdRowCount.java | 13 ++---
.../exec/planner/cost/DrillRelMdSelectivity.java | 7 +--
5 files changed, 41 insertions(+), 64 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
index 1425466..fbc1500 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
@@ -25,18 +25,13 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
-
-import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
-import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
-import org.apache.drill.shaded.guava.com.google.common.collect.Sets;
-import org.apache.calcite.plan.hep.HepRelVertex;
import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
import org.apache.calcite.plan.volcano.RelSubset;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.TableScan;
-import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.rules.ProjectRemoveRule;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
@@ -59,12 +54,15 @@ import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.planner.logical.DrillRelFactories;
-import org.apache.drill.exec.planner.logical.FieldsReWriterUtil;
import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
+import org.apache.drill.exec.planner.logical.FieldsReWriterUtil;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.resolver.TypeCastRules;
import org.apache.drill.exec.util.Utilities;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
+import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+import org.apache.drill.shaded.guava.com.google.common.collect.Sets;
/**
* Utility class that is a subset of the RelOptUtil class and is a placeholder for Drill specific
@@ -584,10 +582,7 @@ public abstract class DrillRelOptUtil {
return guessRows(((HepRelVertex) rel).getCurrentRel());
}
} else if (rel instanceof TableScan) {
- DrillTable table = rel.getTable().unwrap(DrillTable.class);
- if (table == null) {
- table = rel.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
- }
+ DrillTable table = Utilities.getDrillTable(rel.getTable());
if (table != null
&& table.getStatsTable() != null
&& table.getStatsTable().isMaterialized()) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java
index e934dfc..7e030da 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java
@@ -48,6 +48,7 @@ import org.apache.drill.exec.store.dfs.FormatPlugin;
import org.apache.drill.exec.store.dfs.FormatSelection;
import org.apache.drill.exec.store.parquet.ParquetFormatConfig;
import org.apache.drill.exec.util.ImpersonationUtil;
+import org.apache.drill.exec.util.Utilities;
import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -213,7 +214,7 @@ public class DrillStatsTable {
public void visit(RelNode node, int ordinal, RelNode parent) {
if (node instanceof TableScan) {
try {
- final DrillTable drillTable = node.getTable().unwrap(DrillTable.class);
+ final DrillTable drillTable = Utilities.getDrillTable(node.getTable());
final DrillStatsTable statsTable = drillTable.getStatsTable();
if (statsTable != null) {
statsTable.materialize(drillTable, context);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
index 6380b52..f7d2ff6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
@@ -17,13 +17,14 @@
*/
package org.apache.drill.exec.planner.cost;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.volcano.RelSubset;
import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.SingleRel;
+import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.core.Window;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
@@ -38,12 +39,11 @@ import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.drill.exec.planner.common.DrillJoinRelBase;
import org.apache.drill.exec.planner.common.DrillRelOptUtil;
+import org.apache.drill.exec.planner.common.DrillScanRelBase;
import org.apache.drill.exec.planner.common.DrillStatsTable;
+import org.apache.drill.exec.planner.logical.DrillScanRel;
import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
-
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.drill.exec.util.Utilities;
public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
private static final DrillRelMdDistinctRowCount INSTANCE =
@@ -75,8 +75,21 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
@Override
public Double getDistinctRowCount(RelNode rel, RelMetadataQuery mq, ImmutableBitSet groupKey, RexNode predicate) {
- if (rel instanceof TableScan && !DrillRelOptUtil.guessRows(rel)) {
- return getDistinctRowCount((TableScan) rel, mq, groupKey, predicate);
+ if (rel instanceof DrillScanRelBase) { // Applies to both Drill Logical and Physical Rels
+ DrillTable table = Utilities.getDrillTable(rel.getTable());
+ if (table != null && table.getStatsTable() != null && !DrillRelOptUtil.guessRows(rel)) {
+ return getDistinctRowCount(((DrillScanRelBase)rel), mq, table, groupKey, rel.getRowType(), predicate);
+ } else {
+ /* If we are not using statistics OR there is no table or metadata (stats) table associated with scan,
+ * estimate the distinct row count. Consistent with the estimation of Aggregate row count in
+ * RelMdRowCount: distinctRowCount = rowCount * 10%.
+ */
+ if (rel instanceof DrillScanRel) {
+ // The existing Drill behavior is to only use this estimation for DrillScanRel and not ScanPrel.
+ // TODO: We may potentially do it for ScanPrel (outside the scope of statistics)
+ return rel.estimateRowCount(mq) * 0.1;
+ }
+ }
} else if (rel instanceof SingleRel && !DrillRelOptUtil.guessRows(rel)) {
if (rel instanceof Window) {
int childFieldCount = ((Window)rel).getInput().getRowType().getFieldCount();
@@ -88,10 +101,7 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
}
}
return mq.getDistinctRowCount(((SingleRel) rel).getInput(), groupKey, predicate);
- } else if (rel instanceof DrillJoinRelBase) {
- if (DrillRelOptUtil.guessRows(rel)) {
- return super.getDistinctRowCount(rel, mq, groupKey, predicate);
- }
+ } else if (rel instanceof DrillJoinRelBase && !DrillRelOptUtil.guessRows(rel)) {
//Assume ndv is unaffected by the join
return getDistinctRowCount(((DrillJoinRelBase) rel), mq, groupKey, predicate);
} else if (rel instanceof RelSubset && !DrillRelOptUtil.guessRows(rel)) {
@@ -99,12 +109,9 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
return mq.getDistinctRowCount(((RelSubset)rel).getBest(), groupKey, predicate);
} else if (((RelSubset) rel).getOriginal() != null) {
return mq.getDistinctRowCount(((RelSubset)rel).getOriginal(), groupKey, predicate);
- } else {
- return super.getDistinctRowCount(rel, mq, groupKey, predicate);
}
- } else {
- return super.getDistinctRowCount(rel, mq, groupKey, predicate);
}
+ return super.getDistinctRowCount(rel, mq, groupKey, predicate);
}
/**
@@ -112,27 +119,9 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
* set of columns indicated by groupKey.
* column").
*/
- private Double getDistinctRowCount(TableScan scan, RelMetadataQuery mq, ImmutableBitSet groupKey,
- RexNode predicate) {
- DrillTable table = scan.getTable().unwrap(DrillTable.class);
- if (table == null) {
- table = scan.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
- }
- return getDistinctRowCountInternal(scan, mq, table, groupKey, scan.getRowType(), predicate);
- }
-
- private Double getDistinctRowCountInternal(RelNode scan, RelMetadataQuery mq, DrillTable table,
- ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
+ private Double getDistinctRowCount(DrillScanRelBase scan, RelMetadataQuery mq, DrillTable table,
+ ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
double selectivity, rowCount;
- // If guessing, return NDV as 0.1 * rowCount
- if (DrillRelOptUtil.guessRows(scan)) {
- /* If there is no table or metadata (stats) table associated with scan, estimate the
- * distinct row count. Consistent with the estimation of Aggregate row count in
- * RelMdRowCount: distinctRowCount = rowCount * 10%.
- */
- return scan.estimateRowCount(mq) * 0.1;
- }
-
/* If predicate is present, determine its selectivity to estimate filtered rows.
* Thereafter, compute the number of distinct rows.
*/
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdRowCount.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdRowCount.java
index f854480..f9eb41b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdRowCount.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdRowCount.java
@@ -18,16 +18,15 @@
package org.apache.drill.exec.planner.cost;
import java.io.IOException;
-
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.SingleRel;
import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.Sort;
-import org.apache.calcite.rel.core.Union;
-import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.core.Union;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdRowCount;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
@@ -37,9 +36,9 @@ import org.apache.calcite.util.ImmutableBitSet;
import org.apache.drill.exec.planner.common.DrillLimitRelBase;
import org.apache.drill.exec.planner.common.DrillRelOptUtil;
import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.planner.physical.PrelUtil;
+import org.apache.drill.exec.util.Utilities;
public class DrillRelMdRowCount extends RelMdRowCount{
@@ -101,16 +100,12 @@ public class DrillRelMdRowCount extends RelMdRowCount{
}
private Double getRowCountInternal(TableScan rel, RelMetadataQuery mq) {
- DrillTable table;
+ DrillTable table = Utilities.getDrillTable(rel.getTable());
PlannerSettings settings = PrelUtil.getSettings(rel.getCluster());
// If guessing, return the row count from RelMdRowCount
if (DrillRelOptUtil.guessRows(rel)) {
return super.getRowCount(rel, mq);
}
- table = rel.getTable().unwrap(DrillTable.class);
- if (table == null) {
- table = rel.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
- }
// Return rowcount from statistics, if available. Otherwise, delegate to parent.
try {
if (table != null
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java
index ec72822..aae8b1d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java
@@ -47,10 +47,10 @@ import org.apache.drill.exec.planner.common.DrillRelOptUtil;
import org.apache.drill.exec.planner.common.DrillScanRelBase;
import org.apache.drill.exec.planner.logical.DrillScanRel;
import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.planner.physical.PrelUtil;
import org.apache.drill.exec.planner.physical.ScanPrel;
+import org.apache.drill.exec.util.Utilities;
public class DrillRelMdSelectivity extends RelMdSelectivity {
private static final DrillRelMdSelectivity INSTANCE = new DrillRelMdSelectivity();
@@ -117,10 +117,7 @@ public class DrillRelMdSelectivity extends RelMdSelectivity {
if (DrillRelOptUtil.guessRows(rel)) {
return super.getSelectivity(rel, mq, predicate);
}
- DrillTable table = rel.getTable().unwrap(DrillTable.class);
- if (table == null) {
- table = rel.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
- }
+ DrillTable table = Utilities.getDrillTable(rel.getTable());
if (table != null && table.getStatsTable() != null && table.getStatsTable().isMaterialized()) {
if (rel instanceof DrillScanRelBase) {
List<String> fieldNames = new ArrayList<>();