You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by gp...@apache.org on 2019/03/28 23:10:46 UTC

[drill] branch master updated: DRILL-7121: Use the NDV guess (same as before) when statistics is disabled

This is an automated email from the ASF dual-hosted git repository.

gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 56d0a2e  DRILL-7121: Use the NDV guess (same as before) when statistics is disabled
56d0a2e is described below

commit 56d0a2eb40c49d278dc4fbb099008371b9f4da9c
Author: Gautam Parai <gp...@maprtech.com>
AuthorDate: Mon Mar 25 12:08:31 2019 -0700

    DRILL-7121: Use the NDV guess (same as before) when statistics is disabled
    
    closes #1718
---
 .../drill/exec/planner/common/DrillRelOptUtil.java | 21 +++-----
 .../drill/exec/planner/common/DrillStatsTable.java |  3 +-
 .../planner/cost/DrillRelMdDistinctRowCount.java   | 61 +++++++++-------------
 .../exec/planner/cost/DrillRelMdRowCount.java      | 13 ++---
 .../exec/planner/cost/DrillRelMdSelectivity.java   |  7 +--
 5 files changed, 41 insertions(+), 64 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
index 1425466..fbc1500 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
@@ -25,18 +25,13 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-
-import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
-import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
-import org.apache.drill.shaded.guava.com.google.common.collect.Sets;
-import org.apache.calcite.plan.hep.HepRelVertex;
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
 import org.apache.calcite.plan.volcano.RelSubset;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.TableScan;
-import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.rules.ProjectRemoveRule;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeFactory;
@@ -59,12 +54,15 @@ import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.common.types.Types;
 import org.apache.drill.exec.planner.logical.DrillRelFactories;
-import org.apache.drill.exec.planner.logical.FieldsReWriterUtil;
 import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
+import org.apache.drill.exec.planner.logical.FieldsReWriterUtil;
 import org.apache.drill.exec.planner.physical.PlannerSettings;
 import org.apache.drill.exec.resolver.TypeCastRules;
 import org.apache.drill.exec.util.Utilities;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
+import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+import org.apache.drill.shaded.guava.com.google.common.collect.Sets;
 
 /**
  * Utility class that is a subset of the RelOptUtil class and is a placeholder for Drill specific
@@ -584,10 +582,7 @@ public abstract class DrillRelOptUtil {
         return guessRows(((HepRelVertex) rel).getCurrentRel());
       }
     } else if (rel instanceof TableScan) {
-      DrillTable table = rel.getTable().unwrap(DrillTable.class);
-      if (table == null) {
-        table = rel.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
-      }
+      DrillTable table = Utilities.getDrillTable(rel.getTable());
       if (table != null
           && table.getStatsTable() != null
           && table.getStatsTable().isMaterialized()) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java
index e934dfc..7e030da 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java
@@ -48,6 +48,7 @@ import org.apache.drill.exec.store.dfs.FormatPlugin;
 import org.apache.drill.exec.store.dfs.FormatSelection;
 import org.apache.drill.exec.store.parquet.ParquetFormatConfig;
 import org.apache.drill.exec.util.ImpersonationUtil;
+import org.apache.drill.exec.util.Utilities;
 import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -213,7 +214,7 @@ public class DrillStatsTable {
     public void visit(RelNode node, int ordinal, RelNode parent) {
       if (node instanceof TableScan) {
         try {
-          final DrillTable drillTable = node.getTable().unwrap(DrillTable.class);
+          final DrillTable drillTable = Utilities.getDrillTable(node.getTable());
           final DrillStatsTable statsTable = drillTable.getStatsTable();
           if (statsTable != null) {
             statsTable.materialize(drillTable, context);
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
index 6380b52..f7d2ff6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
@@ -17,13 +17,14 @@
  */
 package org.apache.drill.exec.planner.cost;
 
+import java.util.ArrayList;
+import java.util.List;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.plan.volcano.RelSubset;
 import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.SingleRel;
+import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.core.Window;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
@@ -38,12 +39,11 @@ import org.apache.calcite.util.BuiltInMethod;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.drill.exec.planner.common.DrillJoinRelBase;
 import org.apache.drill.exec.planner.common.DrillRelOptUtil;
+import org.apache.drill.exec.planner.common.DrillScanRelBase;
 import org.apache.drill.exec.planner.common.DrillStatsTable;
+import org.apache.drill.exec.planner.logical.DrillScanRel;
 import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
-
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.drill.exec.util.Utilities;
 
 public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
   private static final DrillRelMdDistinctRowCount INSTANCE =
@@ -75,8 +75,21 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
 
   @Override
   public Double getDistinctRowCount(RelNode rel, RelMetadataQuery mq, ImmutableBitSet groupKey, RexNode predicate) {
-    if (rel instanceof TableScan && !DrillRelOptUtil.guessRows(rel)) {
-      return getDistinctRowCount((TableScan) rel, mq, groupKey, predicate);
+    if (rel instanceof DrillScanRelBase) {                  // Applies to both Drill Logical and Physical Rels
+      DrillTable table = Utilities.getDrillTable(rel.getTable());
+      if (table != null && table.getStatsTable() != null && !DrillRelOptUtil.guessRows(rel)) {
+        return getDistinctRowCount(((DrillScanRelBase)rel), mq, table, groupKey, rel.getRowType(), predicate);
+      } else {
+        /* If we are not using statistics OR there is no table or metadata (stats) table associated with scan,
+         * estimate the distinct row count. Consistent with the estimation of Aggregate row count in
+         * RelMdRowCount: distinctRowCount = rowCount * 10%.
+         */
+        if (rel instanceof DrillScanRel) {
+          // The existing Drill behavior is to only use this estimation for DrillScanRel and not ScanPrel.
+          // TODO: We may potentially do it for ScanPrel (outside the scope of statistics)
+          return rel.estimateRowCount(mq) * 0.1;
+        }
+      }
     } else if (rel instanceof SingleRel && !DrillRelOptUtil.guessRows(rel)) {
         if (rel instanceof Window) {
           int childFieldCount = ((Window)rel).getInput().getRowType().getFieldCount();
@@ -88,10 +101,7 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
           }
         }
         return mq.getDistinctRowCount(((SingleRel) rel).getInput(), groupKey, predicate);
-    } else if (rel instanceof DrillJoinRelBase) {
-      if (DrillRelOptUtil.guessRows(rel)) {
-        return super.getDistinctRowCount(rel, mq, groupKey, predicate);
-      }
+    } else if (rel instanceof DrillJoinRelBase && !DrillRelOptUtil.guessRows(rel)) {
       //Assume ndv is unaffected by the join
       return getDistinctRowCount(((DrillJoinRelBase) rel), mq, groupKey, predicate);
     } else if (rel instanceof RelSubset && !DrillRelOptUtil.guessRows(rel)) {
@@ -99,12 +109,9 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
         return mq.getDistinctRowCount(((RelSubset)rel).getBest(), groupKey, predicate);
       } else if (((RelSubset) rel).getOriginal() != null) {
         return mq.getDistinctRowCount(((RelSubset)rel).getOriginal(), groupKey, predicate);
-      } else {
-        return super.getDistinctRowCount(rel, mq, groupKey, predicate);
       }
-    } else {
-      return super.getDistinctRowCount(rel, mq, groupKey, predicate);
     }
+    return super.getDistinctRowCount(rel, mq, groupKey, predicate);
   }
 
   /**
@@ -112,27 +119,9 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
    * set of columns indicated by groupKey.
    * column").
    */
-  private Double getDistinctRowCount(TableScan scan, RelMetadataQuery mq, ImmutableBitSet groupKey,
-      RexNode predicate) {
-    DrillTable table = scan.getTable().unwrap(DrillTable.class);
-    if (table == null) {
-      table = scan.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
-    }
-    return getDistinctRowCountInternal(scan, mq, table, groupKey, scan.getRowType(), predicate);
-  }
-
-  private Double getDistinctRowCountInternal(RelNode scan, RelMetadataQuery mq, DrillTable table,
-      ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
+  private Double getDistinctRowCount(DrillScanRelBase scan, RelMetadataQuery mq, DrillTable table,
+                                     ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
     double selectivity, rowCount;
-    // If guessing, return NDV as 0.1 * rowCount
-    if (DrillRelOptUtil.guessRows(scan)) {
-      /* If there is no table or metadata (stats) table associated with scan, estimate the
-       * distinct row count. Consistent with the estimation of Aggregate row count in
-       * RelMdRowCount: distinctRowCount = rowCount * 10%.
-       */
-      return scan.estimateRowCount(mq) * 0.1;
-    }
-
     /* If predicate is present, determine its selectivity to estimate filtered rows.
      * Thereafter, compute the number of distinct rows.
      */
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdRowCount.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdRowCount.java
index f854480..f9eb41b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdRowCount.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdRowCount.java
@@ -18,16 +18,15 @@
 package org.apache.drill.exec.planner.cost;
 
 import java.io.IOException;
-
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.SingleRel;
 import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.Sort;
-import org.apache.calcite.rel.core.Union;
-import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.core.Union;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdRowCount;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
@@ -37,9 +36,9 @@ import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.drill.exec.planner.common.DrillLimitRelBase;
 import org.apache.drill.exec.planner.common.DrillRelOptUtil;
 import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
 import org.apache.drill.exec.planner.physical.PlannerSettings;
 import org.apache.drill.exec.planner.physical.PrelUtil;
+import org.apache.drill.exec.util.Utilities;
 
 
 public class DrillRelMdRowCount extends RelMdRowCount{
@@ -101,16 +100,12 @@ public class DrillRelMdRowCount extends RelMdRowCount{
   }
 
   private Double getRowCountInternal(TableScan rel, RelMetadataQuery mq) {
-    DrillTable table;
+    DrillTable table = Utilities.getDrillTable(rel.getTable());
     PlannerSettings settings = PrelUtil.getSettings(rel.getCluster());
     // If guessing, return selectivity from RelMDRowCount
     if (DrillRelOptUtil.guessRows(rel)) {
       return super.getRowCount(rel, mq);
     }
-    table = rel.getTable().unwrap(DrillTable.class);
-    if (table == null) {
-      table = rel.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
-    }
     // Return rowcount from statistics, if available. Otherwise, delegate to parent.
     try {
       if (table != null
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java
index ec72822..aae8b1d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java
@@ -47,10 +47,10 @@ import org.apache.drill.exec.planner.common.DrillRelOptUtil;
 import org.apache.drill.exec.planner.common.DrillScanRelBase;
 import org.apache.drill.exec.planner.logical.DrillScanRel;
 import org.apache.drill.exec.planner.logical.DrillTable;
-import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
 import org.apache.drill.exec.planner.physical.PlannerSettings;
 import org.apache.drill.exec.planner.physical.PrelUtil;
 import org.apache.drill.exec.planner.physical.ScanPrel;
+import org.apache.drill.exec.util.Utilities;
 
 public class DrillRelMdSelectivity extends RelMdSelectivity {
   private static final DrillRelMdSelectivity INSTANCE = new DrillRelMdSelectivity();
@@ -117,10 +117,7 @@ public class DrillRelMdSelectivity extends RelMdSelectivity {
       if (DrillRelOptUtil.guessRows(rel)) {
         return super.getSelectivity(rel, mq, predicate);
       }
-      DrillTable table = rel.getTable().unwrap(DrillTable.class);
-      if (table == null) {
-        table = rel.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
-      }
+      DrillTable table = Utilities.getDrillTable(rel.getTable());
       if (table != null && table.getStatsTable() != null && table.getStatsTable().isMaterialized()) {
         if (rel instanceof DrillScanRelBase) {
           List<String> fieldNames = new ArrayList<>();