You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jb...@apache.org on 2019/01/27 03:40:47 UTC
[impala] 01/04: IMPALA-8058: Fallback for HBase key scan range estimation

This is an automated email from the ASF dual-hosted git repository.

jbapple pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit dccb97ba795f58d76d5e0e664685ec52754f059f
Author: paul-rogers <pr...@cloudera.com>
AuthorDate: Tue Jan 8 19:01:34 2019 -0800

    IMPALA-8058: Fallback for HBase key scan range estimation
    
    Impala supports "pushing" of HBase key range predicates to HBase so that
    Impala reads only rows within the target key range. The planner
    estimates the cardinality of such scans by sampling the rows within the
    range. However, we have seen cases where sampling returns no rows for
    unknown reasons. The planner then ends up without a good cardinality
    estimate.  (Specifically, the code does a division by zero and produces
    a huge estimate.  See the ticket for details.)
    
    Impala appears to use the sampling strategy to compute cardinality
    because HBase users generally do not gather table stats. The resulting
    estimates are often off by 2x or more. This is a problem in tests as it
    causes cardinality numbers to vary greatly from the expected values.
    Fortunately, tests do gather HMS stats. There may be cases where users
    do as well. This fix exploits that fact.
    
    This fix:
    
    * Creates a fall-back strategy that uses table cardinality from HMS and
      the selectivity of the key predicates to estimate cardinality when the
      sampling approach fails.
    * The fall-back strategy requires tracking the predicates used for HBase
      keys so that their selectivity can be applied during fall-back
      calculations.
    * Moved HBase key calculation out of the SingleNodePlanner into the
      HBase scan node as suggested by a "TO DO" in the code. Doing so
      simplified the new code.
    * In the spirit of IMPALA-7919, adds the key predicates to the HBase
      scan node in the EXPLAIN output.
    
    Testing:
    
    * Adds a query context option to disable the normal key sampling to
      force the use of the fall-back. Used for testing.
    * Adds a new set of HBase test cases that use the new feature to check
      plans with the fall-back approach.
    * Reran all existing tests.
    * Compared cardinality numbers for the two modes: sampling and HMS using
      the cardinality features of IMPALA-8021. The two approaches provide
      different results, but this is mostly due to the missing selectivity
      estimates for inequality operators. (That's a fix for another time.)
    
    Change-Id: Ic01147abcb6b184071ba28b55aedc3bc49b322ce
    Reviewed-on: http://gerrit.cloudera.org:8080/12192
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 common/thrift/ImpalaInternalService.thrift         |   6 +
 .../org/apache/impala/catalog/FeHBaseTable.java    |  11 +
 .../org/apache/impala/planner/HBaseScanNode.java   | 175 +++++++++---
 .../apache/impala/planner/SingleNodePlanner.java   |  82 +-----
 .../org/apache/impala/planner/PlannerTest.java     |  20 ++
 .../org/apache/impala/planner/PlannerTestBase.java |   9 +-
 .../queries/PlannerTest/constant-folding.test      |   1 +
 .../queries/PlannerTest/hbase-no-key-est.test      | 299 +++++++++++++++++++++
 .../queries/PlannerTest/hbase.test                 |  22 ++
 .../queries/PlannerTest/joins.test                 |   2 +
 10 files changed, 511 insertions(+), 116 deletions(-)

diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index 36b883d..a190d83 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -447,6 +447,12 @@ struct TQueryCtx {
   //   system's local timezone and falls back to UTC. This logic will be removed in
   //   IMPALA-7359, which will make this member completely obsolete.
   19: required string local_time_zone
+
+  // Disables the code that estimates HBase scan cardinality from key ranges.
+  // When disabled, scan cardinality is estimated from HMS table row count
+  // stats and key column predicate selectivity. Generally only disabled
+  // for testing.
+  20: optional bool disable_hbase_row_est = false;
 }
 
 // Specification of one output destination of a plan fragment
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeHBaseTable.java b/fe/src/main/java/org/apache/impala/catalog/FeHBaseTable.java
index 8d481a1..fbb6d5d 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeHBaseTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeHBaseTable.java
@@ -58,6 +58,7 @@ import org.apache.impala.util.StatsHelper;
 import org.apache.impala.util.TResultRowBuilder;
 import org.apache.log4j.Logger;
 
+import com.google.common.base.Charsets;
 import com.google.common.base.Preconditions;
 
 public interface FeHBaseTable extends FeTable {
@@ -401,6 +402,16 @@ public interface FeHBaseTable extends FeTable {
         }
         if (totalSize == 0) {
           rowCount = totalEstimatedRows;
+        } else if (statsSize.mean() < 1) {
+          // No meaningful row width found. The < 1 handles both the
+          // no row case and the potential case where the average is
+          // too small to be meaningful.
+          LOG.warn(String.format("Table %s: no data available to compute " +
+              "row count estimate for key range ('%s', '%s')",
+              tbl.getFullName(),
+              new String(startRowKey, Charsets.UTF_8),
+              new String(endRowKey, Charsets.UTF_8)));
+          return new Pair<>(-1L, -1L);
         } else {
           rowCount = (long) (totalSize / statsSize.mean());
         }
diff --git a/fe/src/main/java/org/apache/impala/planner/HBaseScanNode.java b/fe/src/main/java/org/apache/impala/planner/HBaseScanNode.java
index 289f945..808ab41 100644
--- a/fe/src/main/java/org/apache/impala/planner/HBaseScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HBaseScanNode.java
@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
+import java.util.ListIterator;
 import java.util.Map;
 
 import org.apache.hadoop.hbase.HConstants;
@@ -37,6 +38,7 @@ import org.apache.impala.analysis.SlotDescriptor;
 import org.apache.impala.analysis.StringLiteral;
 import org.apache.impala.analysis.TupleDescriptor;
 import org.apache.impala.catalog.FeHBaseTable;
+import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.HBaseColumn;
 import org.apache.impala.catalog.PrimitiveType;
 import org.apache.impala.catalog.Type;
@@ -62,13 +64,34 @@ import org.slf4j.LoggerFactory;
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
 
 /**
  * Full scan of an HBase table.
  * Only families/qualifiers specified in TupleDescriptor will be retrieved in the backend.
  */
 public class HBaseScanNode extends ScanNode {
+  // The suggested value for "hbase.client.scan.setCaching", which batches maxCaching
+  // rows per fetch request to the HBase region server. If the value is too high,
+  // then the hbase region server will have a hard time (GC pressure and long response
+  // times). If the value is too small, then there will be extra trips to the hbase
+  // region server.
+  // Default to 1024 and update it based on row size estimate such that each batch size
+  // won't exceed 500MB.
+  private final static int MAX_HBASE_FETCH_BATCH_SIZE = 500 * 1024 * 1024;
+  private final static int DEFAULT_SUGGESTED_CACHING = 1024;
+
+  // Used for memory estimation when the column max size stat is missing (happens only
+  // in case of string type columns).
+  private final static int DEFAULT_STRING_COL_BYTES = 32 * 1024;
+
+  // Used for memory estimation to clamp the max estimate to 128 MB in case of
+  // missing stats.
+  private final static int DEFAULT_MAX_ESTIMATE_BYTES = 128 * 1024 * 1024;
+
+  // Used for memory estimation to clamp the min estimate to 4 KB which is min
+  // block size that can be allocated by the mem-pool.
+  private final static int DEFAULT_MIN_ESTIMATE_BYTES = 4 * 1024;
+
   private final static Logger LOG = LoggerFactory.getLogger(HBaseScanNode.class);
   private final TupleDescriptor desc_;
 
@@ -76,7 +99,11 @@ public class HBaseScanNode extends ScanNode {
   // A null entry means there's no range restriction for that particular key.
   // If keyRanges is non-null it always contains as many entries as there are clustering
   // cols.
-  private List<ValueRange> keyRanges_;
+  private List<ValueRange> keyRanges_ = new ArrayList<>();
+
+  // The list of conjuncts used to create the key ranges. Used if we
+  // must estimate cardinality based on row count stats.
+  private List<Expr> keyConjuncts_ = new ArrayList<>();
 
   // derived from keyRanges_; empty means unbounded;
   // initialize start/stopKey_ to be unbounded.
@@ -91,41 +118,30 @@ public class HBaseScanNode extends ScanNode {
   // List of HBase Filters for generating thrift message. Filled in finalize().
   private final List<THBaseFilter> filters_ = new ArrayList<>();
 
-  // The suggested value for "hbase.client.scan.setCaching", which batches maxCaching
-  // rows per fetch request to the HBase region server. If the value is too high,
-  // then the hbase region server will have a hard time (GC pressure and long response
-  // times). If the value is too small, then there will be extra trips to the hbase
-  // region server.
-  // Default to 1024 and update it based on row size estimate such that each batch size
-  // won't exceed 500MB.
-  private final static int MAX_HBASE_FETCH_BATCH_SIZE = 500 * 1024 * 1024;
-  private final static int DEFAULT_SUGGESTED_CACHING = 1024;
   private int suggestedCaching_ = DEFAULT_SUGGESTED_CACHING;
 
-  // Used for memory estimation when the column max size stat is missing (happens only
-  // in case of string type columns).
-  private final static int DEFAULT_STRING_COL_BYTES = 32 * 1024;
-
-  // Used for memory estimation to clamp the max estimate to 128 MB in case of
-  // missing stats.
-  private final static int DEFAULT_MAX_ESTIMATE_BYTES = 128 * 1024 * 1024;
-
-  // Used for memory estimation to clamp the min estimate to 4 KB which is min
-  // block size that can be allocated by the mem-pool.
-  private final static int DEFAULT_MIN_ESTIMATE_BYTES = 4 * 1024;
-
   public HBaseScanNode(PlanNodeId id, TupleDescriptor desc) {
     super(id, desc, "SCAN HBASE");
     desc_ = desc;
   }
 
-  public void setKeyRanges(List<ValueRange> keyRanges) {
-    Preconditions.checkNotNull(keyRanges);
-    keyRanges_ = keyRanges;
-  }
-
   @Override
   public void init(Analyzer analyzer) throws ImpalaException {
+    FeTable table = desc_.getTable();
+    // determine scan predicates for clustering cols
+    for (int i = 0; i < table.getNumClusteringCols(); ++i) {
+      SlotDescriptor slotDesc = analyzer.getColumnSlot(
+          desc_, table.getColumns().get(i));
+      if (slotDesc == null || !slotDesc.getType().isStringType()) {
+        // the hbase row key is mapped to a non-string type
+        // (since it's stored in ASCII it will be lexicographically ordered,
+        // and non-string comparisons won't work)
+        keyRanges_.add(null);
+      } else {
+        keyRanges_.add(createHBaseValueRange(slotDesc));
+      }
+    }
+
     checkForSupportedFileFormats();
     assignConjuncts(analyzer);
     conjuncts_ = orderConjunctsByCost(conjuncts_);
@@ -144,13 +160,70 @@ public class HBaseScanNode extends ScanNode {
   }
 
   /**
+   * Transform '=', '<[=]' and '>[=]' comparisons for given slot into
+   * ValueRange. Also removes those predicates which were used for the construction
+   * of ValueRange from 'conjuncts_'. Only looks at comparisons w/ string constants
+   * (ie, the bounds of the result can be evaluated with Expr::GetValue(NULL)).
+   * HBase row key filtering works only if the row key is mapped to a string column and
+   * the expression is a string constant expression.
+   * If there are multiple competing comparison predicates that could be used
+   * to construct a ValueRange, only the first one from each category is chosen.
+   */
+  private ValueRange createHBaseValueRange(SlotDescriptor d) {
+    ListIterator<Expr> i = conjuncts_.listIterator();
+    ValueRange result = null;
+    while (i.hasNext()) {
+      Expr e = i.next();
+      if (!(e instanceof BinaryPredicate)) continue;
+      BinaryPredicate comp = (BinaryPredicate) e;
+      if ((comp.getOp() == BinaryPredicate.Operator.NE)
+          || (comp.getOp() == BinaryPredicate.Operator.DISTINCT_FROM)
+          || (comp.getOp() == BinaryPredicate.Operator.NOT_DISTINCT)) {
+        continue;
+      }
+      Expr slotBinding = comp.getSlotBinding(d.getId());
+      if (slotBinding == null || !slotBinding.isConstant() ||
+          !slotBinding.getType().equals(Type.STRING)) {
+        continue;
+      }
+
+      if (comp.getOp() == BinaryPredicate.Operator.EQ) {
+        i.remove();
+        keyConjuncts_.add(e);
+        return ValueRange.createEqRange(slotBinding);
+      }
+
+      if (result == null) result = new ValueRange();
+
+      // TODO: do we need copies here?
+      if (comp.getOp() == BinaryPredicate.Operator.GT
+          || comp.getOp() == BinaryPredicate.Operator.GE) {
+        if (result.getLowerBound() == null) {
+          result.setLowerBound(slotBinding);
+          result.setLowerBoundInclusive(comp.getOp() == BinaryPredicate.Operator.GE);
+          i.remove();
+          keyConjuncts_.add(e);
+        }
+      } else {
+        if (result.getUpperBound() == null) {
+          result.setUpperBound(slotBinding);
+          result.setUpperBoundInclusive(comp.getOp() == BinaryPredicate.Operator.LE);
+          i.remove();
+          keyConjuncts_.add(e);
+       }
+      }
+    }
+    return result;
+  }
+
+  /**
    * Convert keyRanges_ to startKey_ and stopKey_.
    * If ValueRange is not null, transform it into start/stopKey_ by evaluating the
    * expression. Analysis has checked that the expression is string type. If the
    * expression evaluates to null, then there's nothing to scan because Hbase row key
    * cannot be null.
    * At present, we only do row key filtering for string-mapped keys. String-mapped keys
-   * are always encded as ascii.
+   * are always encoded as ASCII.
    * ValueRange is null if there is no predicate on the row-key.
    */
   private void setStartStopKey(Analyzer analyzer) throws ImpalaException {
@@ -165,6 +238,10 @@ public class HBaseScanNode extends ScanNode {
             rowRange.getLowerBound().getType().equals(Type.STRING));
         LiteralExpr val = LiteralExpr.create(rowRange.getLowerBound(),
             analyzer.getQueryCtx());
+        // TODO: Make this a Preconditions.checkState(). If we get here,
+        // and the value is not a string literal, then we've got a predicate
+        // that we removed from the conjunct list, but which we won't evaluate
+        // as a key. That is, we'll produce wrong query results.
         if (val instanceof StringLiteral) {
           StringLiteral litVal = (StringLiteral) val;
           startKey_ = convertToBytes(litVal.getUnescapedValue(),
@@ -214,13 +291,35 @@ public class HBaseScanNode extends ScanNode {
     } else if (rowRange != null && rowRange.isEqRange()) {
       cardinality_ = 1;
     } else {
-      // Set maxCaching so that each fetch from hbase won't return a batch of more than
-      // MAX_HBASE_FETCH_BATCH_SIZE bytes.
-      Pair<Long, Long> estimate = tbl.getEstimatedRowStats(startKey_, stopKey_);
-      cardinality_ = estimate.first.longValue();
-      if (estimate.second.longValue() > 0) {
-        suggestedCaching_ = (int)
-            Math.max(MAX_HBASE_FETCH_BATCH_SIZE / estimate.second.longValue(), 1);
+      Pair<Long, Long> estimate;
+      if (analyzer.getQueryCtx().isDisable_hbase_row_est()) {
+        estimate = new Pair<>(-1L, -1L);
+      } else {
+        // Set maxCaching so that each fetch from hbase won't return a batch of more than
+        // MAX_HBASE_FETCH_BATCH_SIZE bytes.
+        // May return -1 for the estimate if insufficient data is available.
+        estimate = tbl.getEstimatedRowStats(startKey_, stopKey_);
+      }
+      if (estimate.first == -1) {
+        // No useful estimate. Rely on HMS row count stats.
+        // This works only if HBase stats are available in HMS. This is true
+        // for the Impala tests, and may be true for some applications.
+        cardinality_ = tbl.getTTableStats().getNum_rows();
+        // TODO: What to do if neither HBase nor HMS provides a row count estimate?
+        // Is there some third, ultimate fallback?
+        // Apply estimated key range selectivity from original key conjuncts
+        if (cardinality_ != -1 && keyConjuncts_ != null) {
+          cardinality_ *= computeCombinedSelectivity(keyConjuncts_);
+        }
+      } else {
+        // Use the HBase sampling scan to estimate cardinality. Note that,
+        // in tests, this estimate has proven to be very rough: off by
+        // 2x or more.
+        cardinality_ = estimate.first;
+        if (estimate.second > 0) {
+          suggestedCaching_ = (int)
+              Math.max(MAX_HBASE_FETCH_BATCH_SIZE / estimate.second.longValue(), 1);
+        }
       }
     }
     inputCardinality_ = cardinality_;
@@ -428,6 +527,10 @@ public class HBaseScanNode extends ScanNode {
     output.append(String.format("%s%s:%s [%s%s]\n", prefix, id_.toString(),
         displayName_, table.getFullName(), aliasStr));
     if (detailLevel.ordinal() >= TExplainLevel.STANDARD.ordinal()) {
+      if (!keyConjuncts_.isEmpty()) {
+        output.append(detailPrefix
+            + "key predicates: " + getExplainString(keyConjuncts_, detailLevel) + "\n");
+      }
       if (!Bytes.equals(startKey_, HConstants.EMPTY_START_ROW)) {
         output.append(detailPrefix + "start key: " + printKey(startKey_) + "\n");
       }
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index a31bb50..6bd4e65 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -24,7 +24,6 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
-import java.util.ListIterator;
 import java.util.Map;
 import java.util.Set;
 
@@ -63,7 +62,6 @@ import org.apache.impala.catalog.FeFsTable;
 import org.apache.impala.catalog.FeHBaseTable;
 import org.apache.impala.catalog.FeKuduTable;
 import org.apache.impala.catalog.FeTable;
-import org.apache.impala.catalog.Type;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.common.NotImplementedException;
@@ -1029,60 +1027,6 @@ public class SingleNodePlanner {
   }
 
   /**
-   * Transform '=', '<[=]' and '>[=]' comparisons for given slot into
-   * ValueRange. Also removes those predicates which were used for the construction
-   * of ValueRange from 'conjuncts_'. Only looks at comparisons w/ string constants
-   * (ie, the bounds of the result can be evaluated with Expr::GetValue(NULL)).
-   * HBase row key filtering works only if the row key is mapped to a string column and
-   * the expression is a string constant expression.
-   * If there are multiple competing comparison predicates that could be used
-   * to construct a ValueRange, only the first one from each category is chosen.
-   */
-  private ValueRange createHBaseValueRange(SlotDescriptor d, List<Expr> conjuncts) {
-    ListIterator<Expr> i = conjuncts.listIterator();
-    ValueRange result = null;
-    while (i.hasNext()) {
-      Expr e = i.next();
-      if (!(e instanceof BinaryPredicate)) continue;
-      BinaryPredicate comp = (BinaryPredicate) e;
-      if ((comp.getOp() == BinaryPredicate.Operator.NE)
-          || (comp.getOp() == BinaryPredicate.Operator.DISTINCT_FROM)
-          || (comp.getOp() == BinaryPredicate.Operator.NOT_DISTINCT)) {
-        continue;
-      }
-      Expr slotBinding = comp.getSlotBinding(d.getId());
-      if (slotBinding == null || !slotBinding.isConstant() ||
-          !slotBinding.getType().equals(Type.STRING)) {
-        continue;
-      }
-
-      if (comp.getOp() == BinaryPredicate.Operator.EQ) {
-        i.remove();
-        return ValueRange.createEqRange(slotBinding);
-      }
-
-      if (result == null) result = new ValueRange();
-
-      // TODO: do we need copies here?
-      if (comp.getOp() == BinaryPredicate.Operator.GT
-          || comp.getOp() == BinaryPredicate.Operator.GE) {
-        if (result.getLowerBound() == null) {
-          result.setLowerBound(slotBinding);
-          result.setLowerBoundInclusive(comp.getOp() == BinaryPredicate.Operator.GE);
-          i.remove();
-        }
-      } else {
-        if (result.getUpperBound() == null) {
-          result.setUpperBound(slotBinding);
-          result.setUpperBoundInclusive(comp.getOp() == BinaryPredicate.Operator.LE);
-          i.remove();
-        }
-      }
-    }
-    return result;
-  }
-
-  /**
    * Returns plan tree for an inline view ref:
    * - predicates from the enclosing scope that can be evaluated directly within
    *   the inline-view plan are pushed down
@@ -1381,6 +1325,9 @@ public class SingleNodePlanner {
     } else if (table instanceof FeHBaseTable) {
       // HBase table
       scanNode = new HBaseScanNode(ctx_.getNextNodeId(), tblRef.getDesc());
+      scanNode.addConjuncts(conjuncts);
+      scanNode.init(analyzer);
+      return scanNode;
     } else if (tblRef.getTable() instanceof FeKuduTable) {
       scanNode = new KuduScanNode(ctx_.getNextNodeId(), tblRef.getDesc(), conjuncts);
       scanNode.init(analyzer);
@@ -1389,29 +1336,6 @@ public class SingleNodePlanner {
       throw new NotImplementedException(
           "Planning not implemented for table ref class: " + tblRef.getClass());
     }
-    // TODO: move this to HBaseScanNode.init();
-    Preconditions.checkState(scanNode instanceof HBaseScanNode);
-    List<ValueRange> keyRanges = new ArrayList<>();
-    // determine scan predicates for clustering cols
-    for (int i = 0; i < tblRef.getTable().getNumClusteringCols(); ++i) {
-      SlotDescriptor slotDesc = analyzer.getColumnSlot(
-          tblRef.getDesc(), tblRef.getTable().getColumns().get(i));
-      if (slotDesc == null || !slotDesc.getType().isStringType()) {
-        // the hbase row key is mapped to a non-string type
-        // (since it's stored in ascii it will be lexicographically ordered,
-        // and non-string comparisons won't work)
-        keyRanges.add(null);
-      } else {
-        // create ValueRange from conjuncts_ for slot; also removes conjuncts_ that were
-        // used as input for filter
-        keyRanges.add(createHBaseValueRange(slotDesc, conjuncts));
-      }
-    }
-
-    ((HBaseScanNode)scanNode).setKeyRanges(keyRanges);
-    scanNode.addConjuncts(conjuncts);
-    scanNode.init(analyzer);
-    return scanNode;
   }
 
   /**
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 26c0438..4345bfb 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -130,6 +130,26 @@ public class PlannerTest extends PlannerTestBase {
     runPlannerTestFile("hbase");
   }
 
+  /**
+   * Test of HBase in the case of disabling the key scan.
+   * Normally the HBase scan node goes out to HBase to query the
+   * set of keys within the target key range. There are times when this
+   * can fail. In these times we fall back to using HMS row count and
+   * the estimated key predicate cardinality (which will use key column
+   * NDV.) It is hard to test this case in "real life" with an actual
+   * HBase cluster. Instead, we simply disable the key scan via an
+   * option, then rerun all HBase tests with keys.
+   *
+   * TODO: Once node cardinality is available (IMPALA-8021), compare
+   * estimated cardinality with both methods to ensure we get adequate
+   * estimates.
+   */
+  @Test
+  public void testHbaseNoKeyEstimate() {
+    runPlannerTestFile("hbase-no-key-est",
+        ImmutableSet.of(PlannerTestOption.DISABLE_HBASE_KEY_ESTIMATE));
+  }
+
   @Test
   public void testInsert() {
     runPlannerTestFile("insert");
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
index d9e168f..80afd92 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
@@ -415,6 +415,8 @@ public class PlannerTestBase extends FrontendTestBase {
     TQueryCtx queryCtx = TestUtils.createQueryContext(
         dbName, System.getProperty("user.name"));
     queryCtx.client_request.query_options = testCase.getOptions();
+    queryCtx.setDisable_hbase_row_est(
+        testOptions.contains(PlannerTestOption.DISABLE_HBASE_KEY_ESTIMATE));
     // Test single node plan, scan range locations, and column lineage.
     TExecRequest singleNodeExecRequest = testPlan(testCase, Section.PLAN, queryCtx.deepCopy(),
         testOptions, errorLog, actualOutput);
@@ -812,7 +814,12 @@ public class PlannerTestBase extends FrontendTestBase {
     // to ignore these values (for backward compatibility.) Turn this option
     // on for test that validate cardinality calculations: joins, scan
     // cardinality, etc.
-    VALIDATE_CARDINALITY
+    VALIDATE_CARDINALITY,
+    // If set, disables the normal HBase key estimate scan in favor of using
+    // HMS table stats and key predicate selectivity. Enable this to test
+    // the case when HBase key stats are unavailable (such as due to overly
+    // restrictive key predicates).
+    DISABLE_HBASE_KEY_ESTIMATE
   }
 
   protected void runPlannerTestFile(String testFile, TQueryOptions options) {
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
index 993f432..ad45570 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
@@ -88,6 +88,7 @@ PLAN-ROOT SINK
 |  mem-estimate=0B mem-reservation=0B thread-reservation=0
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '10', id <= '20'
    start key: 10
    stop key: 20\0
    hbase filters: d:string_col EQUAL '4'
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase-no-key-est.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase-no-key-est.test
new file mode 100644
index 0000000..e60ec9f
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase-no-key-est.test
@@ -0,0 +1,299 @@
+# if the row key is mapped as a string col, range predicates are applied to the scan
+select * from functional_hbase.stringids
+where id = '5'
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id = '5'
+   start key: 5
+   stop key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=1
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 5:5\0
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id = '5'
+   start key: 5
+   stop key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=1
+====
+select * from functional_hbase.stringids
+where id > '5'
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '5'
+   start key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 5\0:7
+  HBASE KEYRANGE 7:<unbounded>
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '5'
+   start key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+====
+select * from functional_hbase.stringids
+where id >= '5'
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '5'
+   start key: 5
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 5:7
+  HBASE KEYRANGE 7:<unbounded>
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '5'
+   start key: 5
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+====
+select * from functional_hbase.stringids
+where id < '5'
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id < '5'
+   stop key: 5
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 3:5
+  HBASE KEYRANGE <unbounded>:3
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id < '5'
+   stop key: 5
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+====
+select * from functional_hbase.stringids
+where id <= '5'
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id <= '5'
+   stop key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+====
+select * from functional_hbase.stringids
+where id > '4' and id < '5'
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '4', id < '5'
+   start key: 4\0
+   stop key: 5
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 4\0:5
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '4', id < '5'
+   start key: 4\0
+   stop key: 5
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+====
+select * from functional_hbase.stringids
+where id >= '4' and id < '5'
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id < '5'
+   start key: 4
+   stop key: 5
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 4:5
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id < '5'
+   start key: 4
+   stop key: 5
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+====
+select * from functional_hbase.stringids
+where id > '4' and id <= '5'
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '4', id <= '5'
+   start key: 4\0
+   stop key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 4\0:5\0
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '4', id <= '5'
+   start key: 4\0
+   stop key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+====
+select * from functional_hbase.stringids
+where id >= '4' and id <= '5'
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
+   start key: 4
+   stop key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 4:5\0
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
+   start key: 4
+   stop key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=111
+====
+# mix of predicates, hbase filters and start/stop keys
+select * from functional_hbase.stringids
+where string_col = '4' and tinyint_col = 5 and id >= '4' and id <= '5'
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
+   start key: 4
+   stop key: 5\0
+   hbase filters: d:string_col EQUAL '4'
+   predicates: tinyint_col = 5, string_col = '4'
+   row-size=107B cardinality=1
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 4:5\0
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
+   start key: 4
+   stop key: 5\0
+   hbase filters: d:string_col EQUAL '4'
+   predicates: tinyint_col = 5, string_col = '4'
+   row-size=107B cardinality=1
+====
+# IMP-1188 - row key predicate is a constant expr.
+select * from functional_hbase.stringids
+where id = concat('', '5')
+and tinyint_col = 5
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id = '5'
+   start key: 5
+   stop key: 5\0
+   predicates: tinyint_col = 5
+   row-size=107B cardinality=1
+====
+# IMP-1188 - row key predicate is a constant expr.
+select * from functional_hbase.stringids
+where string_col = '4' and tinyint_col = 5
+  and id >= concat('', '4') and id <= concat('5', '')
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
+   start key: 4
+   stop key: 5\0
+   hbase filters: d:string_col EQUAL '4'
+   predicates: tinyint_col = 5, string_col = '4'
+   row-size=107B cardinality=1
+---- SCANRANGELOCATIONS
+NODE 0:
+  HBASE KEYRANGE 4:5\0
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
+   start key: 4
+   stop key: 5\0
+   hbase filters: d:string_col EQUAL '4'
+   predicates: tinyint_col = 5, string_col = '4'
+   row-size=107B cardinality=1
+====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
index e52addf..886fb05 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
@@ -38,6 +38,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id = '5'
    start key: 5
    stop key: 5\0
    predicates: tinyint_col = 5
@@ -51,6 +52,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id = '5'
    start key: 5
    stop key: 5\0
    predicates: tinyint_col = 5
@@ -63,6 +65,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '5'
    start key: 5\0
    predicates: tinyint_col = 5
    row-size=107B cardinality=953
@@ -76,6 +79,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '5'
    start key: 5\0
    predicates: tinyint_col = 5
    row-size=107B cardinality=953
@@ -87,6 +91,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '5'
    start key: 5
    predicates: tinyint_col = 5
    row-size=107B cardinality=953
@@ -100,6 +105,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '5'
    start key: 5
    predicates: tinyint_col = 5
    row-size=107B cardinality=953
@@ -111,6 +117,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id < '5'
    stop key: 5
    predicates: tinyint_col = 5
    row-size=107B cardinality=969
@@ -124,6 +131,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id < '5'
    stop key: 5
    predicates: tinyint_col = 5
    row-size=107B cardinality=969
@@ -135,6 +143,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id <= '5'
    stop key: 5\0
    predicates: tinyint_col = 5
    row-size=107B cardinality=1.45K
@@ -146,6 +155,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '4', id < '5'
    start key: 4\0
    stop key: 5
    predicates: tinyint_col = 5
@@ -159,6 +169,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '4', id < '5'
    start key: 4\0
    stop key: 5
    predicates: tinyint_col = 5
@@ -171,6 +182,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id < '5'
    start key: 4
    stop key: 5
    predicates: tinyint_col = 5
@@ -184,6 +196,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id < '5'
    start key: 4
    stop key: 5
    predicates: tinyint_col = 5
@@ -196,6 +209,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '4', id <= '5'
    start key: 4\0
    stop key: 5\0
    predicates: tinyint_col = 5
@@ -209,6 +223,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id > '4', id <= '5'
    start key: 4\0
    stop key: 5\0
    predicates: tinyint_col = 5
@@ -221,6 +236,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
    start key: 4
    stop key: 5\0
    predicates: tinyint_col = 5
@@ -234,6 +250,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
    start key: 4
    stop key: 5\0
    predicates: tinyint_col = 5
@@ -429,6 +446,7 @@ where string_col = '4' and tinyint_col = 5 and id >= '4' and id <= '5'
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
    start key: 4
    stop key: 5\0
    hbase filters: d:string_col EQUAL '4'
@@ -443,6 +461,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
    start key: 4
    stop key: 5\0
    hbase filters: d:string_col EQUAL '4'
@@ -475,6 +494,7 @@ and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id = '5'
    start key: 5
    stop key: 5\0
    predicates: tinyint_col = 5
@@ -488,6 +508,7 @@ where string_col = '4' and tinyint_col = 5
 PLAN-ROOT SINK
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
    start key: 4
    stop key: 5\0
    hbase filters: d:string_col EQUAL '4'
@@ -502,6 +523,7 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN HBASE [functional_hbase.stringids]
+   key predicates: id >= '4', id <= '5'
    start key: 4
    stop key: 5\0
    hbase filters: d:string_col EQUAL '4'
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
index 409f979..1321c6a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
@@ -406,6 +406,7 @@ PLAN-ROOT SINK
 |  row-size=202B cardinality=11
 |
 |--01:SCAN HBASE [functional_hbase.stringids b]
+|     key predicates: b.id = '5'
 |     start key: 5
 |     stop key: 5\0
 |     predicates: b.tinyint_col = 5
@@ -440,6 +441,7 @@ PLAN-ROOT SINK
 |--03:EXCHANGE [BROADCAST]
 |  |
 |  01:SCAN HBASE [functional_hbase.stringids b]
+|     key predicates: b.id = '5'
 |     start key: 5
 |     stop key: 5\0
 |     predicates: b.tinyint_col = 5