You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2015/02/27 23:23:01 UTC

[2/2] drill git commit: DRILL-1742: Use Hive stats when planning queries on Hive data sources

DRILL-1742: Use Hive stats when planning queries on Hive data sources


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/02d23cb8
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/02d23cb8
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/02d23cb8

Branch: refs/heads/master
Commit: 02d23cb8dc21dfb151aa3d2fc64404c81d0b23cc
Parents: 853ac26
Author: adeneche <ad...@gmail.com>
Authored: Tue Jan 20 12:23:58 2015 -0800
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Feb 27 13:48:10 2015 -0800

----------------------------------------------------------------------
 .../apache/drill/exec/store/hive/HiveScan.java  | 24 +++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/02d23cb8/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
index 35db8ef..b96fda4 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
@@ -87,6 +87,12 @@ public class HiveScan extends AbstractGroupScan {
   @JsonIgnore
   Map<InputSplit, Partition> partitionMap = new HashMap();
 
+  /*
+   * total number of rows (obtained from metadata store)
+   */
+  @JsonIgnore
+  private long rowCount = 0;
+
   @JsonCreator
   public HiveScan(@JsonProperty("hive-table") HiveReadEntry hiveReadEntry,
                   @JsonProperty("storage-plugin") String storagePluginName,
@@ -118,6 +124,7 @@ public class HiveScan extends AbstractGroupScan {
     this.partitionMap = that.partitionMap;
     this.storagePlugin = that.storagePlugin;
     this.storagePluginName = that.storagePluginName;
+    this.rowCount = that.rowCount;
   }
 
   public List<SchemaPath> getColumns() {
@@ -168,6 +175,16 @@ public class HiveScan extends AbstractGroupScan {
         partitionMap.put(split, partition);
       }
     }
+    final String numRowsProp = properties.getProperty("numRows");
+    logger.trace("HiveScan num rows property = {}", numRowsProp);
+    if (numRowsProp != null) {
+      final int numRows = Integer.valueOf(numRowsProp);
+      // Starting from hive-0.13, this property is set to -1 when no statistics are available.
+      // Note that the value returned by Hive may be stale, i.e. not up to date.
+      if (numRows > 0) {
+        rowCount += numRows;
+      }
+    }
   }
 
   @Override
@@ -268,7 +285,12 @@ public class HiveScan extends AbstractGroupScan {
           data += split.getLength();
       }
 
-      long estRowCount = data/1024;
+      long estRowCount = rowCount;
+      if (estRowCount == 0) {
+        // having a rowCount of 0 can mean the statistics were never computed
+        estRowCount = data/1024;
+      }
+      logger.debug("estimated row count = {}, stats row count = {}", estRowCount, rowCount);
       return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, estRowCount, 1, data);
     } catch (IOException e) {
       throw new DrillRuntimeException(e);