You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2015/02/27 23:23:01 UTC
[2/2] drill git commit: DRILL-1742: Use Hive stats when planning queries on Hive data sources
DRILL-1742: Use Hive stats when planning queries on Hive data sources
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/02d23cb8
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/02d23cb8
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/02d23cb8
Branch: refs/heads/master
Commit: 02d23cb8dc21dfb151aa3d2fc64404c81d0b23cc
Parents: 853ac26
Author: adeneche <ad...@gmail.com>
Authored: Tue Jan 20 12:23:58 2015 -0800
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Feb 27 13:48:10 2015 -0800
----------------------------------------------------------------------
.../apache/drill/exec/store/hive/HiveScan.java | 24 +++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/02d23cb8/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
index 35db8ef..b96fda4 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
@@ -87,6 +87,12 @@ public class HiveScan extends AbstractGroupScan {
@JsonIgnore
Map<InputSplit, Partition> partitionMap = new HashMap();
+ /*
+ * total number of rows (obtained from metadata store)
+ */
+ @JsonIgnore
+ private long rowCount = 0;
+
@JsonCreator
public HiveScan(@JsonProperty("hive-table") HiveReadEntry hiveReadEntry,
@JsonProperty("storage-plugin") String storagePluginName,
@@ -118,6 +124,7 @@ public class HiveScan extends AbstractGroupScan {
this.partitionMap = that.partitionMap;
this.storagePlugin = that.storagePlugin;
this.storagePluginName = that.storagePluginName;
+ this.rowCount = that.rowCount;
}
public List<SchemaPath> getColumns() {
@@ -168,6 +175,16 @@ public class HiveScan extends AbstractGroupScan {
partitionMap.put(split, partition);
}
}
+ final String numRowsProp = properties.getProperty("numRows");
+ logger.trace("HiveScan num rows property = {}", numRowsProp);
+ if (numRowsProp != null) {
+ final int numRows = Integer.valueOf(numRowsProp);
+ // starting from hive-0.13, when no statistics are available, this property is set to -1
+ // it's important to note that the value returned by hive may not be up to date
+ if (numRows > 0) {
+ rowCount += numRows;
+ }
+ }
}
@Override
@@ -268,7 +285,12 @@ public class HiveScan extends AbstractGroupScan {
data += split.getLength();
}
- long estRowCount = data/1024;
+ long estRowCount = rowCount;
+ if (estRowCount == 0) {
+ // having a rowCount of 0 can mean the statistics were never computed
+ estRowCount = data/1024;
+ }
+ logger.debug("estimated row count = {}, stats row count = {}", estRowCount, rowCount);
return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, estRowCount, 1, data);
} catch (IOException e) {
throw new DrillRuntimeException(e);