You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2014/08/12 04:13:02 UTC
svn commit: r1617399 - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ ql/src/java/org/apac...
Author: sershe
Date: Tue Aug 12 02:13:01 2014
New Revision: 1617399
URL: http://svn.apache.org/r1617399
Log:
HIVE-7616 : pre-size mapjoin hashtable based on statistics (Sergey Shelukhin, reviewed by Gunther Hagleitner, Prasanth J, Mostafa Mokhtar, Gopal V)
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Aug 12 02:13:01 2014
@@ -897,7 +897,12 @@ public class HiveConf extends Configurat
"This controls how many partitions can be scanned for each partitioned table.\n" +
"The default value \"-1\" means no limit."),
- HIVEHASHTABLETHRESHOLD("hive.hashtable.initialCapacity", 100000, ""),
+ HIVEHASHTABLEKEYCOUNTADJUSTMENT("hive.hashtable.key.count.adjustment", 1.0f,
+ "Adjustment to mapjoin hashtable size derived from table and column statistics; the estimate" +
+ " of the number of keys is divided by this value. If the value is 0, statistics are not used" +
+ " and hive.hashtable.initialCapacity is used instead."),
+ HIVEHASHTABLETHRESHOLD("hive.hashtable.initialCapacity", 100000, "Initial capacity of " +
+ "mapjoin hashtable if statistics are absent, or if hive.hashtable.key.count.adjustment is set to 0"),
HIVEHASHTABLELOADFACTOR("hive.hashtable.loadfactor", (float) 0.75, ""),
HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE("hive.mapjoin.followby.gby.localtask.max.memory.usage", (float) 0.55,
"This number means how much memory the local task can take to hold the key/value into an in-memory hash table \n" +
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java Tue Aug 12 02:13:01 2014
@@ -181,7 +181,7 @@ public class HashTableSinkOperator exten
if (pos == posBigTableAlias) {
continue;
}
- mapJoinTables[pos] = new HashMapWrapper(hconf);
+ mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java Tue Aug 12 02:13:01 2014
@@ -145,8 +145,7 @@ public final class BytesBytesMultiHashMa
private long[] refs;
private int startingHashBitCount, hashBitCount;
- private int metricPutConflict = 0, metricSameBitsDiffKey = 0,
- metricSameBitsSameKey = 0, metricDiffBits = 0;
+ private int metricPutConflict = 0, metricExpands = 0, metricExpandsUs = 0;
/** We have 39 bits to store list pointer from the first record; this is size limit */
final static long MAX_WB_SIZE = ((long)1) << 38;
@@ -430,16 +429,13 @@ public final class BytesBytesMultiHashMa
*/
private boolean isSameKey(byte[] key, int length, long ref, int hashCode) {
if (!compareHashBits(ref, hashCode)) {
- ++metricDiffBits;
return false; // Hash bits don't match.
}
writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref));
int valueLength = (int)writeBuffers.readVLong(), keyLength = (int)writeBuffers.readVLong();
long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength);
// See the comment in the other isSameKey
- boolean result = writeBuffers.isEqual(key, length, keyOffset, keyLength);
- if (result) { ++metricSameBitsSameKey; } else { ++metricSameBitsDiffKey; }
- return result;
+ return writeBuffers.isEqual(key, length, keyOffset, keyLength);
}
private boolean compareHashBits(long ref, int hashCode) {
@@ -461,6 +457,7 @@ public final class BytesBytesMultiHashMa
}
private void expandAndRehash() {
+ long expandTime = System.nanoTime();
final long[] oldRefs = refs;
long capacity = refs.length << 1;
validateCapacity(capacity);
@@ -492,6 +489,9 @@ public final class BytesBytesMultiHashMa
this.largestNumberOfSteps = maxSteps;
this.hashBitCount = newHashBitCount;
this.resizeThreshold = (int)(capacity * loadFactor);
+ metricExpandsUs += (System.nanoTime() - expandTime) / 1000; // nanoTime() delta is ns; counter is us
+ ++metricExpands;
+
}
/**
@@ -703,11 +703,9 @@ public final class BytesBytesMultiHashMa
}
public void debugDumpMetrics() {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Map metrics: keys " + this.keysAssigned + ", write conflict " + metricPutConflict
- + ", write max dist " + largestNumberOfSteps + ", read neq " + metricDiffBits
- + ", read eq-eq " + metricSameBitsSameKey + ", read eq-neq " + metricSameBitsDiffKey);
- }
+ LOG.info("Map metrics: keys allocated " + this.refs.length +", keys assigned " + keysAssigned
+ + ", write conflict " + metricPutConflict + ", write max dist " + largestNumberOfSteps
+ + ", expanded " + metricExpands + " times in " + metricExpandsUs + "us");
}
private void debugDumpKeyProbe(long keyOffset, int keyLength, int hashCode, int finalSlot) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java Tue Aug 12 02:13:01 2014
@@ -71,25 +71,39 @@ public class HashMapWrapper extends Abst
}
public HashMapWrapper() {
- this(HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD.defaultIntVal,
- HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR.defaultFloatVal, false, false);
+ this(HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT.defaultFloatVal,
+ HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD.defaultIntVal,
+ HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR.defaultFloatVal, false, false, -1);
}
- public HashMapWrapper(Configuration hconf) {
- this(HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
+ public HashMapWrapper(Configuration hconf, long keyCount) {
+ this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
+ HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINLAZYHASHTABLE),
- HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDKEYS));
+ HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDKEYS), keyCount);
}
- private HashMapWrapper(
- int threshold, float loadFactor, boolean useLazyRows, boolean useOptimizedKeys) {
+ private HashMapWrapper(float keyCountAdj, int threshold, float loadFactor,
+ boolean useLazyRows, boolean useOptimizedKeys, long keyCount) {
super(createConstructorMetaData(threshold, loadFactor));
+ threshold = calculateTableSize(keyCountAdj, threshold, loadFactor, keyCount);
mHash = new HashMap<MapJoinKey, MapJoinRowContainer>(threshold, loadFactor);
this.useLazyRows = useLazyRows;
this.useOptimizedKeys = useOptimizedKeys;
}
+ public static int calculateTableSize(
+ float keyCountAdj, int threshold, float loadFactor, long keyCount) {
+ if (keyCount >= 0 && keyCountAdj > 0) {
+ // We have statistics for the table. Size appropriately (adj <= 0 or NaN keeps threshold).
+ threshold = (int)Math.ceil(keyCount / (keyCountAdj * loadFactor));
+ }
+ LOG.info("Key count from statistics is " + keyCount + "; setting map size to " + threshold);
+ return threshold;
+ }
+
+
@Override
public MapJoinRowContainer get(MapJoinKey key) {
return mHash.get(key);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java Tue Aug 12 02:13:01 2014
@@ -59,15 +59,17 @@ public class MapJoinBytesTableContainer
private List<Object> EMPTY_LIST = new ArrayList<Object>(0);
- public MapJoinBytesTableContainer(Configuration hconf, MapJoinObjectSerDeContext valCtx)
- throws SerDeException {
- this(HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
+ public MapJoinBytesTableContainer(Configuration hconf,
+ MapJoinObjectSerDeContext valCtx, long keyCount) throws SerDeException {
+ this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
+ HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
- HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE), valCtx);
+ HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE), valCtx, keyCount);
}
- private MapJoinBytesTableContainer(int threshold, float loadFactor, int wbSize,
- MapJoinObjectSerDeContext valCtx) throws SerDeException {
+ private MapJoinBytesTableContainer(float keyCountAdj, int threshold, float loadFactor,
+ int wbSize, MapJoinObjectSerDeContext valCtx, long keyCount) throws SerDeException {
+ threshold = HashMapWrapper.calculateTableSize(keyCountAdj, threshold, loadFactor, keyCount);
hashMap = new BytesBytesMultiHashMap(threshold, loadFactor, wbSize);
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java Tue Aug 12 02:13:01 2014
@@ -88,6 +88,7 @@ public class HashTableLoader implements
TezContext tezContext = (TezContext) MapredContext.get();
Map<Integer, String> parentToInput = desc.getParentToInput();
+ Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
boolean useOptimizedTables = HiveConf.getBoolVar(
hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
@@ -117,8 +118,11 @@ public class HashTableLoader implements
}
}
isFirstKey = false;
+ Long keyCountObj = parentKeyCounts.get(pos);
+ long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
MapJoinTableContainer tableContainer = useOptimizedTables
- ? new MapJoinBytesTableContainer(hconf, valCtx) : new HashMapWrapper(hconf);
+ ? new MapJoinBytesTableContainer(hconf, valCtx, keyCount)
+ : new HashMapWrapper(hconf, keyCount);
while (kvReader.next()) {
lastKey = tableContainer.putRow(keyCtx, (Writable)kvReader.getCurrentKey(),
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java Tue Aug 12 02:13:01 2014
@@ -26,6 +26,7 @@ import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -33,21 +34,28 @@ import org.apache.hadoop.hive.ql.exec.Op
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.GenTezProcContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.HashTableDummyDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OpTraits;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
public class ReduceSinkMapJoinProc implements NodeProcessor {
@@ -111,18 +119,59 @@ public class ReduceSinkMapJoinProc imple
if (pos == -1) {
throw new SemanticException("Cannot find position of parent in mapjoin");
}
- LOG.debug("Mapjoin "+mapJoinOp+", pos: "+pos+" --> "+parentWork.getName());
- mapJoinOp.getConf().getParentToInput().put(pos, parentWork.getName());
+ MapJoinDesc joinConf = mapJoinOp.getConf();
+ long keyCount = Long.MAX_VALUE, rowCount = Long.MAX_VALUE, bucketCount = 1;
+ Statistics stats = parentRS.getStatistics();
+ if (stats != null) {
+ keyCount = rowCount = stats.getNumRows();
+ if (keyCount <= 0) {
+ keyCount = rowCount = Long.MAX_VALUE;
+ }
+ ArrayList<String> keyCols = parentRS.getConf().getOutputKeyColumnNames();
+ if (keyCols != null && !keyCols.isEmpty()) {
+ // See if we can arrive at a smaller number using distinct stats from key columns.
+ long maxKeyCount = 1;
+ String prefix = Utilities.ReduceField.KEY.toString();
+ for (String keyCol : keyCols) {
+ ExprNodeDesc realCol = parentRS.getColumnExprMap().get(prefix + "." + keyCol);
+ ColStatistics cs = StatsUtils.getColStatisticsFromExpression(null, stats, realCol);
+ if (cs == null || cs.getCountDistint() <= 0) {
+ maxKeyCount = Long.MAX_VALUE;
+ break;
+ }
+ maxKeyCount *= cs.getCountDistint();
+ if (maxKeyCount >= keyCount) {
+ break;
+ }
+ }
+ keyCount = Math.min(maxKeyCount, keyCount);
+ }
+ if (joinConf.isBucketMapJoin()) {
+ OpTraits opTraits = mapJoinOp.getOpTraits();
+ bucketCount = (opTraits == null) ? -1 : opTraits.getNumBuckets();
+ if (bucketCount > 0 && keyCount != Long.MAX_VALUE) { // MAX_VALUE is the "no estimate" sentinel
+ // We cannot obtain a better estimate without CustomPartitionVertex providing it
+ // to us somehow; in which case using statistics would be completely unnecessary.
+ keyCount = Math.max(1, keyCount / bucketCount); // keep a real estimate from rounding to 0
+ }
+ }
+ }
+ LOG.info("Mapjoin " + mapJoinOp + ", pos: " + pos + " --> " + parentWork.getName() + " ("
+ + keyCount + " keys estimated from " + rowCount + " rows, " + bucketCount + " buckets)");
+ joinConf.getParentToInput().put(pos, parentWork.getName());
+ if (keyCount != Long.MAX_VALUE) {
+ joinConf.getParentKeyCounts().put(pos, keyCount);
+ }
int numBuckets = -1;
EdgeType edgeType = EdgeType.BROADCAST_EDGE;
- if (mapJoinOp.getConf().isBucketMapJoin()) {
+ if (joinConf.isBucketMapJoin()) {
// disable auto parallelism for bucket map joins
parentRS.getConf().setAutoParallel(false);
- numBuckets = (Integer) mapJoinOp.getConf().getBigTableBucketNumMapping().values().toArray()[0];
- if (mapJoinOp.getConf().getCustomBucketMapJoin()) {
+ numBuckets = (Integer) joinConf.getBigTableBucketNumMapping().values().toArray()[0];
+ if (joinConf.getCustomBucketMapJoin()) {
edgeType = EdgeType.CUSTOM_EDGE;
} else {
edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java Tue Aug 12 02:13:01 2014
@@ -49,8 +49,10 @@ public class MapJoinDesc extends JoinDes
private transient String bigTableAlias;
// for tez. used to remember which position maps to which logical input
+ // TODO: should these rather be arrays?
private Map<Integer, String> parentToInput = new HashMap<Integer, String>();
-
+ private Map<Integer, Long> parentKeyCounts = new HashMap<Integer, Long>();
+
// for tez. used to remember which type of a Bucket Map Join this is.
private boolean customBucketMapJoin;
@@ -86,6 +88,7 @@ public class MapJoinDesc extends JoinDes
this.bigTablePartSpecToFileMapping = clone.bigTablePartSpecToFileMapping;
this.dumpFilePrefix = clone.dumpFilePrefix;
this.parentToInput = clone.parentToInput;
+ this.parentKeyCounts = clone.parentKeyCounts;
this.customBucketMapJoin = clone.customBucketMapJoin;
}
@@ -127,6 +130,28 @@ public class MapJoinDesc extends JoinDes
this.parentToInput = parentToInput;
}
+ public Map<Integer, Long> getParentKeyCounts() {
+ return parentKeyCounts;
+ }
+
+ @Explain(displayName = "Estimated key counts", normalExplain = false)
+ public String getKeyCountsExplainDesc() {
+ StringBuilder result = null;
+ for (Map.Entry<Integer, Long> entry : parentKeyCounts.entrySet()) {
+ if (result == null) {
+ result = new StringBuilder();
+ } else {
+ result.append(", ");
+ }
+ result.append(parentToInput.get(entry.getKey())).append(" => ").append(entry.getValue());
+ }
+ return result == null ? null : result.toString();
+ }
+
+ public void setParentKeyCount(Map<Integer, Long> parentKeyCounts) {
+ this.parentKeyCounts = parentKeyCounts;
+ }
+
public Map<Byte, int[]> getValueIndices() {
return valueIndices;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java Tue Aug 12 02:13:01 2014
@@ -237,12 +237,14 @@ public class Statistics implements Seria
}
public ColStatistics getColumnStatisticsFromColName(String colName) {
+ if (columnStats == null) {
+ return null;
+ }
for (ColStatistics cs : columnStats.values()) {
if (cs.getColumnName().equalsIgnoreCase(colName)) {
return cs;
}
}
-
return null;
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q Tue Aug 12 02:13:01 2014
@@ -4,7 +4,7 @@ set hive.auto.convert.join.noconditional
-- Since the inputs are small, it should be automatically converted to mapjoin
-explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key);
+explain extended select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key);
explain
select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450';
Modified: hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out Tue Aug 12 02:13:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out?rev=1617399&r1=1617398&r2=1617399&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out Tue Aug 12 02:13:01 2014 differ