You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2015/05/15 00:46:02 UTC
[6/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't
handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt
McCline via Gunther Hagleitner)
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
index 37ccf22..f971727 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
@@ -24,7 +24,9 @@ import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -123,13 +125,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
batchCounter++;
- // Do the per-batch setup for an outer join.
-
- outerPerBatchSetup(batch);
-
- // For outer join, DO NOT apply filters yet. It is incorrect for outer join to
- // apply the filter before hash table matching.
-
final int inputLogicalSize = batch.size;
if (inputLogicalSize == 0) {
@@ -139,6 +134,44 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
return;
}
+ // Do the per-batch setup for an outer join.
+
+ outerPerBatchSetup(batch);
+
+ // For outer join, remember our input rows before ON expression filtering or before
+ // hash table matching so we can generate results for all rows (matching and non matching)
+ // later.
+ boolean inputSelectedInUse = batch.selectedInUse;
+ if (inputSelectedInUse) {
+ // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+ // throw new HiveException("batch.selected is not in sort order and unique");
+ // }
+ System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+ }
+
+ // Filtering for outer join just removes rows available for hash table matching.
+ boolean someRowsFilteredOut = false;
+ if (bigTableFilterExpressions.length > 0) {
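+ // Since the input rows were remembered above, the filters are free to shrink the batch (batch.size / batch.selected) in place.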
+ for (VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(batch);
+ }
+ someRowsFilteredOut = (batch.size != inputLogicalSize);
+ if (LOG.isDebugEnabled()) {
+ if (batch.selectedInUse) {
+ if (inputSelectedInUse) {
+ LOG.debug(CLASS_NAME +
+ " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ } else {
+ LOG.debug(CLASS_NAME +
+ " inputLogicalSize " + inputLogicalSize +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ }
+ }
+ }
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
@@ -146,9 +179,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column Long specific declarations.
*/
@@ -178,12 +208,16 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
*/
JoinUtil.JoinResult joinResult;
- if (!joinColVector.noNulls && joinColVector.isNull[0]) {
- // Null key is no match for whole batch.
+ if (batch.size == 0) {
+ // Whole repeated key batch was filtered out.
+ joinResult = JoinUtil.JoinResult.NOMATCH;
+ } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+ // Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
// Handle *repeated* join key, if found.
long key = vector[0];
+ // LOG.debug(CLASS_NAME + " repeated key " + key);
if (useMinMax && (key < min || key > max)) {
// Out of range for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
@@ -199,7 +233,8 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+ finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+ inputSelectedInUse, inputLogicalSize);
} else {
/*
@@ -213,14 +248,13 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
int selected[] = batch.selected;
boolean selectedInUse = batch.selectedInUse;
- // For outer join we must apply the filter after match and cause some matches to become
- // non-matches, we do not track non-matches here. Instead we remember all non spilled rows
- // and compute non matches later in finishOuter.
int hashMapResultCount = 0;
- int matchCount = 0;
- int nonSpillCount = 0;
+ int allMatchCount = 0;
+ int equalKeySeriesCount = 0;
int spillCount = 0;
+ boolean atLeastOneNonMatch = someRowsFilteredOut;
+
/*
* Single-Column Long specific variables.
*/
@@ -232,9 +266,11 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
- for (int logical = 0; logical < inputLogicalSize; logical++) {
+ for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
/*
* Single-Column Long outer null detection.
*/
@@ -250,8 +286,8 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
// Let a current SPILL equal key series keep going, or
// Let a current NOMATCH keep not matching.
- // Remember non-matches for Outer Join.
- nonSpills[nonSpillCount++] = batchIndex;
+ atLeastOneNonMatch = true;
+
// LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
} else {
@@ -269,9 +305,12 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
// New key.
if (haveSaveKey) {
- // Move on with our count(s).
+ // Move on with our counts.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -300,41 +339,70 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
} else {
saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]);
}
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
- } else {
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
- }
- /*
- * Common outer join result processing.
- */
+ // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + currentKey + " " + saveJoinResult.name());
- switch (saveJoinResult) {
- case MATCH:
- matchs[matchCount] = batchIndex;
- matchHashMapResultIndices[matchCount] = hashMapResultCount;
- matchCount++;
- nonSpills[nonSpillCount++] = batchIndex;
- break;
-
- case SPILL:
- spills[spillCount] = batchIndex;
- spillHashMapResultIndices[spillCount] = hashMapResultCount;
- spillCount++;
- break;
-
- case NOMATCH:
- nonSpills[nonSpillCount++] = batchIndex;
- // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
- break;
+ /*
+ * Common outer join result processing.
+ */
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+ equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+ equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ atLeastOneNonMatch = true;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+ break;
+ }
+ } else {
+ // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+ // Series of equal keys.
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+ break;
+ }
}
+ // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+ // throw new HiveException("allMatchs is not in sort order and unique");
+ // }
}
}
if (haveSaveKey) {
- // Account for last equal key sequence.
+ // Update our counts for the last key.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -345,27 +413,26 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter +
- " matchs " + intArrayToRangesString(matchs, matchCount) +
- " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
- " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+ " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+ " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+ " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+ " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+ " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+ " atLeastOneNonMatch " + atLeastOneNonMatch +
+ " inputSelectedInUse " + inputSelectedInUse +
+ " inputLogicalSize " + inputLogicalSize +
" spills " + intArrayToRangesString(spills, spillCount) +
" spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
// We will generate results for all matching and non-matching rows.
- // Note that scratch1 is undefined at this point -- it's preallocated storage.
- numSel = finishOuter(batch,
- matchs, matchHashMapResultIndices, matchCount,
- nonSpills, nonSpillCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount,
- scratch1);
+ finishOuter(batch,
+ allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+ inputSelectedInUse, inputLogicalSize,
+ spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
index 23a29f7..bea032a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
@@ -128,13 +128,6 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
batchCounter++;
- // Do the per-batch setup for an outer join.
-
- outerPerBatchSetup(batch);
-
- // For outer join, DO NOT apply filters yet. It is incorrect for outer join to
- // apply the filter before hash table matching.
-
final int inputLogicalSize = batch.size;
if (inputLogicalSize == 0) {
@@ -144,6 +137,44 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
return;
}
+ // Do the per-batch setup for an outer join.
+
+ outerPerBatchSetup(batch);
+
+ // For outer join, remember our input rows before ON expression filtering or before
+ // hash table matching so we can generate results for all rows (matching and non matching)
+ // later.
+ boolean inputSelectedInUse = batch.selectedInUse;
+ if (inputSelectedInUse) {
+ // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+ // throw new HiveException("batch.selected is not in sort order and unique");
+ // }
+ System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+ }
+
+ // Filtering for outer join just removes rows available for hash table matching.
+ boolean someRowsFilteredOut = false;
+ if (bigTableFilterExpressions.length > 0) {
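+ // Since the input rows were remembered above, the filters are free to shrink the batch (batch.size / batch.selected) in place.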
+ for (VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(batch);
+ }
+ someRowsFilteredOut = (batch.size != inputLogicalSize);
+ if (LOG.isDebugEnabled()) {
+ if (batch.selectedInUse) {
+ if (inputSelectedInUse) {
+ LOG.debug(CLASS_NAME +
+ " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ } else {
+ LOG.debug(CLASS_NAME +
+ " inputLogicalSize " + inputLogicalSize +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ }
+ }
+ }
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
@@ -151,9 +182,6 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Multi-Key specific declarations.
*/
@@ -199,8 +227,11 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
*/
JoinUtil.JoinResult joinResult;
- if (someKeyInputColumnIsNull) {
- // Any null key column is no match for whole batch.
+ if (batch.size == 0) {
+ // Whole repeated key batch was filtered out.
+ joinResult = JoinUtil.JoinResult.NOMATCH;
+ } else if (someKeyInputColumnIsNull) {
+ // Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
@@ -219,7 +250,8 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+ finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+ inputSelectedInUse, inputLogicalSize);
} else {
/*
@@ -233,14 +265,13 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
int selected[] = batch.selected;
boolean selectedInUse = batch.selectedInUse;
- // For outer join we must apply the filter after match and cause some matches to become
- // non-matches, we do not track non-matches here. Instead we remember all non spilled rows
- // and compute non matches later in finishOuter.
int hashMapResultCount = 0;
- int matchCount = 0;
- int nonSpillCount = 0;
+ int allMatchCount = 0;
+ int equalKeySeriesCount = 0;
int spillCount = 0;
+ boolean atLeastOneNonMatch = someRowsFilteredOut;
+
/*
* Multi-Key specific variables.
*/
@@ -252,9 +283,11 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
- for (int logical = 0; logical < inputLogicalSize; logical++) {
+ for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
/*
* Multi-Key outer null detection.
*/
@@ -272,8 +305,8 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
// Let a current SPILL equal key series keep going, or
// Let a current NOMATCH keep not matching.
- // Remember non-matches for Outer Join.
- nonSpills[nonSpillCount++] = batchIndex;
+ atLeastOneNonMatch = true;
+
// LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
} else {
@@ -292,9 +325,12 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
// New key.
if (haveSaveKey) {
- // Move on with our count(s).
+ // Move on with our counts.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -322,41 +358,68 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
byte[] keyBytes = saveKeyOutput.getData();
int keyLength = saveKeyOutput.getLength();
saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]);
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
- } else {
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
- }
- /*
- * Common outer join result processing.
- */
+ /*
+ * Common outer join result processing.
+ */
- switch (saveJoinResult) {
- case MATCH:
- matchs[matchCount] = batchIndex;
- matchHashMapResultIndices[matchCount] = hashMapResultCount;
- matchCount++;
- nonSpills[nonSpillCount++] = batchIndex;
- break;
-
- case SPILL:
- spills[spillCount] = batchIndex;
- spillHashMapResultIndices[spillCount] = hashMapResultCount;
- spillCount++;
- break;
-
- case NOMATCH:
- nonSpills[nonSpillCount++] = batchIndex;
- // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
- break;
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+ equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+ equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ atLeastOneNonMatch = true;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+ break;
+ }
+ } else {
+ // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+ // Series of equal keys.
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+ break;
+ }
}
+ // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+ // throw new HiveException("allMatchs is not in sort order and unique");
+ // }
}
}
if (haveSaveKey) {
- // Account for last equal key sequence.
+ // Update our counts for the last key.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -367,27 +430,26 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter +
- " matchs " + intArrayToRangesString(matchs, matchCount) +
- " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
- " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+ " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+ " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+ " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+ " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+ " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+ " atLeastOneNonMatch " + atLeastOneNonMatch +
+ " inputSelectedInUse " + inputSelectedInUse +
+ " inputLogicalSize " + inputLogicalSize +
" spills " + intArrayToRangesString(spills, spillCount) +
" spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
// We will generate results for all matching and non-matching rows.
- // Note that scratch1 is undefined at this point -- it's preallocated storage.
- numSel = finishOuter(batch,
- matchs, matchHashMapResultIndices, matchCount,
- nonSpills, nonSpillCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount,
- scratch1);
+ finishOuter(batch,
+ allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+ inputSelectedInUse, inputLogicalSize,
+ spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
index f0af3f6..49efe1a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
@@ -115,13 +115,6 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
batchCounter++;
- // Do the per-batch setup for an outer join.
-
- outerPerBatchSetup(batch);
-
- // For outer join, DO NOT apply filters yet. It is incorrect for outer join to
- // apply the filter before hash table matching.
-
final int inputLogicalSize = batch.size;
if (inputLogicalSize == 0) {
@@ -131,6 +124,44 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
return;
}
+ // Do the per-batch setup for an outer join.
+
+ outerPerBatchSetup(batch);
+
+ // For outer join, remember our input rows before ON expression filtering or before
+ // hash table matching so we can generate results for all rows (matching and non matching)
+ // later.
+ boolean inputSelectedInUse = batch.selectedInUse;
+ if (inputSelectedInUse) {
+ // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+ // throw new HiveException("batch.selected is not in sort order and unique");
+ // }
+ System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+ }
+
+ // Filtering for outer join just removes rows available for hash table matching.
+ boolean someRowsFilteredOut = false;
+ if (bigTableFilterExpressions.length > 0) {
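+ // Since the input rows were remembered above, the filters are free to shrink the batch (batch.size / batch.selected) in place.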
+ for (VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(batch);
+ }
+ someRowsFilteredOut = (batch.size != inputLogicalSize);
+ if (LOG.isDebugEnabled()) {
+ if (batch.selectedInUse) {
+ if (inputSelectedInUse) {
+ LOG.debug(CLASS_NAME +
+ " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ } else {
+ LOG.debug(CLASS_NAME +
+ " inputLogicalSize " + inputLogicalSize +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ }
+ }
+ }
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
@@ -138,9 +169,6 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column String specific declarations.
*/
@@ -172,8 +200,11 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
*/
JoinUtil.JoinResult joinResult;
- if (!joinColVector.noNulls && joinColVector.isNull[0]) {
- // Null key is no match for whole batch.
+ if (batch.size == 0) {
+ // Whole repeated key batch was filtered out.
+ joinResult = JoinUtil.JoinResult.NOMATCH;
+ } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+ // Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
// Handle *repeated* join key, if found.
@@ -190,7 +221,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+ finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+ inputSelectedInUse, inputLogicalSize);
} else {
/*
@@ -204,14 +236,13 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
int selected[] = batch.selected;
boolean selectedInUse = batch.selectedInUse;
- // For outer join we must apply the filter after match and cause some matches to become
- // non-matches, we do not track non-matches here. Instead we remember all non spilled rows
- // and compute non matches later in finishOuter.
int hashMapResultCount = 0;
- int matchCount = 0;
- int nonSpillCount = 0;
+ int allMatchCount = 0;
+ int equalKeySeriesCount = 0;
int spillCount = 0;
+ boolean atLeastOneNonMatch = someRowsFilteredOut;
+
/*
* Single-Column String specific variables.
*/
@@ -223,9 +254,11 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
- for (int logical = 0; logical < inputLogicalSize; logical++) {
+ for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
/*
* Single-Column String outer null detection.
*/
@@ -241,8 +274,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
// Let a current SPILL equal key series keep going, or
// Let a current NOMATCH keep not matching.
- // Remember non-matches for Outer Join.
- nonSpills[nonSpillCount++] = batchIndex;
+ atLeastOneNonMatch = true;
+
// LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
} else {
@@ -262,9 +295,12 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
// New key.
if (haveSaveKey) {
- // Move on with our count(s).
+ // Move on with our counts.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -290,43 +326,69 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
byte[] keyBytes = vector[batchIndex];
int keyStart = start[batchIndex];
int keyLength = length[batchIndex];
-
saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]);
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
- } else {
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
- }
- /*
- * Common outer join result processing.
- */
+ /*
+ * Common outer join result processing.
+ */
- switch (saveJoinResult) {
- case MATCH:
- matchs[matchCount] = batchIndex;
- matchHashMapResultIndices[matchCount] = hashMapResultCount;
- matchCount++;
- nonSpills[nonSpillCount++] = batchIndex;
- break;
-
- case SPILL:
- spills[spillCount] = batchIndex;
- spillHashMapResultIndices[spillCount] = hashMapResultCount;
- spillCount++;
- break;
-
- case NOMATCH:
- nonSpills[nonSpillCount++] = batchIndex;
- // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
- break;
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+ equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+ equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ atLeastOneNonMatch = true;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+ break;
+ }
+ } else {
+ // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+ // Series of equal keys.
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+ break;
+ }
}
+ // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+ // throw new HiveException("allMatchs is not in sort order and unique");
+ // }
}
}
if (haveSaveKey) {
- // Account for last equal key sequence.
+ // Update our counts for the last key.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -337,27 +399,26 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter +
- " matchs " + intArrayToRangesString(matchs, matchCount) +
- " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
- " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+ " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+ " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+ " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+ " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+ " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+ " atLeastOneNonMatch " + atLeastOneNonMatch +
+ " inputSelectedInUse " + inputSelectedInUse +
+ " inputLogicalSize " + inputLogicalSize +
" spills " + intArrayToRangesString(spills, spillCount) +
" spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
// We will generate results for all matching and non-matching rows.
- // Note that scratch1 is undefined at this point -- it's preallocated storage.
- numSel = finishOuter(batch,
- matchs, matchHashMapResultIndices, matchCount,
- nonSpills, nonSpillCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount,
- scratch1);
+ finishOuter(batch,
+ allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+ inputSelectedInUse, inputLogicalSize,
+ spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
index 1c91be6..32b60d0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
@@ -91,7 +91,7 @@ public class VectorMapJoinRowBytesContainer {
}
tmpFile = File.createTempFile("BytesContainer", ".tmp", parentFile);
- LOG.info("BytesContainer created temp file " + tmpFile.getAbsolutePath());
+ LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath());
tmpFile.deleteOnExit();
fileOutputStream = new FileOutputStream(tmpFile);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index f9550c9..6afaec3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -54,13 +54,13 @@ public abstract class VectorMapJoinFastBytesHashMap
slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength);
- // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
keysAssigned++;
} else {
// Add another value.
- // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
slotTriples[tripleIndex + 2] = valueStore.addMore(slotTriples[tripleIndex + 2], valueBytes, 0, valueLength);
- // LOG.info("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
}
}
@@ -77,7 +77,7 @@ public abstract class VectorMapJoinFastBytesHashMap
if (valueRefWord == -1) {
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
- // LOG.info("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null));
+ // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null));
optimizedHashMapResult.set(valueStore, valueRefWord);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 9dcaf8f..dceb99c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -49,11 +49,11 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = 1; // Count.
- // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
keysAssigned++;
} else {
// Add another value.
- // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
slotTriples[tripleIndex + 2]++;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index b6e6321..91d7fd6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -79,13 +79,13 @@ public abstract class VectorMapJoinFastBytesHashTable
while (true) {
int tripleIndex = 3 * slot;
if (slotTriples[tripleIndex] == 0) {
- // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty");
+ // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty");
isNewKey = true;;
break;
}
if (hashCode == slotTriples[tripleIndex + 1] &&
keyStore.equalKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) {
- // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing");
+ // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing");
isNewKey = false;
break;
}
@@ -155,7 +155,7 @@ public abstract class VectorMapJoinFastBytesHashTable
}
// Use old value reference word.
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
newSlotTriples[newTripleIndex] = keyRef;
newSlotTriples[newTripleIndex + 1] = hashCode;
@@ -170,7 +170,7 @@ public abstract class VectorMapJoinFastBytesHashTable
largestNumberOfSteps = newLargestNumberOfSteps;
resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
metricExpands++;
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
+ // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
}
protected long findReadSlot(byte[] keyBytes, int keyStart, int keyLength, long hashCode) {
@@ -181,7 +181,7 @@ public abstract class VectorMapJoinFastBytesHashTable
int i = 0;
while (true) {
int tripleIndex = slot * 3;
- // LOG.info("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
if (slotTriples[tripleIndex] != 0 && hashCode == slotTriples[tripleIndex + 1]) {
// Finally, verify the key bytes match.
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
index f2f42ee..9d95d05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
@@ -112,7 +112,7 @@ public class VectorMapJoinFastKeyStore {
}
keyRefWord |= absoluteKeyOffset;
- // LOG.info("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord));
+ // LOG.debug("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord));
return keyRefWord;
}
@@ -122,7 +122,7 @@ public class VectorMapJoinFastKeyStore {
(int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift);
boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn);
- // LOG.info("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord));
+ // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord));
if (isKeyLengthSmall && storedKeyLengthLength != keyLength) {
return false;
@@ -135,7 +135,7 @@ public class VectorMapJoinFastKeyStore {
// Read big value length we wrote with the value.
storedKeyLengthLength = writeBuffers.readVInt(readPos);
if (storedKeyLengthLength != keyLength) {
- // LOG.info("VectorMapJoinFastKeyStore equalKey no match big length");
+ // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length");
return false;
}
}
@@ -148,11 +148,11 @@ public class VectorMapJoinFastKeyStore {
for (int i = 0; i < keyLength; i++) {
if (currentBytes[currentStart + i] != keyBytes[keyStart + i]) {
- // LOG.info("VectorMapJoinFastKeyStore equalKey no match on bytes");
+ // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes");
return false;
}
}
- // LOG.info("VectorMapJoinFastKeyStore equalKey match on bytes");
+ // LOG.debug("VectorMapJoinFastKeyStore equalKey match on bytes");
return true;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
index d6ad028..4725f55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
@@ -68,7 +68,7 @@ public class VectorMapJoinFastLongHashMap
optimizedHashMapResult.forget();
long hashCode = VectorMapJoinFastLongHashUtil.hashKey(key);
- // LOG.info("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode);
+ // LOG.debug("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode);
long valueRef = findReadSlot(key, hashCode);
JoinUtil.JoinResult joinResult;
if (valueRef == -1) {
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index 2137fb7..17855eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -121,13 +121,13 @@ public abstract class VectorMapJoinFastLongHashTable
int pairIndex = 2 * slot;
long valueRef = slotPairs[pairIndex];
if (valueRef == 0) {
- // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
isNewKey = true;
break;
}
long tableKey = slotPairs[pairIndex + 1];
if (key == tableKey) {
- // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
isNewKey = false;
break;
}
@@ -145,7 +145,7 @@ public abstract class VectorMapJoinFastLongHashTable
// debugDumpKeyProbe(keyOffset, keyLength, hashCode, slot);
}
- // LOG.info("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode));
+ // LOG.debug("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode));
assignSlot(slot, key, isNewKey, currentValue);
@@ -206,7 +206,7 @@ public abstract class VectorMapJoinFastLongHashTable
}
// Use old value reference word.
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
newSlotPairs[newPairIndex] = valueRef;
newSlotPairs[newPairIndex + 1] = tableKey;
@@ -220,7 +220,7 @@ public abstract class VectorMapJoinFastLongHashTable
largestNumberOfSteps = newLargestNumberOfSteps;
resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
metricExpands++;
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
+ // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
}
protected long findReadSlot(long key, long hashCode) {
@@ -235,20 +235,20 @@ public abstract class VectorMapJoinFastLongHashTable
long valueRef = slotPairs[pairIndex];
if (valueRef == 0) {
// Given that we do not delete, an empty slot means no match.
- // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
return -1;
}
long tableKey = slotPairs[pairIndex + 1];
if (key == tableKey) {
- // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
return slotPairs[pairIndex];
}
// Some other key (collision) - keep probing.
probeSlot += (++i);
if (i > largestNumberOfSteps) {
- // LOG.info("VectorMapJoinFastLongHashTable findReadSlot returning not found");
+ // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot returning not found");
// We know we never went that far when we were inserting.
- // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")");
return -1;
}
slot = (int)(probeSlot & logicalHashBucketMask);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
index 373b5f4..4b1d6f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
@@ -81,7 +81,7 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
int newThreshold = HashMapWrapper.calculateTableSize(
keyCountAdj, threshold, loadFactor, keyCount);
- // LOG.info("VectorMapJoinFastTableContainer load newThreshold " + newThreshold);
+ // LOG.debug("VectorMapJoinFastTableContainer load newThreshold " + newThreshold);
VectorMapJoinFastHashTable = createHashTable(newThreshold);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
index caa705c..6491dc6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
@@ -142,7 +142,7 @@ public class VectorMapJoinFastValueStore {
}
public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) {
- // LOG.info("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
+ // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
this.valueStore = valueStore;
this.valueRefWord = valueRefWord;
@@ -473,7 +473,7 @@ public class VectorMapJoinFastValueStore {
valueRefWord |= SmallValueLength.allBitsOnBitShifted;
}
- // LOG.info("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
+ // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
// The lower bits are the absolute value offset.
valueRefWord |= newAbsoluteOffset;
@@ -499,7 +499,7 @@ public class VectorMapJoinFastValueStore {
boolean isOldValueLast =
((oldValueRef & IsLastFlag.flagOnMask) != 0);
- // LOG.info("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
+ // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
/*
* Write information about the old value (which becomes our next) at the beginning
@@ -546,7 +546,7 @@ public class VectorMapJoinFastValueStore {
// The lower bits are the absolute value offset.
newValueRef |= newAbsoluteOffset;
- // LOG.info("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
+ // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
return newValueRef;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
index 60825ce..dc65eaa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
@@ -113,7 +113,7 @@ public class VectorMapJoinOptimizedLongCommon {
}
// byte[] bytes = Arrays.copyOf(currentKey.get(), currentKey.getLength());
- // LOG.info("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
+ // LOG.debug("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
}
@@ -145,7 +145,7 @@ public class VectorMapJoinOptimizedLongCommon {
}
// byte[] bytes = Arrays.copyOf(output.getData(), output.getLength());
- // LOG.info("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
+ // LOG.debug("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
serializedBytes.bytes = output.getData();
serializedBytes.offset = 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 096239e..656a5e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1069,11 +1069,21 @@ public class Vectorizer implements PhysicalPlanResolver {
private boolean validateMapJoinDesc(MapJoinDesc desc) {
byte posBigTable = (byte) desc.getPosBigTable();
List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
+ if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) {
+ LOG.info("Cannot vectorize map work filter expression");
+ return false;
+ }
List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
+ if (!validateExprNodeDesc(keyExprs)) {
+ LOG.info("Cannot vectorize map work key expression");
+ return false;
+ }
List<ExprNodeDesc> valueExprs = desc.getExprs().get(posBigTable);
- return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) &&
- validateExprNodeDesc(keyExprs) &&
- validateExprNodeDesc(valueExprs);
+ if (!validateExprNodeDesc(valueExprs)) {
+ LOG.info("Cannot vectorize map work value expression");
+ return false;
+ }
+ return true;
}
private boolean validateReduceSinkOperator(ReduceSinkOperator op) {
@@ -1089,6 +1099,7 @@ public class Vectorizer implements PhysicalPlanResolver {
for (ExprNodeDesc desc : descList) {
boolean ret = validateExprNodeDesc(desc);
if (!ret) {
+ LOG.info("Cannot vectorize select expression: " + desc.toString());
return false;
}
}
@@ -1110,10 +1121,12 @@ public class Vectorizer implements PhysicalPlanResolver {
}
boolean ret = validateExprNodeDesc(desc.getKeys());
if (!ret) {
+ LOG.info("Cannot vectorize groupby key expression");
return false;
}
ret = validateAggregationDesc(desc.getAggregators(), isReduce);
if (!ret) {
+ LOG.info("Cannot vectorize groupby aggregate expression");
return false;
}
if (isReduce) {
@@ -1248,10 +1261,13 @@ public class Vectorizer implements PhysicalPlanResolver {
}
private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduce) {
- if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) {
+ String udfName = aggDesc.getGenericUDAFName().toLowerCase();
+ if (!supportedAggregationUdfs.contains(udfName)) {
+ LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported");
return false;
}
if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
+ LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported");
return false;
}
// See if we can vectorize the aggregation.
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_join30.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join30.q b/ql/src/test/queries/clientpositive/vector_join30.q
new file mode 100644
index 0000000..2275804
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join30.q
@@ -0,0 +1,160 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src;
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_join_filters.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join_filters.q b/ql/src/test/queries/clientpositive/vector_join_filters.q
new file mode 100644
index 0000000..adf525c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join_filters.q
@@ -0,0 +1,38 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int);
+LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt;
+CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_join_nulls.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join_nulls.q b/ql/src/test/queries/clientpositive/vector_join_nulls.q
new file mode 100644
index 0000000..6cfb7a8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join_nulls.q
@@ -0,0 +1,33 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int);
+LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt;
+CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
index 098d002..62ad9ee 100644
--- a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
+++ b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
@@ -3,6 +3,8 @@ set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;
+-- SORT_QUERY_RESULTS
+
drop table if exists TJOIN1;
drop table if exists TJOIN2;
create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc;