You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/03/26 18:31:42 UTC

svn commit: r1461234 [1/5] - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/exec/persistence/ test/queries/clientpositive/ test/results/clientpositive/

Author: hashutosh
Date: Tue Mar 26 17:31:41 2013
New Revision: 1461234

URL: http://svn.apache.org/r1461234
Log:
HIVE-3381 : Result of outer join is not valid (Navis via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/mapjoin_test_outer.q
    hive/trunk/ql/src/test/results/clientpositive/mapjoin_test_outer.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
    hive/trunk/ql/src/test/results/clientpositive/auto_join21.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join29.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join7.q.out
    hive/trunk/ql/src/test/results/clientpositive/auto_join_filters.q.out
    hive/trunk/ql/src/test/results/clientpositive/join21.q.out
    hive/trunk/ql/src/test/results/clientpositive/join7.q.out
    hive/trunk/ql/src/test/results/clientpositive/join_1to1.q.out
    hive/trunk/ql/src/test/results/clientpositive/join_filters.q.out
    hive/trunk/ql/src/test/results/clientpositive/join_filters_overlap.q.out
    hive/trunk/ql/src/test/results/clientpositive/mapjoin1.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=1461234&r1=1461233&r2=1461234&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Tue Mar 26 17:31:41 2013
@@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.exec;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -52,40 +51,6 @@ public abstract class CommonJoinOperator
   protected static final Log LOG = LogFactory.getLog(CommonJoinOperator.class
       .getName());
 
-  /**
-   * IntermediateObject.
-   *
-   */
-  public static class IntermediateObject {
-    ArrayList<Object>[] objs;
-    int curSize;
-
-    public IntermediateObject(ArrayList<Object>[] objs, int curSize) {
-      this.objs = objs;
-      this.curSize = curSize;
-    }
-
-    public ArrayList<Object>[] getObjs() {
-      return objs;
-    }
-
-    public int getCurSize() {
-      return curSize;
-    }
-
-    public void pushObj(ArrayList<Object> newObj) {
-      objs[curSize++] = newObj;
-    }
-
-    public void popObj() {
-      curSize--;
-    }
-
-    public Object topObj() {
-      return objs[curSize - 1];
-    }
-  }
-
   protected transient int numAliases; // number of aliases
   /**
    * The expressions for join inputs.
@@ -97,7 +62,7 @@ public abstract class CommonJoinOperator
    */
   protected transient List<ExprNodeEvaluator>[] joinFilters;
 
-  protected transient int[][] filterMap;
+  protected transient int[][] filterMaps;
 
   /**
    * The ObjectInspectors for the join inputs.
@@ -123,14 +88,13 @@ public abstract class CommonJoinOperator
   protected transient boolean[] nullsafes;
 
   public transient boolean noOuterJoin;
-  protected transient Object[] dummyObj; // for outer joins, contains the
-  // potential nulls for the concerned
-  // aliases
-  protected transient RowContainer<ArrayList<Object>>[] dummyObjVectors; // empty
-  // rows
-  // for
-  // each
-  // table
+
+  // for outer joins, contains the potential nulls for the concerned aliases
+  protected transient ArrayList<Object>[] dummyObj;
+
+  // empty rows for each table
+  protected transient RowContainer<ArrayList<Object>>[] dummyObjVectors;
+
   protected transient int totalSz; // total size of the composite object
 
   // keys are the column names. basically this maps the position of the column
@@ -264,7 +228,7 @@ public abstract class CommonJoinOperator
     joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(
         joinValuesObjectInspectors,NOTSKIPBIGTABLE, tagLen);
 
-    filterMap = conf.getFilterMap();
+    filterMaps = conf.getFilterMap();
 
     if (noOuterJoin) {
       rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors;
@@ -282,7 +246,7 @@ public abstract class CommonJoinOperator
         JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors,NOTSKIPBIGTABLE, tagLen);
     }
 
-    dummyObj = new Object[numAliases];
+    dummyObj = new ArrayList[numAliases];
     dummyObjVectors = new RowContainer[numAliases];
 
     joinEmitInterval = HiveConf.getIntVar(hconf,
@@ -314,7 +278,7 @@ public abstract class CommonJoinOperator
           rowContainerStandardObjectInspectors[pos],
           alias, 1, spillTableDesc, conf, !hasFilter(pos), reporter);
 
-      values.add((ArrayList<Object>) dummyObj[pos]);
+      values.add(dummyObj[pos]);
       dummyObjVectors[pos] = values;
 
       // if serde is null, the input doesn't need to be spilled out
@@ -328,6 +292,23 @@ public abstract class CommonJoinOperator
     }
 
     forwardCache = new Object[totalSz];
+    aliasFilterTags = new short[numAliases];
+    Arrays.fill(aliasFilterTags, (byte)0xff);
+
+    filterTags = new short[numAliases];
+    skipVectors = new boolean[numAliases][];
+    for(int i = 0; i < skipVectors.length; i++) {
+      skipVectors[i] = new boolean[i + 1];
+    }
+    intermediate = new List[numAliases];
+
+    offsets = new int[numAliases + 1];
+    int sum = 0;
+    for (int i = 0; i < numAliases; i++) {
+      offsets[i] = sum;
+      sum += joinValues[order[i]].size();
+    }
+    offsets[numAliases] = sum;
 
     outputObjInspector = getJoinOutputObjectInspector(order,
         joinValuesStandardObjectInspectors, conf);
@@ -338,16 +319,11 @@ public abstract class CommonJoinOperator
       }
     }
 
-    LOG.info("JOIN "
-        + outputObjInspector.getTypeName()
-        + " totalsz = " + totalSz);
-
+    LOG.info("JOIN " + outputObjInspector.getTypeName() + " totalsz = " + totalSz);
   }
 
+  transient boolean newGroupStarted = false;
 
-
-
-transient boolean newGroupStarted = false;
   @Override
   public void startGroup() throws HiveException {
     LOG.trace("Join: Starting new group");
@@ -367,353 +343,267 @@ transient boolean newGroupStarted = fals
   }
 
   protected transient Byte alias;
+  protected transient Object[] forwardCache;
 
-  transient Object[] forwardCache;
+  // pre-calculated offset values for each alias
+  protected transient int[] offsets;
 
-  private void createForwardJoinObject(IntermediateObject intObj,
-      boolean[] nullsArr) throws HiveException {
-    int p = 0;
+  // an array of bitvectors where each entry denotes whether the element is to
+  // be used or not (whether it is null or not). The size of the bitvector is
+  // same as the number of inputs(aliases) under consideration currently.
+  // When all inputs are accounted for, the output is forwarded appropriately.
+  protected transient boolean[][] skipVectors;
+
+  // caches objects before constructing forward cache
+  protected transient List[] intermediate;
+
+  // filter tags for objects
+  protected transient short[] filterTags;
+
+  // ANDed value of all filter tags in current join group
+  // if any of values passes on outer join alias (which makes zero for the tag alias),
+  // it means there exists a pair for it, and no need to check outer join (just do inner join)
+  //
+  // for example, with table a, b something like,
+  //   a, b = 100, 10 | 100, 20 | 100, 30
+  //
+  // the query "a FOJ b ON a.k=b.k AND a.v>0 AND b.v>20" makes values with tag
+  //
+  //   a = 100, 10, 00000010 | 100, 20, 00000010 | 100, 30, 00000010 : 0/1 for 'b' (alias 1)
+  //   b = 100, 10, 00000001 | 100, 20, 00000001 | 100, 30, 00000000 : 0/1 for 'a' (alias 0)
+  //
+  // which makes aliasFilterTags for a = 00000010, for b = 00000000
+  //
+  // for LO, b = 0000000(0) means there is a pair object(s) in 'b' (has no 'a'-null case)
+  // for RO, a = 000000(1)0 means there is no pair object in 'a' (has null-'b' case)
+  //
+  // result : 100, 10 + 100, 30 | 100, 20 + 100, 30 | 100, 30 + 100, 30 |
+  //          N       + 100, 10 | N       + 100, 20
+  //
+  protected transient short[] aliasFilterTags;
+
+  // all evaluation should be processed here for valid aliasFilterTags
+  //
+  // for MapJoin, filter tag is pre-calculated in MapredLocalTask and stored with value.
+  // when reading the hashtable, MapJoinObjectValue calculates alias filter and provides it to join
+  protected ArrayList<Object> getFilteredValue(byte alias, Object row) throws HiveException {
+    boolean hasFilter = hasFilter(alias);
+    ArrayList<Object> nr = JoinUtil.computeValues(row, joinValues[alias],
+        joinValuesObjectInspectors[alias], hasFilter);
+    if (hasFilter) {
+      short filterTag = JoinUtil.isFiltered(row, joinFilters[alias],
+          joinFilterObjectInspectors[alias], filterMaps[alias]);
+      nr.add(new ShortWritable(filterTag));
+      aliasFilterTags[alias] &= filterTag;
+    }
+    return nr;
+  }
+
+  // fill forwardCache with skipvector
+  private void createForwardJoinObject(boolean[] skip) throws HiveException {
+    Arrays.fill(forwardCache, null);
+
+    boolean forward = false;
     for (int i = 0; i < numAliases; i++) {
-      Byte alias = order[i];
-      int sz = joinValues[alias].size();
-      if (nullsArr[i]) {
-        for (int j = 0; j < sz; j++) {
-          forwardCache[p++] = null;
-        }
-      } else {
-        ArrayList<Object> obj = intObj.getObjs()[i];
-        for (int j = 0; j < sz; j++) {
-          forwardCache[p++] = obj.get(j);
+      if (!skip[i]) {
+        for (int j = offsets[i]; j < offsets[i + 1]; j++) {
+          forwardCache[j] = intermediate[i].get(j - offsets[i]);
         }
+        forward = true;
       }
     }
-
-    forward(forwardCache, outputObjInspector);
-    countAfterReport = 0;
-  }
-
-  private void copyOldArray(boolean[] src, boolean[] dest) {
-    for (int i = 0; i < src.length; i++) {
-      dest[i] = src[i];
+    if (forward) {
+      forward(forwardCache, null);
+      countAfterReport = 0;
     }
   }
 
-  private ArrayList<boolean[]> joinObjectsInnerJoin(
-      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
-      ArrayList<Object> newObj, IntermediateObject intObj, int left,
-      boolean newObjNull) {
-    if (newObjNull) {
-      return resNulls;
-    }
-    Iterator<boolean[]> nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext()) {
-      boolean[] oldNulls = nullsIter.next();
-      boolean oldObjNull = oldNulls[left];
-      if (!oldObjNull) {
-        boolean[] newNulls = new boolean[intObj.getCurSize()];
-        copyOldArray(oldNulls, newNulls);
-        newNulls[oldNulls.length] = false;
-        resNulls.add(newNulls);
-      }
-    }
-    return resNulls;
-  }
+  // entry point (aliasNum = 0)
+  private void genJoinObject() throws HiveException {
+    boolean rightFirst = true;
+    boolean hasFilter = hasFilter(order[0]);
+    AbstractRowContainer<ArrayList<Object>> aliasRes = storage[order[0]];
+    for (List<Object> rightObj = aliasRes.first(); rightObj != null; rightObj = aliasRes.next()) {
+      boolean rightNull = rightObj == dummyObj[0];
+      if (hasFilter) {
+        filterTags[0] = getFilterTag(rightObj);
+      }
+      skipVectors[0][0] = rightNull;
+      intermediate[0] = rightObj;
 
-  /**
-   * Implement semi join operator.
-   */
-  private ArrayList<boolean[]> joinObjectsLeftSemiJoin(
-      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
-      ArrayList<Object> newObj, IntermediateObject intObj, int left,
-      boolean newObjNull) {
-    if (newObjNull) {
-      return resNulls;
-    }
-    Iterator<boolean[]> nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext()) {
-      boolean[] oldNulls = nullsIter.next();
-      boolean oldObjNull = oldNulls[left];
-      if (!oldObjNull) {
-        boolean[] newNulls = new boolean[intObj.getCurSize()];
-        copyOldArray(oldNulls, newNulls);
-        newNulls[oldNulls.length] = false;
-        resNulls.add(newNulls);
-      }
-    }
-    return resNulls;
-  }
-
-  private ArrayList<boolean[]> joinObjectsLeftOuterJoin(
-      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
-      ArrayList<Object> newObj, IntermediateObject intObj, int left, int right,
-      boolean newObjNull) {
-    // newObj is null if is already null or
-    // if the row corresponding to the left alias does not pass through filter
-    newObjNull |= isLeftFiltered(left, right, intObj.getObjs()[left]);
-
-    Iterator<boolean[]> nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext()) {
-      boolean[] oldNulls = nullsIter.next();
-      boolean oldObjNull = oldNulls[left];
-      boolean[] newNulls = new boolean[intObj.getCurSize()];
-      copyOldArray(oldNulls, newNulls);
-      if (oldObjNull) {
-        newNulls[oldNulls.length] = true;
-      } else {
-        newNulls[oldNulls.length] = newObjNull;
-      }
-      resNulls.add(newNulls);
+      genObject(1, rightFirst, rightNull);
+      rightFirst = false;
     }
-    return resNulls;
   }
 
-  private ArrayList<boolean[]> joinObjectsRightOuterJoin(
-      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
-      ArrayList<Object> newObj, IntermediateObject intObj, int left, int right,
-      boolean newObjNull, boolean firstRow) {
-    if (newObjNull) {
-      return resNulls;
-    }
-
-    if (inputNulls.isEmpty() && firstRow) {
-      boolean[] newNulls = new boolean[intObj.getCurSize()];
-      for (int i = 0; i < intObj.getCurSize() - 1; i++) {
-        newNulls[i] = true;
-      }
-      newNulls[intObj.getCurSize() - 1] = newObjNull;
-      resNulls.add(newNulls);
-      return resNulls;
-    }
+  // creates objects in recursive manner
+  private void genObject(int aliasNum, boolean allLeftFirst, boolean allLeftNull)
+      throws HiveException {
+    if (aliasNum < numAliases) {
 
-    boolean allOldObjsNull = firstRow;
+      boolean[] skip = skipVectors[aliasNum];
+      boolean[] prevSkip = skipVectors[aliasNum - 1];
 
-    Iterator<boolean[]> nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext()) {
-      boolean[] oldNulls = nullsIter.next();
-      if (!oldNulls[left]) {
-        allOldObjsNull = false;
-        break;
-      }
-    }
+      JoinCondDesc joinCond = condn[aliasNum - 1];
+      int type = joinCond.getType();
+      int left = joinCond.getLeft();
+      int right = joinCond.getRight();
 
-    // if the row does not pass through filter, all old Objects are null
-    if (isRightFiltered(left, right, newObj)) {
-      allOldObjsNull = true;
-    }
-    nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext()) {
-      boolean[] oldNulls = nullsIter.next();
-      boolean oldObjNull = oldNulls[left] || allOldObjsNull;
+      // search for match in the rhs table
+      AbstractRowContainer<ArrayList<Object>> aliasRes = storage[order[aliasNum]];
 
-      if (!oldObjNull) {
-        boolean[] newNulls = new boolean[intObj.getCurSize()];
-        copyOldArray(oldNulls, newNulls);
-        newNulls[oldNulls.length] = newObjNull;
-        resNulls.add(newNulls);
-      } else if (allOldObjsNull) {
-        boolean[] newNulls = new boolean[intObj.getCurSize()];
-        for (int i = 0; i < intObj.getCurSize() - 1; i++) {
-          newNulls[i] = true;
+      boolean done = false;
+      boolean loopAgain = false;
+      boolean tryLOForFO = type == JoinDesc.FULL_OUTER_JOIN;
+
+      boolean rightFirst = true;
+      for (List<Object> rightObj = aliasRes.first(); !done && rightObj != null;
+           rightObj = loopAgain ? rightObj : aliasRes.next(), rightFirst = loopAgain = false) {
+        System.arraycopy(prevSkip, 0, skip, 0, prevSkip.length);
+
+        boolean rightNull = rightObj == dummyObj[aliasNum];
+        if (hasFilter(order[aliasNum])) {
+          filterTags[aliasNum] = getFilterTag(rightObj);
         }
-        newNulls[oldNulls.length] = newObjNull;
-        resNulls.add(newNulls);
-        return resNulls;
-      }
-    }
-    return resNulls;
-  }
-
-  private ArrayList<boolean[]> joinObjectsFullOuterJoin(
-      ArrayList<boolean[]> resNulls, ArrayList<boolean[]> inputNulls,
-      ArrayList<Object> newObj, IntermediateObject intObj, int left, int right,
-      boolean newObjNull, boolean firstRow) {
-    if (newObjNull) {
-      Iterator<boolean[]> nullsIter = inputNulls.iterator();
-      while (nullsIter.hasNext()) {
-        boolean[] oldNulls = nullsIter.next();
-        boolean[] newNulls = new boolean[intObj.getCurSize()];
-        copyOldArray(oldNulls, newNulls);
-        newNulls[oldNulls.length] = newObjNull;
-        resNulls.add(newNulls);
-      }
-      return resNulls;
-    }
-
-    if (inputNulls.isEmpty() && firstRow) {
-      boolean[] newNulls = new boolean[intObj.getCurSize()];
-      for (int i = 0; i < intObj.getCurSize() - 1; i++) {
-        newNulls[i] = true;
-      }
-      newNulls[intObj.getCurSize() - 1] = newObjNull;
-      resNulls.add(newNulls);
-      return resNulls;
-    }
-
-    boolean allOldObjsNull = firstRow;
-
-    Iterator<boolean[]> nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext()) {
-      boolean[] oldNulls = nullsIter.next();
-      if (!oldNulls[left]) {
-        allOldObjsNull = false;
-        break;
-      }
-    }
-
-    // if the row does not pass through filter, all old Objects are null
-    if (isRightFiltered(left, right, newObj)) {
-      allOldObjsNull = true;
-    }
-    boolean rhsPreserved = false;
-
-    nullsIter = inputNulls.iterator();
-    while (nullsIter.hasNext()) {
-      boolean[] oldNulls = nullsIter.next();
-      // old obj is null even if the row corresponding to the left alias
-      // does not pass through filter
-      boolean oldObjNull = oldNulls[left] || allOldObjsNull
-          || isLeftFiltered(left, right, intObj.getObjs()[left]);
-      if (!oldObjNull) {
-        boolean[] newNulls = new boolean[intObj.getCurSize()];
-        copyOldArray(oldNulls, newNulls);
-        newNulls[oldNulls.length] = newObjNull;
-        resNulls.add(newNulls);
-      } else if (oldObjNull) {
-        boolean[] newNulls = new boolean[intObj.getCurSize()];
-        copyOldArray(oldNulls, newNulls);
-        newNulls[oldNulls.length] = true;
-        resNulls.add(newNulls);
-
-        if (allOldObjsNull && !rhsPreserved) {
-          newNulls = new boolean[intObj.getCurSize()];
-          for (int i = 0; i < oldNulls.length; i++) {
-            newNulls[i] = true;
+        skip[right] = rightNull;
+
+        if (type == JoinDesc.INNER_JOIN) {
+          innerJoin(skip, left, right);
+        } else if (type == JoinDesc.LEFT_SEMI_JOIN) {
+          if (innerJoin(skip, left, right)) {
+            // if left-semi-join found a match, skipping the rest of the rows in the
+            // rhs table of the semijoin
+            done = true;
+          }
+        } else if (type == JoinDesc.LEFT_OUTER_JOIN ||
+            (type == JoinDesc.FULL_OUTER_JOIN && rightNull)) {
+          int result = leftOuterJoin(skip, left, right);
+          if (result < 0) {
+            continue;
+          }
+          done = result > 0;
+        } else if (type == JoinDesc.RIGHT_OUTER_JOIN ||
+            (type == JoinDesc.FULL_OUTER_JOIN && allLeftNull)) {
+          if (allLeftFirst && !rightOuterJoin(skip, left, right) ||
+            !allLeftFirst && !innerJoin(skip, left, right)) {
+            continue;
+          }
+        } else if (type == JoinDesc.FULL_OUTER_JOIN) {
+          if (tryLOForFO && leftOuterJoin(skip, left, right) > 0) {
+            loopAgain = allLeftFirst;
+            done = !loopAgain;
+            tryLOForFO = false;
+          } else if (allLeftFirst && !rightOuterJoin(skip, left, right) ||
+            !allLeftFirst && !innerJoin(skip, left, right)) {
+            continue;
           }
-          newNulls[oldNulls.length] = false;
-          resNulls.add(newNulls);
-          rhsPreserved = true;
         }
+        intermediate[aliasNum] = rightObj;
+
+        // recursively call the join for the other rhs tables
+        genObject(aliasNum + 1, allLeftFirst && rightFirst, allLeftNull && rightNull);
       }
+    } else if (!allLeftNull) {
+      createForwardJoinObject(skipVectors[numAliases - 1]);
     }
-    return resNulls;
   }
 
-  /*
-   * The new input is added to the list of existing inputs. Each entry in the
-   * array of inputNulls denotes the entries in the intermediate object to be
-   * used. The intermediate object is augmented with the new object, and list of
-   * nulls is changed appropriately. The list will contain all non-nulls for a
-   * inner join. The outer joins are processed appropriately.
-   */
-  private ArrayList<boolean[]> joinObjects(ArrayList<boolean[]> inputNulls,
-      ArrayList<Object> newObj, IntermediateObject intObj, int joinPos,
-      boolean firstRow) {
-    ArrayList<boolean[]> resNulls = new ArrayList<boolean[]>();
-    boolean newObjNull = newObj == dummyObj[joinPos] ? true : false;
-    if (joinPos == 0) {
-      if (newObjNull) {
-        return null;
-      }
-      boolean[] nulls = new boolean[1];
-      nulls[0] = newObjNull;
-      resNulls.add(nulls);
-      return resNulls;
-    }
-
-    int left = condn[joinPos - 1].getLeft();
-    int right = condn[joinPos - 1].getRight();
-    int type = condn[joinPos - 1].getType();
-
-    // process all nulls for RIGHT and FULL OUTER JOINS
-    if (((type == JoinDesc.RIGHT_OUTER_JOIN) || (type == JoinDesc.FULL_OUTER_JOIN))
-        && !newObjNull && (inputNulls == null) && firstRow) {
-      boolean[] newNulls = new boolean[intObj.getCurSize()];
-      for (int i = 0; i < newNulls.length - 1; i++) {
-        newNulls[i] = true;
-      }
-      newNulls[newNulls.length - 1] = false;
-      resNulls.add(newNulls);
-      return resNulls;
-    }
-
-    if (inputNulls == null) {
-      return null;
-    }
-
-    if (type == JoinDesc.INNER_JOIN) {
-      return joinObjectsInnerJoin(resNulls, inputNulls, newObj, intObj, left,
-          newObjNull);
-    } else if (type == JoinDesc.LEFT_OUTER_JOIN) {
-      return joinObjectsLeftOuterJoin(resNulls, inputNulls, newObj, intObj,
-          left, right, newObjNull);
-    } else if (type == JoinDesc.RIGHT_OUTER_JOIN) {
-      return joinObjectsRightOuterJoin(resNulls, inputNulls, newObj, intObj,
-          left, right, newObjNull, firstRow);
-    } else if (type == JoinDesc.LEFT_SEMI_JOIN) {
-      return joinObjectsLeftSemiJoin(resNulls, inputNulls, newObj, intObj,
-          left, newObjNull);
-    }
-
-    assert (type == JoinDesc.FULL_OUTER_JOIN);
-    return joinObjectsFullOuterJoin(resNulls, inputNulls, newObj, intObj, left, right,
-        newObjNull, firstRow);
-  }
-
-  /*
-   * genObject is a recursive function. For the inputs, a array of bitvectors is
-   * maintained (inputNulls) where each entry denotes whether the element is to
-   * be used or not (whether it is null or not). The size of the bitvector is
-   * same as the number of inputs under consideration currently. When all inputs
-   * are accounted for, the output is forwarded appropriately.
-   */
-  private void genObject(ArrayList<boolean[]> inputNulls, int aliasNum,
-      IntermediateObject intObj, boolean firstRow) throws HiveException {
-    boolean childFirstRow = firstRow;
-    boolean skipping = false;
+  // inner join
+  private boolean innerJoin(boolean[] skip, int left, int right) {
+    if (!isInnerJoin(skip, left, right)) {
+      Arrays.fill(skip, true);
+      return false;
+    }
+    return true;
+  }
 
-    if (aliasNum < numAliases) {
+  // LO
+  //
+  // LEFT\RIGHT   skip  filtered   valid
+  // skip        --(1)     --(1)    --(1)
+  // filtered    +-(1)     +-(1)    +-(1)
+  // valid       +-(1)     +-(4*)   ++(2)
+  //
+  // * If right alias has any pair for left alias, continue (3)
+  // -1 for continue : has pair but not in this turn
+  //  0 for inner join (++) : join and continue LO
+  //  1 for left outer join (+-) : join and skip further LO
+  private int leftOuterJoin(boolean[] skip, int left, int right) {
+    if (skip[left] || skip[right] || !isLeftValid(left, right)) {
+      skip[right] = true;
+      return 1;   // case 1
+    }
+    if (isRightValid(left, right)) {
+      return 0;   // case 2
+    }
+    if (hasRightPairForLeft(left, right)) {
+      return -1;  // case 3
+    }
+    skip[right] = true;
+    return 1;     // case 4
+  }
 
-      // search for match in the rhs table
-      AbstractRowContainer<ArrayList<Object>> aliasRes = storage[order[aliasNum]];
+  // RO
+  //
+  // LEFT\RIGHT   skip  filtered   valid
+  // skip        --(1)     -+(1)   -+(1)
+  // filtered    --(1)     -+(1)   -+(4*)
+  // valid       --(1)     -+(1)   ++(2)
+  //
+  // * If left alias has any pair for right alias, continue (3)
+  // false for continue : has pair but not in this turn
+  private boolean rightOuterJoin(boolean[] skip, int left, int right) {
+    if (skip[left] || skip[right] || !isRightValid(left, right)) {
+      Arrays.fill(skip, 0, right, true);
+      return true;  // case 1
+    }
+    if (isLeftValid(left, right)) {
+      return true;  // case 2
+    }
+    if (hasLeftPairForRight(left, right)) {
+      return false; // case 3
+    }
+    Arrays.fill(skip, 0, right, true);
+    return true;    // case 4
+  }
 
-      for (ArrayList<Object> newObj = aliasRes.first(); newObj != null; newObj = aliasRes
-          .next()) {
+  // If left and right aliases are both valid, the two values will be inner joined.
+  private boolean isInnerJoin(boolean[] skip, int left, int right) {
+    return !skip[left] && !skip[right] &&
+        isLeftValid(left, right) && isRightValid(left, right);
+  }
 
-        // check for skipping in case of left semi join
-        if (aliasNum > 0
-            && condn[aliasNum - 1].getType() == JoinDesc.LEFT_SEMI_JOIN
-            && newObj != dummyObj[aliasNum]) { // successful match
-          skipping = true;
-        }
+  // check if left is valid
+  private boolean isLeftValid(int left, int right) {
+    return !hasFilter(left) || !JoinUtil.isFiltered(filterTags[left], right);
+  }
 
-        intObj.pushObj(newObj);
+  // check if right is valid
+  private boolean isRightValid(int left, int right) {
+    return !hasFilter(right) || !JoinUtil.isFiltered(filterTags[right], left);
+  }
 
-        // execute the actual join algorithm
-        ArrayList<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj,
-            aliasNum, childFirstRow);
+  // check if any left pair exists for right objects
+  private boolean hasLeftPairForRight(int left, int right) {
+    return !JoinUtil.isFiltered(aliasFilterTags[left], right);
+  }
 
-        // recursively call the join the other rhs tables
-        genObject(newNulls, aliasNum + 1, intObj, firstRow);
+  // check if any right pair exists for left objects
+  private boolean hasRightPairForLeft(int left, int right) {
+    return !JoinUtil.isFiltered(aliasFilterTags[right], left);
+  }
 
-        intObj.popObj();
-        firstRow = false;
+  private boolean hasAnyFiltered(int alias, List<Object> row) {
+    return row == dummyObj[alias] || hasFilter(alias) && JoinUtil.hasAnyFiltered(getFilterTag(row));
+  }
 
-        // if left-semi-join found a match, skipping the rest of the rows in the
-        // rhs table of the semijoin
-        if (skipping) {
-          break;
-        }
-      }
-    } else {
-      if (inputNulls == null) {
-        return;
-      }
-      Iterator<boolean[]> nullsIter = inputNulls.iterator();
-      while (nullsIter.hasNext()) {
-        boolean[] nullsVec = nullsIter.next();
-        createForwardJoinObject(intObj, nullsVec);
-      }
-    }
+  protected final boolean hasFilter(int alias) {
+    return filterMaps != null && filterMaps[alias] != null;
+  }
+
+  // get tag value from object (last of list)
+  protected final short getFilterTag(List<Object> row) {
+    return ((ShortWritable) row.get(row.size() - 1)).get();
   }
 
   /**
@@ -780,7 +670,7 @@ transient boolean newGroupStarted = fals
         }
 
         if (alw.size() == 0) {
-          alw.add((ArrayList<Object>) dummyObj[i]);
+          alw.add(dummyObj[i]);
           hasNulls = true;
         } else if (condn[i].getPreserved()) {
           preserve = true;
@@ -818,7 +708,7 @@ transient boolean newGroupStarted = fals
         } else {
           if (alw.size() == 0) {
             hasEmpty = true;
-            alw.add((ArrayList<Object>) dummyObj[i]);
+            alw.add(dummyObj[i]);
           } else if (!hasEmpty && alw.size() == 1) {
             if (hasAnyFiltered(alias, alw.first())) {
               hasEmpty = true;
@@ -848,40 +738,11 @@ transient boolean newGroupStarted = fals
         LOG.trace("called genUniqueJoinObject");
       } else {
         LOG.trace("calling genObject");
-        genObject(null, 0, new IntermediateObject(new ArrayList[numAliases], 0),
-            true);
+        genJoinObject();
         LOG.trace("called genObject");
       }
     }
-  }
-
-  // returns filter result of left object by filters associated with right alias
-  private boolean isLeftFiltered(int left, int right, List<Object> leftObj) {
-    if (joinValues[order[left]].size() < leftObj.size()) {
-      ShortWritable filter = (ShortWritable) leftObj.get(leftObj.size() - 1);
-      return JoinUtil.isFiltered(filter.get(), right);
-    }
-    return false;
-  }
-
-  // returns filter result of right object by filters associated with left alias
-  private boolean isRightFiltered(int left, int right, List<Object> rightObj) {
-    if (joinValues[order[right]].size() < rightObj.size()) {
-      ShortWritable filter = (ShortWritable) rightObj.get(rightObj.size() - 1);
-      return JoinUtil.isFiltered(filter.get(), left);
-    }
-    return false;
-  }
-
-  // returns object has any filtered tag
-  private boolean hasAnyFiltered(int alias, List<Object> row) {
-    return row == dummyObj[alias] ||
-        hasFilter(alias) &&
-        JoinUtil.hasAnyFiltered(((ShortWritable) row.get(row.size() - 1)).get());
-  }
-
-  protected final boolean hasFilter(int alias) {
-    return filterMap != null && filterMap[alias] != null;
+    Arrays.fill(aliasFilterTags, (byte)0xff);
   }
 
   protected void reportProgress() {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java?rev=1461234&r1=1461233&r2=1461234&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java Tue Mar 26 17:31:41 2013
@@ -81,7 +81,7 @@ public class HashTableSinkOperator exten
    */
   protected transient List<ExprNodeEvaluator>[] joinFilters;
 
-  protected transient int[][] filterMap;
+  protected transient int[][] filterMaps;
 
   protected transient int numAliases; // number of aliases
   /**
@@ -121,16 +121,18 @@ public class HashTableSinkOperator exten
     SerDe serde;
     TableDesc tblDesc;
     Configuration conf;
+    boolean hasFilter;
 
     /**
      * @param standardOI
      * @param serde
      */
     public HashTableSinkObjectCtx(ObjectInspector standardOI, SerDe serde, TableDesc tblDesc,
-        Configuration conf) {
+        boolean hasFilter, Configuration conf) {
       this.standardOI = standardOI;
       this.serde = serde;
       this.tblDesc = tblDesc;
+      this.hasFilter = hasFilter;
       this.conf = conf;
     }
 
@@ -152,6 +154,10 @@ public class HashTableSinkOperator exten
       return tblDesc;
     }
 
+    public boolean hasFilterTag() {
+      return hasFilter;
+    }
+
     public Configuration getConf() {
       return conf;
     }
@@ -193,7 +199,7 @@ public class HashTableSinkOperator exten
     totalSz = 0;
 
     noOuterJoin = conf.isNoOuterJoin();
-    filterMap = conf.getFilterMap();
+    filterMaps = conf.getFilterMap();
 
     int tagLen = conf.getTagLength();
 
@@ -228,7 +234,7 @@ public class HashTableSinkOperator exten
           continue;
         }
         List<ObjectInspector> rcOIs = joinValuesObjectInspectors[alias];
-        if (filterMap != null && filterMap[alias] != null) {
+        if (filterMaps != null && filterMaps[alias] != null) {
           // for each alias, add object inspector for filter tag as the last element
           rcOIs = new ArrayList<ObjectInspector>(rcOIs);
           rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
@@ -298,9 +304,12 @@ public class HashTableSinkOperator exten
     MapJoinMetaData.clear();
     MapJoinMetaData.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx(
         ObjectInspectorUtils.getStandardObjectInspector(keySerializer.getObjectInspector(),
-            ObjectInspectorCopyOption.WRITABLE), keySerializer, keyTableDesc, hconf));
+            ObjectInspectorCopyOption.WRITABLE), keySerializer, keyTableDesc, false, hconf));
   }
 
+  private boolean hasFilter(int alias) {
+    return filterMaps != null && filterMaps[alias] != null;
+  }
   /*
    * This operator only process small tables Read the key/value pairs Load them into hashtable
    */
@@ -320,9 +329,8 @@ public class HashTableSinkOperator exten
           joinKeysObjectInspectors[alias]);
 
       Object[] value = JoinUtil.computeMapJoinValues(row, joinValues[alias],
-          joinValuesObjectInspectors[alias], joinFilters[alias], joinFilterObjectInspectors
-              [alias], filterMap == null ? null : filterMap[alias]);
-
+          joinValuesObjectInspectors[alias], joinFilters[alias], joinFilterObjectInspectors[alias],
+          filterMaps == null ? null : filterMaps[alias]);
 
       HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue> hashTable = mapJoinTables[alias];
 
@@ -382,8 +390,9 @@ public class HashTableSinkOperator exten
     StandardStructObjectInspector standardOI = ObjectInspectorFactory
         .getStandardStructObjectInspector(newNames, newFields);
 
-    MapJoinMetaData.put(Integer.valueOf(metadataValueTag[tag]), new HashTableSinkObjectCtx(
-        standardOI, valueSerDe, valueTableDesc, hconf));
+    int alias = Integer.valueOf(metadataValueTag[tag]);
+    MapJoinMetaData.put(alias, new HashTableSinkObjectCtx(
+        standardOI, valueSerDe, valueTableDesc, hasFilter(alias), hconf));
   }
 
   @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java?rev=1461234&r1=1461233&r2=1461234&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java Tue Mar 26 17:31:41 2013
@@ -80,12 +80,7 @@ public class JoinOperator extends Common
         nextSz = joinEmitInterval;
       }
 
-
-      ArrayList<Object> nr = JoinUtil.computeValues(row, joinValues[alias],
-          joinValuesObjectInspectors[alias], joinFilters[alias],
-          joinFilterObjectInspectors[alias],
-          filterMap == null ? null : filterMap[alias]);
-
+      ArrayList<Object> nr = getFilteredValue(alias, row);
 
       if (handleSkewJoin) {
         skewJoinKeyContext.handleSkew(tag);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java?rev=1461234&r1=1461233&r2=1461234&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java Tue Mar 26 17:31:41 2013
@@ -220,24 +220,20 @@ public class JoinUtil {
   /**
    * Return the value as a standard object. StandardObject can be inspected by a
    * standard ObjectInspector.
+   * If the row will be tagged with a filter result, reserve one more slot for the tag.
    */
   public static ArrayList<Object> computeValues(Object row,
-      List<ExprNodeEvaluator> valueFields, List<ObjectInspector> valueFieldsOI,
-      List<ExprNodeEvaluator> filters, List<ObjectInspector> filtersOI,
-      int[] filterMap) throws HiveException {
+      List<ExprNodeEvaluator> valueFields, List<ObjectInspector> valueFieldsOI, boolean hasFilter)
+      throws HiveException {
 
     // Compute the values
-    ArrayList<Object> nr = new ArrayList<Object>(valueFields.size());
+    int reserve = hasFilter ? valueFields.size() + 1 : valueFields.size();
+    ArrayList<Object> nr = new ArrayList<Object>(reserve);
     for (int i = 0; i < valueFields.size(); i++) {
       nr.add(ObjectInspectorUtils.copyToStandardObject(valueFields.get(i)
           .evaluate(row), valueFieldsOI.get(i),
           ObjectInspectorCopyOption.WRITABLE));
     }
-    if (filterMap != null) {
-      // add whether the row is filtered or not.
-      nr.add(new ShortWritable(isFiltered(row, filters, filtersOI, filterMap)));
-    }
-
     return nr;
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java?rev=1461234&r1=1461233&r2=1461234&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java Tue Mar 26 17:31:41 2013
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.plan.Ta
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -118,7 +119,7 @@ public class MapJoinOperator extends Abs
     keySerializer.initialize(null, keyTableDesc.getProperties());
     MapJoinMetaData.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx(
         ObjectInspectorUtils.getStandardObjectInspector(keySerializer.getObjectInspector(),
-            ObjectInspectorCopyOption.WRITABLE), keySerializer, keyTableDesc, hconf));
+            ObjectInspectorCopyOption.WRITABLE), keySerializer, keyTableDesc, false, hconf));
 
     for (int pos = 0; pos < order.length; pos++) {
       if (pos == posBigTable) {
@@ -134,9 +135,10 @@ public class MapJoinOperator extends Abs
           null);
       valueSerDe.initialize(null, valueTableDesc.getProperties());
 
+      ObjectInspector inspector = valueSerDe.getObjectInspector();
       MapJoinMetaData.put(Integer.valueOf(pos), new HashTableSinkObjectCtx(ObjectInspectorUtils
-          .getStandardObjectInspector(valueSerDe.getObjectInspector(),
-              ObjectInspectorCopyOption.WRITABLE), valueSerDe, valueTableDesc, hconf));
+          .getStandardObjectInspector(inspector, ObjectInspectorCopyOption.WRITABLE),
+          valueSerDe, valueTableDesc, hasFilter(pos), hconf));
     }
   }
 
@@ -228,10 +230,7 @@ public class MapJoinOperator extends Abs
       // compute keys and values as StandardObjects
       AbstractMapJoinKey key = JoinUtil.computeMapJoinKeys(row, joinKeys[alias],
           joinKeysObjectInspectors[alias]);
-      ArrayList<Object> value = JoinUtil.computeValues(row, joinValues[alias],
-          joinValuesObjectInspectors[alias], joinFilters[alias], joinFilterObjectInspectors
-              [alias], filterMap == null ? null : filterMap[alias]);
-
+      ArrayList<Object> value = getFilteredValue(alias, row);
 
       // Add the value to the ArrayList
       storage[alias].add(value);
@@ -252,6 +251,7 @@ public class MapJoinOperator extends Abs
           } else {
             rowContainer.reset(o.getObj());
             storage[pos] = rowContainer;
+            aliasFilterTags[pos] = o.getAliasFilter();
           }
         }
       }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1461234&r1=1461233&r2=1461234&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Tue Mar 26 17:31:41 2013
@@ -256,10 +256,7 @@ public class SMBMapJoinOperator extends 
     // compute keys and values as StandardObjects
     ArrayList<Object> key = JoinUtil.computeKeys(row, joinKeys[alias],
         joinKeysObjectInspectors[alias]);
-    ArrayList<Object> value = JoinUtil.computeValues(row, joinValues[alias],
-        joinValuesObjectInspectors[alias], joinFilters[alias],
-        joinFilterObjectInspectors[alias],
-        filterMap == null ? null : filterMap[alias]);
+    ArrayList<Object> value = getFilteredValue(alias, row);
 
 
     //have we reached a new key group?

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java?rev=1461234&r1=1461233&r2=1461234&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java Tue Mar 26 17:31:41 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.Ma
 import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.io.Writable;
@@ -40,7 +41,7 @@ public class MapJoinObjectValue implemen
   protected transient int metadataTag;
   protected transient MapJoinRowContainer<Object[]> obj;
 
-
+  protected transient byte aliasFilter = (byte) 0xff;
 
   public MapJoinObjectValue() {
 
@@ -55,6 +56,10 @@ public class MapJoinObjectValue implemen
     this.obj = obj;
   }
 
+  public byte getAliasFilter() {
+    return aliasFilter;
+  }
+
   @Override
   public boolean equals(Object o) {
     if (o instanceof MapJoinObjectValue) {
@@ -86,7 +91,6 @@ public class MapJoinObjectValue implemen
       // get the tableDesc from the map stored in the mapjoin operator
       HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag));
       int sz = in.readInt();
-
       MapJoinRowContainer<Object[]> res = new MapJoinRowContainer<Object[]>();
       if (sz > 0) {
         int numCols = in.readInt();
@@ -102,7 +106,11 @@ public class MapJoinObjectValue implemen
             if (memObj == null) {
               res.add(new ArrayList<Object>(0).toArray());
             } else {
-              res.add(memObj.toArray());
+              Object[] array = memObj.toArray();
+              res.add(array);
+              if (ctx.hasFilterTag()) {
+                aliasFilter &= ((ShortWritable)array[array.length - 1]).get();
+              }
             }
           }
         } else {

Added: hive/trunk/ql/src/test/queries/clientpositive/mapjoin_test_outer.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_test_outer.q?rev=1461234&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/mapjoin_test_outer.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/mapjoin_test_outer.q Tue Mar 26 17:31:41 2013
@@ -0,0 +1,54 @@
+set hive.auto.convert.join = false;
+--HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition
+
+create table dest_1 (key STRING, value STRING) stored as textfile;
+insert overwrite table dest_1 select * from src1 order by src1.value limit 8;
+insert into table dest_1 select "333444","555666" from src1 limit 1;
+
+create table dest_2 (key STRING, value STRING) stored as textfile;
+
+insert into table dest_2 select * from dest_1;
+
+SELECT * FROM src1
+  RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key)
+  JOIN dest_2 src3 ON (src2.key = src3.key)
+  SORT BY src1.key, src2.key, src3.key;
+
+explain
+SELECT /*+ mapjoin(src1, src2) */ * FROM src1
+  RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key)
+  JOIN dest_2 src3 ON (src2.key = src3.key)
+  SORT BY src1.key, src2.key, src3.key;
+
+SELECT /*+ mapjoin(src1, src2) */ * FROM src1
+  RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key)
+  JOIN dest_2 src3 ON (src2.key = src3.key)
+  SORT BY src1.key, src2.key, src3.key;
+
+SELECT /*+ mapjoin(src1, src2) */ * FROM src1
+  RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key)
+  JOIN dest_2 src3 ON (src1.key = src3.key)
+  SORT BY src1.key, src2.key, src3.key;
+
+set hive.auto.convert.join = true;
+
+SELECT * FROM src1
+  LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key)
+  JOIN dest_2 src3 ON (src1.key = src3.key)
+  SORT BY src1.key, src2.key, src3.key;
+
+SELECT * FROM src1
+  LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key)
+  JOIN dest_2 src3 ON (src2.key = src3.key)
+  SORT BY src1.key, src2.key, src3.key;
+
+explain
+SELECT * FROM src1
+  RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key)
+  JOIN dest_2 src3 ON (src2.key = src3.key)
+  SORT BY src1.key, src2.key, src3.key;
+
+SELECT * FROM src1
+  RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key)
+  JOIN dest_2 src3 ON (src2.key = src3.key)
+  SORT BY src1.key, src2.key, src3.key;