You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/05/09 06:44:18 UTC

svn commit: r1480527 [2/2] - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ test/org/apache/hadoop/hive/ql/exec/vector/expressions/

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java?rev=1480527&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java Thu May  9 04:44:17 2013
@@ -0,0 +1,455 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * In-place row filter: keeps only the rows where string column colNum1 != string column colNum2.
+ * A row with NULL on either side never qualifies. This code is generated from a template.
+ */
+public class FilterStringColNotEqualStringCol extends VectorExpression {
+  private int colNum1;
+  private int colNum2;
+
+  public FilterStringColNotEqualStringCol(int colNum1, int colNum2) {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+  }
+  // Shrinks batch.size / batch.selected in place to the rows passing col1 != col2 (NULLs fail).
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1];
+    BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2];
+    int[] sel = batch.selected;
+    boolean[] nullPos1 = inputColVector1.isNull;
+    boolean[] nullPos2 = inputColVector2.isNull;
+    int n = batch.size;
+    byte[][] vector1 = inputColVector1.vector;
+    byte[][] vector2 = inputColVector2.vector;
+    int[] start1 = inputColVector1.start;
+    int[] start2 = inputColVector2.start;
+    int[] length1 = inputColVector1.length;
+    int[] length2 = inputColVector2.length;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    // handle case where neither input has nulls
+    if (inputColVector1.noNulls && inputColVector2.noNulls) {
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+
+        /* Either all must remain selected or all will be eliminated.
+         * Repeating property will not change.
+         */
+        if (!(StringExpr.compare(vector1[0], start1[0], length1[0],
+                                 vector2[0], start2[0], length2[0]) != 0)) {
+          batch.size = 0;
+        }
+      } else if (inputColVector1.isRepeating) {
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (StringExpr.compare(vector1[0], start1[0], length1[0],
+                                   vector2[i], start2[i], length2[i]) != 0) {
+              sel[newSize++] = i;
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (StringExpr.compare(vector1[0], start1[0], length1[0],
+                                   vector2[i], start2[i], length2[i]) != 0) {
+              sel[newSize++] = i;
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (inputColVector2.isRepeating) {
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                   vector2[0], start2[0], length2[0]) != 0) {
+              sel[newSize++] = i;
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                   vector2[0], start2[0], length2[0]) != 0) {
+              sel[newSize++] = i;
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (batch.selectedInUse) {
+        int newSize = 0;
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                 vector2[i], start2[i], length2[i]) != 0) {
+            sel[newSize++] = i;
+          }
+        }
+        batch.size = newSize;
+      } else {
+        int newSize = 0;
+        for(int i = 0; i != n; i++) {
+          if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                 vector2[i], start2[i], length2[i]) != 0) {
+            sel[newSize++] = i;
+          }
+        }
+        if (newSize < batch.size) {
+          batch.size = newSize;
+          batch.selectedInUse = true;
+        }
+      }
+
+    // handle case where only input 2 has nulls
+    } else if (inputColVector1.noNulls) {
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (nullPos2[0] ||
+            !(StringExpr.compare(vector1[0], start1[0], length1[0],
+                               vector2[0], start2[0], length2[0]) != 0)) {
+          batch.size = 0;
+        }
+      } else if (inputColVector1.isRepeating) {
+
+        // no need to check for nulls in input 1
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[0], start1[0], length1[0],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[0], start1[0], length1[0],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (inputColVector2.isRepeating) {
+        if (nullPos2[0]) {
+
+          // no values will qualify because every comparison will be with NULL
+          batch.size = 0;
+          return;
+        }
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                   vector2[0], start2[0], length2[0]) != 0) {
+              sel[newSize++] = i;
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                   vector2[0], start2[0], length2[0]) != 0) {
+              sel[newSize++] = i;
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else { // neither input is repeating
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      }
+
+    // handle case where only input 1 has nulls
+    } else if (inputColVector2.noNulls) {
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (nullPos1[0] ||
+            !(StringExpr.compare(vector1[0], start1[0], length1[0],
+                               vector2[0], start2[0], length2[0]) != 0)) {
+          batch.size = 0;
+          return;
+        }
+      } else if (inputColVector1.isRepeating) {
+        if (nullPos1[0]) {
+
+          // if repeating value is null then every comparison will fail so nothing qualifies
+          batch.size = 0;
+          return;
+        }
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (StringExpr.compare(vector1[0], start1[0], length1[0],
+                                   vector2[i], start2[i], length2[i]) != 0) {
+              sel[newSize++] = i;
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (StringExpr.compare(vector1[0], start1[0], length1[0],
+                                   vector2[i], start2[i], length2[i]) != 0) {
+              sel[newSize++] = i;
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (inputColVector2.isRepeating) {
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[0], start2[0], length2[0]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos1[i]) { // only input 1 can hold NULLs in this branch, so test its flags
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[0], start2[0], length2[0]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else { // neither input is repeating
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      }
+
+    // handle case where both inputs have nulls
+    } else {
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (nullPos1[0] || nullPos2[0] ||
+            !(StringExpr.compare(vector1[0], start1[0], length1[0],
+                               vector2[0], start2[0], length2[0]) != 0)) {
+          batch.size = 0;
+        }
+      } else if (inputColVector1.isRepeating) {
+        if (nullPos1[0]) {
+          batch.size = 0;
+          return;
+        }
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[0], start1[0], length1[0],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[0], start1[0], length1[0],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (inputColVector2.isRepeating) {
+        if (nullPos2[0]) {
+          batch.size = 0;
+          return;
+        }
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[0], start2[0], length2[0]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[0], start2[0], length2[0]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else { // neither input is repeating
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos1[i] && !nullPos2[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos1[i] && !nullPos2[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i],
+                                     vector2[i], start2[i], length2[i]) != 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+  // -1: evaluate() narrows the batch itself and never writes to an output column.
+  @Override
+  public int getOutputColumn() {
+    return -1;
+  }
+}

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java?rev=1480527&r1=1480526&r2=1480527&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java Thu May  9 04:44:17 2013
@@ -154,6 +154,13 @@ public class CodeGen {
       {"FilterStringColumnCompareScalar", "LessEqual", "<="},
       {"FilterStringColumnCompareScalar", "Greater", ">"},
       {"FilterStringColumnCompareScalar", "GreaterEqual", ">="},
+      
+      {"FilterStringColumnCompareColumn", "Equal", "=="},
+      {"FilterStringColumnCompareColumn", "NotEqual", "!="},
+      {"FilterStringColumnCompareColumn", "Less", "<"},
+      {"FilterStringColumnCompareColumn", "LessEqual", "<="},
+      {"FilterStringColumnCompareColumn", "Greater", ">"},
+      {"FilterStringColumnCompareColumn", "GreaterEqual", ">="},
 
       {"FilterColumnCompareColumn", "Equal", "long", "double", "=="},
       {"FilterColumnCompareColumn", "Equal", "double", "double", "=="},
@@ -267,6 +274,8 @@ public class CodeGen {
         generateColumnUnaryMinus(tdesc);
       } else if (tdesc[0].equals("FilterStringColumnCompareScalar")) {
         generateFilterStringColumnCompareScalar(tdesc);
+      } else if (tdesc[0].equals("FilterStringColumnCompareColumn")) {
+        generateFilterStringColumnCompareColumn(tdesc);
       } else {
         continue;
       }
@@ -376,6 +385,12 @@ public class CodeGen {
     String className = "FilterStringCol" + operatorName + "StringScalar";
     generateFilterStringColumnCompareScalar(tdesc,className);
   }
+  
+  private void generateFilterStringColumnCompareColumn(String[] tdesc) throws IOException {
+    String operatorName = tdesc[1];
+    String className = "FilterStringCol" + operatorName + "StringCol";
+    generateFilterStringColumnCompareScalar(tdesc,className);
+  }
 
   private void generateFilterStringColumnCompareScalar(String[] tdesc, String className) throws IOException {
    String operatorSymbol = tdesc[2];

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt?rev=1480527&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt Thu May  9 04:44:17 2013
@@ -0,0 +1,455 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Filter the rows in a batch by comparing one string column to another. 
+ * This code is generated from a template.
+ */
+public class <ClassName> extends VectorExpression {
+  private int colNum1;
+  private int colNum2;
+
+  public <ClassName>(int colNum1, int colNum2) { 
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1];
+    BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2];
+    int[] sel = batch.selected;
+    boolean[] nullPos1 = inputColVector1.isNull;
+    boolean[] nullPos2 = inputColVector2.isNull;
+    int n = batch.size;
+    byte[][] vector1 = inputColVector1.vector;
+    byte[][] vector2 = inputColVector2.vector;
+    int[] start1 = inputColVector1.start;
+    int[] start2 = inputColVector2.start;
+    int[] length1 = inputColVector1.length;
+    int[] length2 = inputColVector2.length;
+    
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+    
+    // handle case where neither input has nulls
+    if (inputColVector1.noNulls && inputColVector2.noNulls) {
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+      
+        /* Either all must remain selected or all will be eliminated.
+         * Repeating property will not change.
+         */
+        if (!(StringExpr.compare(vector1[0], start1[0], length1[0], 
+                                 vector2[0], start2[0], length2[0]) <OperatorSymbol> 0)) {
+          batch.size = 0;
+        }      
+      } else if (inputColVector1.isRepeating) {
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (StringExpr.compare(vector1[0], start1[0], length1[0], 
+                                   vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+              sel[newSize++] = i;
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (StringExpr.compare(vector1[0], start1[0], length1[0], 
+                                   vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+              sel[newSize++] = i;
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (inputColVector2.isRepeating) {
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                   vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+              sel[newSize++] = i;
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                   vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+              sel[newSize++] = i;
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (batch.selectedInUse) {
+        int newSize = 0;
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                 vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+            sel[newSize++] = i;
+          }
+        }
+        batch.size = newSize;
+      } else {
+        int newSize = 0;
+        for(int i = 0; i != n; i++) {
+          if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                 vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+            sel[newSize++] = i;
+          }
+        }
+        if (newSize < batch.size) {
+          batch.size = newSize;
+          batch.selectedInUse = true;
+        }
+      }
+    
+    // handle case where only input 2 has nulls
+    } else if (inputColVector1.noNulls) { 
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (nullPos2[0] ||
+            !(StringExpr.compare(vector1[0], start1[0], length1[0], 
+                               vector2[0], start2[0], length2[0]) <OperatorSymbol> 0)) {
+          batch.size = 0; 
+        } 
+      } else if (inputColVector1.isRepeating) {
+         
+         // no need to check for nulls in input 1
+         if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[0], start1[0], length1[0], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[0], start1[0], length1[0], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (inputColVector2.isRepeating) {
+        if (nullPos2[0]) {
+        
+          // no values will qualify because every comparison will be with NULL
+          batch.size = 0;
+          return;
+        }
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                   vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+              sel[newSize++] = i;
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                   vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+              sel[newSize++] = i;
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else { // neither input is repeating
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }      
+      }
+      
+    // handle case where only input 1 has nulls
+    } else if (inputColVector2.noNulls) {
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (nullPos1[0] ||
+            !(StringExpr.compare(vector1[0], start1[0], length1[0], 
+                               vector2[0], start2[0], length2[0]) <OperatorSymbol> 0)) {
+          batch.size = 0; 
+          return;
+        } 
+      } else if (inputColVector1.isRepeating) {
+        if (nullPos1[0]) {
+        
+          // if repeating value is null then every comparison will fail so nothing qualifies
+          batch.size = 0;
+          return; 
+        }
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (StringExpr.compare(vector1[0], start1[0], length1[0], 
+                                   vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+              sel[newSize++] = i;
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (StringExpr.compare(vector1[0], start1[0], length1[0], 
+                                   vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+              sel[newSize++] = i;
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (inputColVector2.isRepeating) {
+         if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else { // neither input is repeating
+         if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }      
+      }
+            
+    // handle case where both inputs have nulls
+    } else {
+      if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+        if (nullPos1[0] || nullPos2[0] ||
+            !(StringExpr.compare(vector1[0], start1[0], length1[0], 
+                               vector2[0], start2[0], length2[0]) <OperatorSymbol> 0)) {
+          batch.size = 0; 
+        } 
+      } else if (inputColVector1.isRepeating) {
+         if (nullPos1[0]) {
+           batch.size = 0;
+           return;
+         }
+         if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[0], start1[0], length1[0], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos2[i]) {
+              if (StringExpr.compare(vector1[0], start1[0], length1[0], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else if (inputColVector2.isRepeating) {
+        if (nullPos2[0]) {
+          batch.size = 0;
+          return;
+        }
+        if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos1[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }
+      } else { // neither input is repeating
+         if (batch.selectedInUse) {
+          int newSize = 0;
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!nullPos1[i] && !nullPos2[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          batch.size = newSize;
+        } else {
+          int newSize = 0;
+          for(int i = 0; i != n; i++) {
+            if (!nullPos1[i] && !nullPos2[i]) {
+              if (StringExpr.compare(vector1[i], start1[i], length1[i], 
+                                     vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+                sel[newSize++] = i;
+              }
+            }
+          }
+          if (newSize < batch.size) {
+            batch.size = newSize;
+            batch.selectedInUse = true;
+          }
+        }      
+      } 
+    }
+  }
+
+  /**
+   * Reports the type name of this expression's result.
+   *
+   * @return the literal string "boolean"
+   */
+  @Override
+  public String getOutputType() {
+    final String outputTypeName = "boolean";
+    return outputTypeName;
+  }
+
+  /**
+   * Reports the index of the column this expression writes to.
+   *
+   * The evaluation code in this class narrows the batch in place (it rewrites the
+   * selection vector and batch.size) rather than filling a result column.
+   * NOTE(review): -1 presumably tells callers there is no output column; confirm
+   * against the VectorExpression contract.
+   *
+   * @return always -1
+   */
+  @Override
+  public int getOutputColumn() {
+    final int outputColumnIndex = -1;
+    return outputColumnIndex;
+  }
+}

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java?rev=1480527&r1=1480526&r2=1480527&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java Thu May  9 04:44:17 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColEqualStringScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessStringCol;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.
   FilterStringColGreaterEqualStringScalar;
 import org.junit.Test;
@@ -45,6 +46,7 @@ public class TestVectorStringExpressions
   private static byte[] greenred;
   private static byte[] redgreen;
   private static byte[] greengreen;
+  private static byte[] blue;
   private static byte[] emptyString;
   private static byte[] mixedUp;
   private static byte[] mixedUpLower;
@@ -54,6 +56,7 @@ public class TestVectorStringExpressions
 
   static {
     try {
+      blue = "blue".getBytes("UTF-8");
       red = "red".getBytes("UTF-8");
       redred = "redred".getBytes("UTF-8");
       green = "green".getBytes("UTF-8");
@@ -164,6 +167,157 @@ public class TestVectorStringExpressions
     Assert.assertTrue(batch.selected[0] == 0);
     Assert.assertTrue(batch.selected[1] == 1);
   }
+  
+  /**
+   * Exercises the string column-vs-column filter (col0 &lt; col1) across the
+   * combinations of noNulls and isRepeating on either input. Every case starts
+   * from a fresh 4-row batch and checks how many rows survive in batch.size and,
+   * where relevant, one entry of the selection vector.
+   */
+  @Test
+  public void testStringColCompareStringColFilter() {
+    VectorizedRowBatch batch;
+    VectorExpression expr;
+
+    /* input data
+     *
+     * col0       col1
+     * ===============
+     * blue       red
+     * green      green
+     * red        blue
+     * NULL       red            col0 data is empty string if we un-set NULL property
+     */
+
+    // nulls possible on left, right
+    batch = makeStringBatchForColColCompare();
+    expr = new FilterStringColLessStringCol(0,1);
+    expr.evaluate(batch);
+    Assert.assertEquals(1, batch.size);
+    Assert.assertEquals(0, batch.selected[0]);
+
+    // no nulls possible: row 3 is now compared as empty string < "red", so it qualifies too
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].noNulls = true;
+    batch.cols[1].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(2, batch.size);
+    Assert.assertEquals(3, batch.selected[1]);
+
+    // nulls on left, no nulls on right
+    batch = makeStringBatchForColColCompare();
+    batch.cols[1].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(1, batch.size);
+    Assert.assertEquals(0, batch.selected[0]);
+
+    // nulls on right, no nulls on left
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].noNulls = true;
+    batch.cols[1].isNull[3] = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(1, batch.size);
+    Assert.assertEquals(0, batch.selected[0]);
+
+    // Now vary isRepeating
+    // nulls possible on left, right
+
+    // left repeats: every row compares col0[0] ("blue") with col1[i]
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].isRepeating = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(3, batch.size);
+    Assert.assertEquals(3, batch.selected[2]);
+
+    // right repeats: every row compares col0[i] with col1[0] ("red")
+    batch = makeStringBatchForColColCompare();
+    batch.cols[1].isRepeating = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(2, batch.size); // first 2 qualify
+    Assert.assertEquals(1, batch.selected[1]);
+
+    // left and right repeat: "blue" < "red" holds, so the batch is left untouched
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].isRepeating = true;
+    batch.cols[1].isRepeating = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(4, batch.size);
+
+    // Now vary isRepeating
+    // nulls possible only on left
+
+    // left repeats
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].isRepeating = true;
+    batch.cols[1].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(3, batch.size);
+    Assert.assertEquals(3, batch.selected[2]);
+
+    // left repeats and is null
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].isRepeating = true;
+    batch.cols[1].noNulls = true;
+    batch.cols[0].isNull[0] = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(0, batch.size);
+
+    // right repeats: col0 is still nullable and its row 3 is NULL, so that row must be
+    // dropped and only rows 0 and 1 qualify (same outcome as the both-nullable case above).
+    // A filter that consults col1's null flags instead of col0's would let row 3 through
+    // and report 3 here.
+    batch = makeStringBatchForColColCompare();
+    batch.cols[1].isRepeating = true;
+    batch.cols[1].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(2, batch.size); // first 2 qualify
+    Assert.assertEquals(1, batch.selected[1]);
+
+    // left and right repeat
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].isRepeating = true;
+    batch.cols[1].isRepeating = true;
+    batch.cols[1].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(4, batch.size);
+
+
+    // Now vary isRepeating
+    // nulls possible only on right
+
+    // left repeats: row 0 is NULL on the right, rows 1 and 3 satisfy "blue" < col1[i]
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    batch.cols[1].isNull[0] = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(2, batch.size);
+    Assert.assertEquals(3, batch.selected[1]);
+
+    // right repeats: col0 is treated as non-null, so row 3 compares as empty string < "red"
+    batch = makeStringBatchForColColCompare();
+    batch.cols[1].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(3, batch.size);
+    Assert.assertEquals(3, batch.selected[2]);
+
+    // right repeats and is null
+    batch = makeStringBatchForColColCompare();
+    batch.cols[1].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    batch.cols[1].isNull[0] = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(0, batch.size);
+
+    // left and right repeat
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].isRepeating = true;
+    batch.cols[1].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(4, batch.size);
+
+    // left and right repeat and right is null
+    batch = makeStringBatchForColColCompare();
+    batch.cols[0].isRepeating = true;
+    batch.cols[1].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    batch.cols[1].isNull[0] = true;
+    expr.evaluate(batch);
+    Assert.assertEquals(0, batch.size);
+  }
 
   VectorizedRowBatch makeStringBatch() {
     // create a batch with one string ("Bytes") column
@@ -383,6 +537,38 @@ public class TestVectorStringExpressions
     batch.size = 3;
     return batch;
   }
+  
+  private VectorizedRowBatch makeStringBatchForColColCompare() {
+    VectorizedRowBatch batch = new VectorizedRowBatch(3);
+    BytesColumnVector v = new BytesColumnVector();
+    batch.cols[0] = v;
+    BytesColumnVector v2 = new BytesColumnVector();
+    batch.cols[1] = v2;
+    batch.cols[2] = new BytesColumnVector();
+
+    v.setRef(0, blue, 0, blue.length);
+    v.isNull[0] = false;
+    v.setRef(1, green, 0, green.length);
+    v.isNull[1] = false;
+    v.setRef(2,  red,  0,  red.length);
+    v.isNull[2] = false;
+    v.setRef(3, emptyString, 0, emptyString.length);
+    v.isNull[3] = true;
+    v.noNulls = false;
+
+    v2.setRef(0, red, 0, red.length);
+    v2.isNull[0] = false;
+    v2.setRef(1, green, 0, green.length);
+    v2.isNull[1] = false;
+    v2.setRef(2,  blue,  0,  blue.length);
+    v2.isNull[2] = false;
+    v2.setRef(3, red, 0, red.length);
+    v2.isNull[3] = false;
+    v2.noNulls = false;
+    
+    batch.size = 4;
+    return batch;
+  }  
 
   @Test
   public void testStringLike() {