You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/05/09 06:44:18 UTC
svn commit: r1480527 [2/2] - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/
test/org/apache/hadoop/hive/ql/exec/vector/expressions/
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java?rev=1480527&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java Thu May 9 04:44:17 2013
@@ -0,0 +1,455 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Filter the rows in a batch by comparing one string column to another.
+ * This code is generated from a template.
+ */
+public class FilterStringColNotEqualStringCol extends VectorExpression {
+ private int colNum1;
+ private int colNum2;
+
+ public FilterStringColNotEqualStringCol(int colNum1, int colNum2) {
+ this.colNum1 = colNum1;
+ this.colNum2 = colNum2;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1];
+ BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2];
+ int[] sel = batch.selected;
+ boolean[] nullPos1 = inputColVector1.isNull;
+ boolean[] nullPos2 = inputColVector2.isNull;
+ int n = batch.size;
+ byte[][] vector1 = inputColVector1.vector;
+ byte[][] vector2 = inputColVector2.vector;
+ int[] start1 = inputColVector1.start;
+ int[] start2 = inputColVector2.start;
+ int[] length1 = inputColVector1.length;
+ int[] length2 = inputColVector2.length;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ // handle case where neither input has nulls
+ if (inputColVector1.noNulls && inputColVector2.noNulls) {
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+
+ /* Either all must remain selected or all will be eliminated.
+ * Repeating property will not change.
+ */
+ if (!(StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[0], start2[0], length2[0]) != 0)) {
+ batch.size = 0;
+ }
+ } else if (inputColVector1.isRepeating) {
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (inputColVector2.isRepeating) {
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+
+ // handle case where only input 2 has nulls
+ } else if (inputColVector1.noNulls) {
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+ if (nullPos2[0] ||
+ !(StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[0], start2[0], length2[0]) != 0)) {
+ batch.size = 0;
+ }
+ } else if (inputColVector1.isRepeating) {
+
+ // no need to check for nulls in input 1
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (inputColVector2.isRepeating) {
+ if (nullPos2[0]) {
+
+ // no values will qualify because every comparison will be with NULL
+ batch.size = 0;
+ return;
+ }
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else { // neither input is repeating
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ }
+
+ // handle case where only input 1 has nulls
+ } else if (inputColVector2.noNulls) {
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+ if (nullPos1[0] ||
+ !(StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[0], start2[0], length2[0]) != 0)) {
+ batch.size = 0;
+ return;
+ }
+ } else if (inputColVector1.isRepeating) {
+ if (nullPos1[0]) {
+
+ // if repeating value is null then every comparison will fail so nothing qualifies
+ batch.size = 0;
+ return;
+ }
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (inputColVector2.isRepeating) {
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else { // neither input is repeating
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ }
+
+ // handle case where both inputs have nulls
+ } else {
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+ if (nullPos1[0] || nullPos2[0] ||
+ !(StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[0], start2[0], length2[0]) != 0)) {
+ batch.size = 0;
+ }
+ } else if (inputColVector1.isRepeating) {
+ if (nullPos1[0]) {
+ batch.size = 0;
+ return;
+ }
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (inputColVector2.isRepeating) {
+ if (nullPos2[0]) {
+ batch.size = 0;
+ return;
+ }
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else { // neither input is repeating
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos1[i] && !nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos1[i] && !nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) != 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public String getOutputType() {
+ return "boolean";
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return -1;
+ }
+}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java?rev=1480527&r1=1480526&r2=1480527&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java Thu May 9 04:44:17 2013
@@ -154,6 +154,13 @@ public class CodeGen {
{"FilterStringColumnCompareScalar", "LessEqual", "<="},
{"FilterStringColumnCompareScalar", "Greater", ">"},
{"FilterStringColumnCompareScalar", "GreaterEqual", ">="},
+
+ {"FilterStringColumnCompareColumn", "Equal", "=="},
+ {"FilterStringColumnCompareColumn", "NotEqual", "!="},
+ {"FilterStringColumnCompareColumn", "Less", "<"},
+ {"FilterStringColumnCompareColumn", "LessEqual", "<="},
+ {"FilterStringColumnCompareColumn", "Greater", ">"},
+ {"FilterStringColumnCompareColumn", "GreaterEqual", ">="},
{"FilterColumnCompareColumn", "Equal", "long", "double", "=="},
{"FilterColumnCompareColumn", "Equal", "double", "double", "=="},
@@ -267,6 +274,8 @@ public class CodeGen {
generateColumnUnaryMinus(tdesc);
} else if (tdesc[0].equals("FilterStringColumnCompareScalar")) {
generateFilterStringColumnCompareScalar(tdesc);
+ } else if (tdesc[0].equals("FilterStringColumnCompareColumn")) {
+ generateFilterStringColumnCompareColumn(tdesc);
} else {
continue;
}
@@ -376,6 +385,12 @@ public class CodeGen {
String className = "FilterStringCol" + operatorName + "StringScalar";
generateFilterStringColumnCompareScalar(tdesc,className);
}
+
+ private void generateFilterStringColumnCompareColumn(String[] tdesc) throws IOException {
+ String operatorName = tdesc[1];
+ String className = "FilterStringCol" + operatorName + "StringCol";
+ generateFilterStringColumnCompareScalar(tdesc,className);
+ }
private void generateFilterStringColumnCompareScalar(String[] tdesc, String className) throws IOException {
String operatorSymbol = tdesc[2];
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt?rev=1480527&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt Thu May 9 04:44:17 2013
@@ -0,0 +1,455 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Filter the rows in a batch by comparing one string column to another.
+ * This code is generated from a template.
+ */
+public class <ClassName> extends VectorExpression {
+ private int colNum1;
+ private int colNum2;
+
+ public <ClassName>(int colNum1, int colNum2) {
+ this.colNum1 = colNum1;
+ this.colNum2 = colNum2;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1];
+ BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2];
+ int[] sel = batch.selected;
+ boolean[] nullPos1 = inputColVector1.isNull;
+ boolean[] nullPos2 = inputColVector2.isNull;
+ int n = batch.size;
+ byte[][] vector1 = inputColVector1.vector;
+ byte[][] vector2 = inputColVector2.vector;
+ int[] start1 = inputColVector1.start;
+ int[] start2 = inputColVector2.start;
+ int[] length1 = inputColVector1.length;
+ int[] length2 = inputColVector2.length;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ // handle case where neither input has nulls
+ if (inputColVector1.noNulls && inputColVector2.noNulls) {
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+
+ /* Either all must remain selected or all will be eliminated.
+ * Repeating property will not change.
+ */
+ if (!(StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0)) {
+ batch.size = 0;
+ }
+ } else if (inputColVector1.isRepeating) {
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (inputColVector2.isRepeating) {
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+
+ // handle case where only input 2 has nulls
+ } else if (inputColVector1.noNulls) {
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+ if (nullPos2[0] ||
+ !(StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0)) {
+ batch.size = 0;
+ }
+ } else if (inputColVector1.isRepeating) {
+
+ // no need to check for nulls in input 1
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (inputColVector2.isRepeating) {
+ if (nullPos2[0]) {
+
+ // no values will qualify because every comparison will be with NULL
+ batch.size = 0;
+ return;
+ }
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else { // neither input is repeating
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ }
+
+ // handle case where only input 1 has nulls
+ } else if (inputColVector2.noNulls) {
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+ if (nullPos1[0] ||
+ !(StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0)) {
+ batch.size = 0;
+ return;
+ }
+ } else if (inputColVector1.isRepeating) {
+ if (nullPos1[0]) {
+
+ // if repeating value is null then every comparison will fail so nothing qualifies
+ batch.size = 0;
+ return;
+ }
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (inputColVector2.isRepeating) {
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else { // neither input is repeating
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ }
+
+ // handle case where both inputs have nulls
+ } else {
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+ if (nullPos1[0] || nullPos2[0] ||
+ !(StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0)) {
+ batch.size = 0;
+ }
+ } else if (inputColVector1.isRepeating) {
+ if (nullPos1[0]) {
+ batch.size = 0;
+ return;
+ }
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos2[i]) {
+ if (StringExpr.compare(vector1[0], start1[0], length1[0],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else if (inputColVector2.isRepeating) {
+ if (nullPos2[0]) {
+ batch.size = 0;
+ return;
+ }
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos1[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[0], start2[0], length2[0]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ } else { // neither input is repeating
+ if (batch.selectedInUse) {
+ int newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos1[i] && !nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ batch.size = newSize;
+ } else {
+ int newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (!nullPos1[i] && !nullPos2[i]) {
+ if (StringExpr.compare(vector1[i], start1[i], length1[i],
+ vector2[i], start2[i], length2[i]) <OperatorSymbol> 0) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ if (newSize < batch.size) {
+ batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public String getOutputType() {
+ return "boolean";
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return -1;
+ }
+}
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java?rev=1480527&r1=1480526&r2=1480527&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java Thu May 9 04:44:17 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColEqualStringScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessStringCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.
FilterStringColGreaterEqualStringScalar;
import org.junit.Test;
@@ -45,6 +46,7 @@ public class TestVectorStringExpressions
private static byte[] greenred;
private static byte[] redgreen;
private static byte[] greengreen;
+ private static byte[] blue;
private static byte[] emptyString;
private static byte[] mixedUp;
private static byte[] mixedUpLower;
@@ -54,6 +56,7 @@ public class TestVectorStringExpressions
static {
try {
+ blue = "blue".getBytes("UTF-8");
red = "red".getBytes("UTF-8");
redred = "redred".getBytes("UTF-8");
green = "green".getBytes("UTF-8");
@@ -164,6 +167,157 @@ public class TestVectorStringExpressions
Assert.assertTrue(batch.selected[0] == 0);
Assert.assertTrue(batch.selected[1] == 1);
}
+
+ @Test
+ public void testStringColCompareStringColFilter() {
+ VectorizedRowBatch batch;
+ VectorExpression expr;
+
+ /* input data
+ *
+ * col0 col1
+ * ===============
+ * blue red
+ * green green
+ * red blue
+ * NULL red col0 data is empty string if we un-set NULL property
+ */
+
+ // nulls possible on left, right
+ batch = makeStringBatchForColColCompare();
+ expr = new FilterStringColLessStringCol(0,1);
+ expr.evaluate(batch);
+ Assert.assertEquals(1, batch.size);
+ Assert.assertEquals(0, batch.selected[0]);
+
+ // no nulls possible
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].noNulls = true;
+ batch.cols[1].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(2, batch.size);
+ Assert.assertEquals(3, batch.selected[1]);
+
+ // nulls on left, no nulls on right
+ batch = makeStringBatchForColColCompare();
+ batch.cols[1].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(1, batch.size);
+ Assert.assertEquals(0, batch.selected[0]);
+
+ // nulls on right, no nulls on left
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].noNulls = true;
+ batch.cols[1].isNull[3] = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(1, batch.size);
+ Assert.assertEquals(0, batch.selected[0]);
+
+ // Now vary isRepeating
+ // nulls possible on left, right
+
+ // left repeats
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].isRepeating = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(3, batch.size);
+ Assert.assertEquals(3, batch.selected[2]);
+
+ // right repeats
+ batch = makeStringBatchForColColCompare();
+ batch.cols[1].isRepeating = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(2, batch.size); // first 2 qualify
+ Assert.assertEquals(1, batch.selected[1]);
+
+ // left and right repeat
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].isRepeating = true;
+ batch.cols[1].isRepeating = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(4, batch.size);
+
+ // Now vary isRepeating
+ // nulls possible only on left
+
+ // left repeats
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].isRepeating = true;
+ batch.cols[1].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(3, batch.size);
+ Assert.assertEquals(3, batch.selected[2]);
+
+ // left repeats and is null
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].isRepeating = true;
+ batch.cols[1].noNulls = true;
+ batch.cols[0].isNull[0] = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(0, batch.size);
+
+ // right repeats
+ batch = makeStringBatchForColColCompare();
+ batch.cols[1].isRepeating = true;
+ batch.cols[1].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(3, batch.size);
+ Assert.assertEquals(1, batch.selected[1]);
+
+ // left and right repeat
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].isRepeating = true;
+ batch.cols[1].isRepeating = true;
+ batch.cols[1].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(4, batch.size);
+
+
+ // Now vary isRepeating
+ // nulls possible only on right
+
+ // left repeats
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].isRepeating = true;
+ batch.cols[0].noNulls = true;
+ batch.cols[1].isNull[0] = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(2, batch.size);
+ Assert.assertEquals(3, batch.selected[1]);
+
+ // right repeats
+ batch = makeStringBatchForColColCompare();
+ batch.cols[1].isRepeating = true;
+ batch.cols[0].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(3, batch.size);
+ Assert.assertEquals(3, batch.selected[2]);
+
+ // right repeats and is null
+ batch = makeStringBatchForColColCompare();
+ batch.cols[1].isRepeating = true;
+ batch.cols[0].noNulls = true;
+ batch.cols[1].isNull[0] = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(0, batch.size);
+
+ // left and right repeat
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].isRepeating = true;
+ batch.cols[1].isRepeating = true;
+ batch.cols[0].noNulls = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(4, batch.size);
+
+ // left and right repeat and right is null
+ batch = makeStringBatchForColColCompare();
+ batch.cols[0].isRepeating = true;
+ batch.cols[1].isRepeating = true;
+ batch.cols[0].noNulls = true;
+ batch.cols[1].isNull[0] = true;
+ expr.evaluate(batch);
+ Assert.assertEquals(0, batch.size);
+ }
VectorizedRowBatch makeStringBatch() {
// create a batch with one string ("Bytes") column
@@ -383,6 +537,38 @@ public class TestVectorStringExpressions
batch.size = 3;
return batch;
}
+
+ private VectorizedRowBatch makeStringBatchForColColCompare() {
+ VectorizedRowBatch batch = new VectorizedRowBatch(3);
+ BytesColumnVector v = new BytesColumnVector();
+ batch.cols[0] = v;
+ BytesColumnVector v2 = new BytesColumnVector();
+ batch.cols[1] = v2;
+ batch.cols[2] = new BytesColumnVector();
+
+ v.setRef(0, blue, 0, blue.length);
+ v.isNull[0] = false;
+ v.setRef(1, green, 0, green.length);
+ v.isNull[1] = false;
+ v.setRef(2, red, 0, red.length);
+ v.isNull[2] = false;
+ v.setRef(3, emptyString, 0, emptyString.length);
+ v.isNull[3] = true;
+ v.noNulls = false;
+
+ v2.setRef(0, red, 0, red.length);
+ v2.isNull[0] = false;
+ v2.setRef(1, green, 0, green.length);
+ v2.isNull[1] = false;
+ v2.setRef(2, blue, 0, blue.length);
+ v2.isNull[2] = false;
+ v2.setRef(3, red, 0, red.length);
+ v2.isNull[3] = false;
+ v2.noNulls = false;
+
+ batch.size = 4;
+ return batch;
+ }
@Test
public void testStringLike() {