You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/23 21:00:19 UTC
svn commit: r1525674 - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
java/org/apache/hadoop/hive/ql/optimizer/physical/
test/org/apache/hadoop/hive/ql/exec/ve...
Author: hashutosh
Date: Mon Sep 23 19:00:18 2013
New Revision: 1525674
URL: http://svn.apache.org/r1525674
Log:
HIVE-4823 : implement vectorized TRIM(), LTRIM(), RTRIM() (Eric Hanson via Ashutosh Chauhan)
Added:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1525674&r1=1525673&r2=1525674&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Mon Sep 23 19:00:18 2013
@@ -83,6 +83,7 @@ import org.apache.hadoop.hive.ql.plan.ap
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
import org.apache.hadoop.hive.ql.udf.UDFHour;
+import org.apache.hadoop.hive.ql.udf.UDFLTrim;
import org.apache.hadoop.hive.ql.udf.UDFLength;
import org.apache.hadoop.hive.ql.udf.UDFLike;
import org.apache.hadoop.hive.ql.udf.UDFMinute;
@@ -94,8 +95,10 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
+import org.apache.hadoop.hive.ql.udf.UDFRTrim;
import org.apache.hadoop.hive.ql.udf.UDFSecond;
import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTrim;
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -458,6 +461,7 @@ public class VectorizationContext {
} else if (udf instanceof GenericUDFConcat) {
return getConcatExpression(childExpr);
}
+
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
}
@@ -514,6 +518,12 @@ public class VectorizationContext {
return getUnaryStringExpression("StringLength", "Long", childExpr);
} else if (cl.equals(UDFSubstr.class)) {
return getSubstrExpression(childExpr);
+ } else if (cl.equals(UDFLTrim.class)) {
+ return getUnaryStringExpression("StringLTrim", "String", childExpr);
+ } else if (cl.equals(UDFRTrim.class)) {
+ return getUnaryStringExpression("StringRTrim", "String", childExpr);
+ } else if (cl.equals(UDFTrim.class)) {
+ return getUnaryStringExpression("StringTrim", "String", childExpr);
}
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java?rev=1525674&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java Mon Sep 23 19:00:18 2013
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+/**
+ * Vectorized LTRIM(): removes leading blanks (0x20) from each string value.
+ */
+public class StringLTrim extends StringUnaryUDFDirect {
+  private static final long serialVersionUID = 1L;
+
+  public StringLTrim(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  public StringLTrim() {
+    super();
+  }
+
+  /**
+   * LTRIM element i of the vector, and place the result in outV.
+   * Only blanks inside the logical value, i.e. the bytes in
+   * [start[i], start[i] + length[i]), are skipped. Null handling is ignored
+   * here; StringUnaryUDFDirect.evaluate() takes care of it.
+   *
+   * NOTE(review): the result is stored with setVal() rather than setRef();
+   * presumably that copies the bytes into outV's own buffer -- confirm
+   * against BytesColumnVector.
+   *
+   * @param outV   output column that receives the trimmed value at row i
+   * @param vector per-row byte arrays of the input column
+   * @param start  per-row offset of the value inside vector[i]
+   * @param length per-row length of the value in bytes
+   * @param i      row to process
+   */
+  @Override
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
+    int j = start[i];
+
+    // The value is the slice [start[i], start[i] + length[i]) of vector[i].
+    // Bounding the scan by vector[i].length instead would run past the slice
+    // whenever start[i] > 0 or the backing array is longer than the value.
+    final int end = start[i] + length[i];
+
+    // skip past blank characters
+    while (j < end && vector[i][j] == 0x20) {
+      j++;
+    }
+
+    // what remains is the original length minus the number of skipped blanks
+    outV.setVal(i, vector[i], j, length[i] - (j - start[i]));
+  }
+}
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java?rev=1525674&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java Mon Sep 23 19:00:18 2013
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+/**
+ * Vectorized RTRIM(): removes trailing blanks (0x20) from each string value.
+ */
+public class StringRTrim extends StringUnaryUDFDirect {
+  private static final long serialVersionUID = 1L;
+
+  public StringRTrim(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  public StringRTrim() {
+    super();
+  }
+
+  /**
+   * Strip trailing blanks from row i of the input and store what is left in
+   * row i of outV. The value keeps its original start offset; only its length
+   * shrinks. Null rows are not considered here; the caller handles them.
+   *
+   * @param outV   output column that receives the trimmed value at row i
+   * @param vector per-row byte arrays of the input column
+   * @param start  per-row offset of the value inside vector[i]
+   * @param length per-row length of the value in bytes
+   * @param i      row to process
+   */
+  @Override
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
+    final int begin = start[i];
+
+    // index one past the last byte that is still part of the result
+    int end = begin + length[i];
+
+    // walk backwards while the last remaining byte is a blank
+    while (end > begin && vector[i][end - 1] == 0x20) {
+      end--;
+    }
+
+    outV.setVal(i, vector[i], begin, end - begin);
+  }
+}
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java?rev=1525674&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java Mon Sep 23 19:00:18 2013
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+/**
+ * Vectorized TRIM(): removes leading and trailing blanks (0x20) from each
+ * string value.
+ */
+public class StringTrim extends StringUnaryUDFDirect {
+  private static final long serialVersionUID = 1L;
+
+  public StringTrim(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  public StringTrim() {
+    super();
+  }
+
+  /**
+   * Trim row i of the input on both sides and store the result in row i of
+   * outV. A value made up entirely of blanks produces a zero-length result.
+   * Null rows are not considered here; the caller handles them.
+   *
+   * @param outV   output column that receives the trimmed value at row i
+   * @param vector per-row byte arrays of the input column
+   * @param start  per-row offset of the value inside vector[i]
+   * @param length per-row length of the value in bytes
+   * @param i      row to process
+   */
+  @Override
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
+    int first = start[i];
+
+    // index one past the last byte that is still part of the result
+    int limit = start[i] + length[i];
+
+    // drop blanks from the front
+    while (first < limit && vector[i][first] == 0x20) {
+      first++;
+    }
+
+    // drop blanks from the back, never crossing the front cursor
+    while (first < limit && vector[i][limit - 1] == 0x20) {
+      limit--;
+    }
+
+    outV.setVal(i, vector[i], first, limit - first);
+  }
+}
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java?rev=1525674&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java Mon Sep 23 19:00:18 2013
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Base class for vectorized unary string functions whose per-row logic reads
+ * the input bytes directly and writes its result straight into the output
+ * column, such as fast implementations of TRIM(), LTRIM(), and RTRIM().
+ * Subclasses supply the per-row work in func(); this class drives it over a
+ * batch and propagates the null/repeating flags.
+ */
+abstract public class StringUnaryUDFDirect extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+  int inputColumn;
+  int outputColumn;
+
+  public StringUnaryUDFDirect(int inputColumn, int outputColumn) {
+    this.inputColumn = inputColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  public StringUnaryUDFDirect() {
+    super();
+  }
+
+  /**
+   * Apply the function to row i of the input arrays and store the result in
+   * row i of outV. When the input column may contain nulls, evaluate() does
+   * not call this for rows whose isNull flag is set.
+   */
+  abstract protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i);
+
+  /**
+   * Evaluate child expressions (if any), then run func() over the batch.
+   * The output column's noNulls flag mirrors the input's; isRepeating is set
+   * when the input is repeating (only row 0 is processed then) and cleared
+   * otherwise. An empty batch returns after the output buffer is initialized.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    final BytesColumnVector in = (BytesColumnVector) batch.cols[inputColumn];
+    final int[] selected = batch.selected;
+    final int rows = batch.size;
+    final byte[][] bytes = in.vector;
+    final int[] starts = in.start;
+    final int[] lengths = in.length;
+    final BytesColumnVector out = (BytesColumnVector) batch.cols[outputColumn];
+    out.initBuffer();
+
+    if (rows == 0) {
+      // nothing to process
+      return;
+    }
+
+    if (in.noNulls) {
+      // No nulls in the input: every visited row is transformed.
+      out.noNulls = true;
+
+      if (in.isRepeating) {
+        out.isRepeating = true;
+        func(out, bytes, starts, lengths, 0);
+        return;
+      }
+
+      if (batch.selectedInUse) {
+        for (int k = 0; k != rows; k++) {
+          func(out, bytes, starts, lengths, selected[k]);
+        }
+      } else {
+        for (int row = 0; row != rows; row++) {
+          func(out, bytes, starts, lengths, row);
+        }
+      }
+      out.isRepeating = false;
+      return;
+    }
+
+    // The input may contain nulls. Copy the null flags across and skip func()
+    // for null rows, because the data of a null value may be undefined.
+    out.noNulls = false;
+
+    if (in.isRepeating) {
+      out.isRepeating = true;
+      out.isNull[0] = in.isNull[0];
+      if (!in.isNull[0]) {
+        func(out, bytes, starts, lengths, 0);
+      }
+      return;
+    }
+
+    if (batch.selectedInUse) {
+      for (int k = 0; k != rows; k++) {
+        final int row = selected[k];
+        out.isNull[row] = in.isNull[row];
+        if (in.isNull[row]) {
+          continue;
+        }
+        func(out, bytes, starts, lengths, row);
+      }
+    } else {
+      // dense batch: the null flags can be copied in one go
+      System.arraycopy(in.isNull, 0, out.isNull, 0, rows);
+      for (int row = 0; row != rows; row++) {
+        if (in.isNull[row]) {
+          continue;
+        }
+        func(out, bytes, starts, lengths, row);
+      }
+    }
+    out.isRepeating = false;
+  }
+
+  /** Index of the batch column this expression writes to. */
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  /** Index of the batch column this expression reads from. */
+  public int getInputColumn() {
+    return inputColumn;
+  }
+
+  public void setInputColumn(int inputColumn) {
+    this.inputColumn = inputColumn;
+  }
+
+  /** The result of these functions is always a string column. */
+  @Override
+  public String getOutputType() {
+    return "String";
+  }
+
+}
\ No newline at end of file
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java?rev=1525674&r1=1525673&r2=1525674&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java Mon Sep 23 19:00:18 2013
@@ -37,7 +37,7 @@ public abstract class VectorExpression i
* This is the primary method to implement expression logic.
* @param vrg
*/
- public abstract void evaluate(VectorizedRowBatch vrg);
+ public abstract void evaluate(VectorizedRowBatch batch);
/**
* Returns the index of the output column in the array
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1525674&r1=1525673&r2=1525674&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Mon Sep 23 19:00:18 2013
@@ -70,6 +70,7 @@ import org.apache.hadoop.hive.ql.plan.Pa
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
import org.apache.hadoop.hive.ql.udf.UDFHour;
+import org.apache.hadoop.hive.ql.udf.UDFLTrim;
import org.apache.hadoop.hive.ql.udf.UDFLength;
import org.apache.hadoop.hive.ql.udf.UDFLike;
import org.apache.hadoop.hive.ql.udf.UDFMinute;
@@ -80,8 +81,10 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
+import org.apache.hadoop.hive.ql.udf.UDFRTrim;
import org.apache.hadoop.hive.ql.udf.UDFSecond;
import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTrim;
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -161,6 +164,10 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(UDFLike.class);
supportedGenericUDFs.add(UDFSubstr.class);
+ supportedGenericUDFs.add(UDFLTrim.class);
+ supportedGenericUDFs.add(UDFRTrim.class);
+ supportedGenericUDFs.add(UDFTrim.class);
+
supportedGenericUDFs.add(GenericUDFLower.class);
supportedGenericUDFs.add(GenericUDFUpper.class);
supportedGenericUDFs.add(GenericUDFConcat.class);
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java?rev=1525674&r1=1525673&r2=1525674&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java Mon Sep 23 19:00:18 2013
@@ -55,6 +55,10 @@ public class TestVectorStringExpressions
private static byte[] mixedUpUpper;
private static byte[] multiByte;
private static byte[] mixPercentPattern;
+ private static byte[] blanksLeft;
+ private static byte[] blanksRight;
+ private static byte[] blanksBoth;
+ private static byte[] blankString;
static {
try {
@@ -72,6 +76,10 @@ public class TestVectorStringExpressions
mixPercentPattern = "mix%".getBytes("UTF-8"); // for use as wildcard pattern to test LIKE
multiByte = new byte[100];
addMultiByteChars(multiByte);
+ blanksLeft = "  foo".getBytes("UTF-8");
+ blanksRight = "foo  ".getBytes("UTF-8");
+ blanksBoth = "  foo  ".getBytes("UTF-8");
+ blankString = " ".getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
@@ -1405,4 +1413,81 @@ public class TestVectorStringExpressions
)
);
}
+
+  /**
+   * Runs StringLTrim over the batch built by makeTrimBatch() and checks each
+   * output row: leading blanks must be gone, trailing blanks must be kept.
+   */
+  @Test
+  public void testVectorLTrim() {
+    VectorizedRowBatch b = makeTrimBatch();
+    VectorExpression expr = new StringLTrim(0, 1);
+    expr.evaluate(b);
+    BytesColumnVector outV = (BytesColumnVector) b.cols[1];
+
+    // Row 0, empty input: compare against the actual output start/length.
+    // Passing a literal 0, 0 for the output would look at zero output bytes
+    // and so could not notice a non-empty result.
+    Assert.assertEquals(0,
+        StringExpr.compare(emptyString, 0, 0, outV.vector[0], outV.start[0], outV.length[0]));
+    // Row 1, blanks on the left only: expected is the input past its 2 leading bytes.
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksLeft, 2, 3, outV.vector[1], outV.start[1], outV.length[1]));
+    // Row 2, blanks on the right only: expected is the whole input.
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksRight, 0, 5, outV.vector[2], outV.start[2], outV.length[2]));
+    // Row 3, blanks on both sides: only the left ones are removed.
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksBoth, 2, 5, outV.vector[3], outV.start[3], outV.length[3]));
+    // Row 4, no blanks: unchanged.
+    Assert.assertEquals(0,
+        StringExpr.compare(red, 0, 3, outV.vector[4], outV.start[4], outV.length[4]));
+    // Row 5, all blanks: expected to be empty.
+    Assert.assertEquals(0,
+        StringExpr.compare(blankString, 0, 0, outV.vector[5], outV.start[5], outV.length[5]));
+  }
+
+  /**
+   * Runs StringRTrim over the batch built by makeTrimBatch() and checks each
+   * output row: trailing blanks must be gone, leading blanks must be kept.
+   */
+  @Test
+  public void testVectorRTrim() {
+    VectorizedRowBatch b = makeTrimBatch();
+    VectorExpression expr = new StringRTrim(0, 1);
+    expr.evaluate(b);
+    BytesColumnVector outV = (BytesColumnVector) b.cols[1];
+
+    // Row 0, empty input: compare against the actual output start/length.
+    // Passing a literal 0, 0 for the output would look at zero output bytes
+    // and so could not notice a non-empty result.
+    Assert.assertEquals(0,
+        StringExpr.compare(emptyString, 0, 0, outV.vector[0], outV.start[0], outV.length[0]));
+    // Row 1, blanks on the left only: expected is the whole input.
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksLeft, 0, 5, outV.vector[1], outV.start[1], outV.length[1]));
+    // Row 2, blanks on the right only: expected is the first 3 bytes.
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksRight, 0, 3, outV.vector[2], outV.start[2], outV.length[2]));
+    // Row 3, blanks on both sides: only the right ones are removed.
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksBoth, 0, 5, outV.vector[3], outV.start[3], outV.length[3]));
+    // Row 4, no blanks: unchanged.
+    Assert.assertEquals(0,
+        StringExpr.compare(red, 0, 3, outV.vector[4], outV.start[4], outV.length[4]));
+    // Row 5, all blanks: expected to be empty.
+    Assert.assertEquals(0,
+        StringExpr.compare(blankString, 0, 0, outV.vector[5], outV.start[5], outV.length[5]));
+  }
+
+  /**
+   * Runs StringTrim over the batch built by makeTrimBatch() and checks each
+   * output row: blanks must be removed from both ends.
+   */
+  @Test
+  public void testVectorTrim() {
+    VectorizedRowBatch b = makeTrimBatch();
+    VectorExpression expr = new StringTrim(0, 1);
+    expr.evaluate(b);
+    BytesColumnVector outV = (BytesColumnVector) b.cols[1];
+
+    // Row 0, empty input: compare against the actual output start/length.
+    // Passing a literal 0, 0 for the output would look at zero output bytes
+    // and so could not notice a non-empty result.
+    Assert.assertEquals(0,
+        StringExpr.compare(emptyString, 0, 0, outV.vector[0], outV.start[0], outV.length[0]));
+    // Row 1, blanks on the left only: expected is the input past its 2 leading bytes.
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksLeft, 2, 3, outV.vector[1], outV.start[1], outV.length[1]));
+    // Row 2, blanks on the right only: expected is the first 3 bytes.
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksRight, 0, 3, outV.vector[2], outV.start[2], outV.length[2]));
+    // Row 3, blanks on both sides: expected is the 3 bytes in the middle.
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksBoth, 2, 3, outV.vector[3], outV.start[3], outV.length[3]));
+    // Row 4, no blanks: unchanged.
+    Assert.assertEquals(0,
+        StringExpr.compare(red, 0, 3, outV.vector[4], outV.start[4], outV.length[4]));
+    // Row 5, all blanks: expected to be empty.
+    Assert.assertEquals(0,
+        StringExpr.compare(blankString, 0, 0, outV.vector[5], outV.start[5], outV.length[5]));
+  }
+
+  // Make a batch to test the trim functions. Column 0 holds six input strings
+  // (rows 0-5: empty, blanks left, blanks right, blanks both sides, no blanks,
+  // all blanks); column 1 is an empty string column that receives the output.
+  private VectorizedRowBatch makeTrimBatch() {
+    VectorizedRowBatch b = new VectorizedRowBatch(2);
+    BytesColumnVector inV = new BytesColumnVector();
+    BytesColumnVector outV = new BytesColumnVector();
+    b.cols[0] = inV;
+    b.cols[1] = outV;
+    inV.setRef(0, emptyString, 0, 0);
+    inV.setRef(1, blanksLeft, 0, blanksLeft.length);
+    inV.setRef(2, blanksRight, 0, blanksRight.length);
+    inV.setRef(3, blanksBoth, 0, blanksBoth.length);
+    inV.setRef(4, red, 0, red.length);
+    inV.setRef(5, blankString, 0, blankString.length);
+
+    // Six rows (0..5) are populated and the tests assert on row 5, so the
+    // batch size has to be 6; with 5 the all-blank row is never evaluated.
+    b.size = 6;
+    return b;
+  }
}