You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/16 15:52:37 UTC
[23/32] hive git commit: HIVE-18622: Vectorization: IF Statements,
Comparisons, and more do not handle NULLs correctly (Matt McCline,
reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
index aecaed2..1191b31 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -26,8 +28,8 @@ public class LongScalarNotEqualLongColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- private final int colNum;
- private final long value;
+ protected final int colNum;
+ protected final long value;
public LongScalarNotEqualLongColumn(long value, int colNum, int outputColumnNum) {
super(outputColumnNum);
@@ -53,8 +55,8 @@ public class LongScalarNotEqualLongColumn extends VectorExpression {
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
- boolean[] outNulls = outputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -64,44 +66,72 @@ public class LongScalarNotEqualLongColumn extends VectorExpression {
return;
}
+ // We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
outputVector[0] = value != vector[0] ? 1 : 0;
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
- for(int j=0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = value != vector[i] ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63"
+ outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63"
+ outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
// The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63"
outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = value != vector[0] ? 1 : 0;
- outNulls[0] = false;
- } else {
- outNulls[0] = true;
- }
- outputColVector.isRepeating = true;
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
- outputVector[i] = value != vector[i] ? 1 : 0;
- outNulls[i] = nullPos[i];
+ outputIsNull[i] = inputIsNull[i];
+ outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
}
} else {
- System.arraycopy(nullPos, 0, outNulls, 0, n);
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
index c52e337..0976f20 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -44,7 +46,7 @@ abstract public class LongToStringUnaryUDF extends VectorExpression {
inputColumn = -1;
}
- abstract protected void func(BytesColumnVector outV, long[] vector, int i);
+ abstract protected void func(BytesColumnVector outputColVector, long[] vector, int i);
@Override
public void evaluate(VectorizedRowBatch batch) {
@@ -57,59 +59,87 @@ abstract public class LongToStringUnaryUDF extends VectorExpression {
int[] sel = batch.selected;
int n = batch.size;
long[] vector = inputColVector.vector;
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
- outV.initBuffer();
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
+ outputColVector.initBuffer();
if (n == 0) {
//Nothing to do
return;
}
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ func(outputColVector, vector, 0);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- func(outV, vector, 0);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- func(outV, vector, i);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ func(outputColVector, vector, i);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ func(outputColVector, vector, i);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- func(outV, vector, i);
+ func(outputColVector, vector, i);
}
- outV.isRepeating = false;
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
// Handle case with nulls. Don't do function if the value is null,
// because the data may be undefined for a null value.
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- func(outV, vector, 0);
- }
- } else if (batch.selectedInUse) {
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outV.isNull[i] = inputColVector.isNull[i];
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
- func(outV, vector, i);
+ func(outputColVector, vector, i);
}
}
- outV.isRepeating = false;
+ outputColVector.isRepeating = false;
} else {
- System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+ System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
if (!inputColVector.isNull[i]) {
- func(outV, vector, i);
+ func(outputColVector, vector, i);
}
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java
index ccc0fcb..aad408f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -64,7 +66,6 @@ public abstract class MathFuncDoubleToDouble extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
double[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -74,38 +75,69 @@ public abstract class MathFuncDoubleToDouble extends VectorExpression {
return;
}
- if (inputColVector.isRepeating) {
- outputVector[0] = func(vector[0]);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ outputVector[0] = func(vector[0]);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ cleanup(outputColVector, sel, batch.selectedInUse, n);
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = func(vector[i]);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = func(vector[i]);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = func(vector[i]);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = func(vector[i]);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = func(vector[i]);
outputIsNull[i] = inputIsNull[i];
- }
+ outputVector[i] = func(vector[i]);
+ }
} else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = func(vector[i]);
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
cleanup(outputColVector, sel, batch.selectedInUse, n);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java
index 3375a56..dcebc24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -63,7 +65,6 @@ public abstract class MathFuncLongToDouble extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
long[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
@@ -73,38 +74,69 @@ public abstract class MathFuncLongToDouble extends VectorExpression {
return;
}
- if (inputColVector.isRepeating) {
- outputVector[0] = func(vector[0]);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ outputVector[0] = func(vector[0]);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ cleanup(outputColVector, sel, batch.selectedInUse, n);
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = func(vector[i]);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = func(vector[i]);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = func(vector[i]);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = func(vector[i]);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = func(vector[i]);
outputIsNull[i] = inputIsNull[i];
- }
+ outputVector[i] = func(vector[i]);
+ }
} else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = func(vector[i]);
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
cleanup(outputColVector, sel, batch.selectedInUse, n);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java
index 898cf96..e5b6902 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -62,7 +64,6 @@ public abstract class MathFuncLongToLong extends VectorExpression {
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
- outputColVector.noNulls = inputColVector.noNulls;
int n = batch.size;
long[] vector = inputColVector.vector;
long[] outputVector = outputColVector.vector;
@@ -72,38 +73,68 @@ public abstract class MathFuncLongToLong extends VectorExpression {
return;
}
- if (inputColVector.isRepeating) {
- outputVector[0] = func(vector[0]);
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
- // Even if there are no nulls, we always copy over entry 0. Simplifies code.
- outputIsNull[0] = inputIsNull[0];
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ outputIsNull[0] = false;
+ outputVector[0] = func(vector[0]);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
outputColVector.isRepeating = true;
- } else if (inputColVector.noNulls) {
+ return;
+ }
+
+ if (inputColVector.noNulls) {
if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = func(vector[i]);
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = func(vector[i]);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = func(vector[i]);
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = func(vector[i]);
}
}
- outputColVector.isRepeating = false;
- } else /* there are nulls */ {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = func(vector[i]);
outputIsNull[i] = inputIsNull[i];
- }
+ outputVector[i] = func(vector[i]);
+ }
} else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
outputVector[i] = func(vector[i]);
}
- System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
- outputColVector.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
index 30f20f3..be69f7f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -54,45 +56,61 @@ public class NotCol extends VectorExpression {
long[] vector = inputColVector.vector;
LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
long[] outputVector = outV.vector;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outV.isNull;
if (n <= 0) {
// Nothing to do, this is EOF
return;
}
- if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
// 0 XOR 1 yields 1, 1 XOR 1 yields 0
outputVector[0] = vector[0] ^ 1;
- } else if (batch.selectedInUse) {
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ }
+ outV.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
+ outV.isNull[i] = false;
outputVector[i] = vector[i] ^ 1;
}
- outV.isRepeating = false;
} else {
+ Arrays.fill(outV.isNull, 0, n, false);
for (int i = 0; i != n; i++) {
outputVector[i] = vector[i] ^ 1;
}
- outV.isRepeating = false;
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outputVector[0] = vector[0] ^ 1;
- outV.isNull[0] = inputColVector.isNull[0];
- } else if (batch.selectedInUse) {
- outV.isRepeating = false;
+
+ if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
outputVector[i] = vector[i] ^ 1;
outV.isNull[i] = inputColVector.isNull[i];
}
} else {
- outV.isRepeating = false;
for (int i = 0; i != n; i++) {
outputVector[i] = vector[i] ^ 1;
outV.isNull[i] = inputColVector.isNull[i];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
index eaaade6..3c18853 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
@@ -284,25 +284,56 @@ public class NullUtil {
}
/*
- * Propagate null values for a two-input operator.
+ * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
*/
public static void propagateNullsColCol(ColumnVector inputColVector1,
ColumnVector inputColVector2, ColumnVector outputColVector, int[] sel,
int n, boolean selectedInUse) {
- outputColVector.noNulls = inputColVector1.noNulls && inputColVector2.noNulls;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
- if (outputColVector.noNulls) {
- // the inputs might not always have isNull initialized for
- // inputColVector1.isNull[i] || inputColVector2.isNull[i] to be valid
- Arrays.fill(outputColVector.isNull, false);
- return;
- }
+ if (inputColVector1.noNulls && inputColVector2.noNulls) {
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+ outputColVector.isNull[0] = false;
+ outputColVector.isRepeating = true;
+ } else {
+ if (selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputColVector.isNull[i] = false;
+ }
+ } else {
+ Arrays.fill(outputColVector.isNull, 0, n, false);
+ }
+ }
+ } else if (inputColVector1.noNulls && !inputColVector2.noNulls) {
- if (inputColVector1.noNulls && !inputColVector2.noNulls) {
- if (inputColVector2.isRepeating) {
- outputColVector.isNull[0] = inputColVector2.isNull[0];
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+ if (!inputColVector2.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputColVector2.isRepeating) {
+ if (!inputColVector2.isNull[0]) {
+ if (selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputColVector.isNull[i] = false;
+ }
+ } else {
+ Arrays.fill(outputColVector.isNull, 0, n, false);
+ }
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isRepeating = true; // Because every value will be NULL.
+ }
} else {
+ outputColVector.noNulls = false;
if (selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
@@ -313,9 +344,32 @@ public class NullUtil {
}
}
} else if (!inputColVector1.noNulls && inputColVector2.noNulls) {
- if (inputColVector1.isRepeating) {
- outputColVector.isNull[0] = inputColVector1.isNull[0];
+
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+ if (!inputColVector1.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ } else if (inputColVector1.isRepeating) {
+ if (!inputColVector1.isNull[0]) {
+ if (selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputColVector.isNull[i] = false;
+ }
+ } else {
+ Arrays.fill(outputColVector.isNull, 0, n, false);
+ }
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ outputColVector.isRepeating = true; // Because every value will be NULL.
+ }
} else {
+ outputColVector.noNulls = false;
if (selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
@@ -326,18 +380,23 @@ public class NullUtil {
}
}
} else if (!inputColVector1.noNulls && !inputColVector2.noNulls) {
+
if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- outputColVector.isNull[0] = inputColVector1.isNull[0] || inputColVector2.isNull[0];
- if (outputColVector.isNull[0]) {
- outputColVector.isRepeating = true;
- return;
+ if (!inputColVector1.isNull[0] && !inputColVector2.isNull[0]) {
+ outputColVector.isNull[0] = false;
+ } else {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
}
+ outputColVector.isRepeating = true;
} else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) {
+
if (inputColVector1.isNull[0]) {
outputColVector.isNull[0] = true;
- outputColVector.isRepeating = true; // because every value will be NULL
- return;
+ outputColVector.noNulls = false;
+ outputColVector.isRepeating = true; // Because every value will be NULL.
} else {
+ outputColVector.noNulls = false;
if (selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
@@ -352,9 +411,10 @@ public class NullUtil {
} else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) {
if (inputColVector2.isNull[0]) {
outputColVector.isNull[0] = true;
- outputColVector.isRepeating = true; // because every value will be NULL
- return;
+ outputColVector.noNulls = false;
+ outputColVector.isRepeating = true; // Because every value will be NULL.
} else {
+ outputColVector.noNulls = false;
if (selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
@@ -366,6 +426,7 @@ public class NullUtil {
}
}
} else { // neither side is repeating
+ outputColVector.noNulls = false;
if (selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java
index bfd7334..62873e9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -49,33 +51,68 @@ public class OctetLength extends VectorExpression {
}
BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
int[] sel = batch.selected;
int n = batch.size;
int [] length = inputColVector.length;
- long[] resultLen = outV.vector;
+ long[] resultLen = outputColVector.vector;
if (n == 0) {
//Nothing to do
return;
}
- if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
resultLen[0] = length[0];
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- resultLen[i] = length[i];
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ resultLen[i] = length[i];
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ resultLen[i] = length[i];
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
resultLen[i] = length[i];
}
- outV.isRepeating = false;
}
} else {
@@ -83,30 +120,23 @@ public class OctetLength extends VectorExpression {
* Handle case with nulls. Don't do function if the value is null, to save time,
* because calling the function can be expensive.
*/
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- resultLen[0] = length[0];
- }
- } else if (batch.selectedInUse) {
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- if (!inputColVector.isNull[i]) {
+ outputIsNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
resultLen[i] = length[i];
}
- outV.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
} else {
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!inputColVector.isNull[i]) {
+ if (!inputIsNull[i]) {
resultLen[i] = length[i];
}
- outV.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
index 20a0a37..db684c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java
@@ -15,10 +15,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
+
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
import org.apache.hadoop.hive.ql.exec.vector.expressions.AbstractFilterStringColLikeStringScalar.Checker;
@@ -70,42 +71,50 @@ public class SelectStringColLikeStringScalar extends VectorExpression {
LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
long[] outputVector = outV.vector;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outV.isNull;
// return immediately if batch is empty
if (n == 0) {
return;
}
- outV.noNulls = inputColVector.noNulls;
- outV.isRepeating = inputColVector.isRepeating;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0);
- outV.isNull[0] = false;
- } else if (batch.selectedInUse) {
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ }
+ outV.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
- outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
outV.isNull[i] = false;
+ outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
}
} else {
+ Arrays.fill(outV.isNull, 0, n, false);
for (int i = 0; i != n; i++) {
outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0);
- outV.isNull[i] = false;
}
}
- } else {
- if (inputColVector.isRepeating) {
- //All must be selected otherwise size would be zero. Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0);
- outV.isNull[0] = false;
- } else {
- outputVector[0] = LongColumnVector.NULL_VALUE;
- outV.isNull[0] = true;
- }
- } else if (batch.selectedInUse) {
+ } else /* there are nulls in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
if (!nullPos[i]) {
@@ -114,6 +123,7 @@ public class SelectStringColLikeStringScalar extends VectorExpression {
} else {
outputVector[i] = LongColumnVector.NULL_VALUE;
outV.isNull[i] = true;
+ outV.noNulls = false;
}
}
} else {
@@ -124,11 +134,12 @@ public class SelectStringColLikeStringScalar extends VectorExpression {
} else {
outputVector[i] = LongColumnVector.NULL_VALUE;
outV.isNull[i] = true;
+ outV.noNulls = false;
}
}
}
}
- }
+ }
private Checker borrowChecker() {
FilterStringColLikeStringScalar fil = new FilterStringColLikeStringScalar();
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
index c889ac1..eb91321 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
@@ -74,57 +74,82 @@ public class StringColumnInList extends VectorExpression implements IStringInExp
BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol];
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
- boolean[] nullPos = inputColVector.isNull;
+ boolean[] inputIsNull = inputColVector.isNull;
int n = batch.size;
byte[][] vector = inputColVector.vector;
int[] start = inputColVector.start;
int[] len = inputColVector.length;
long[] outputVector = outputColVector.vector;
+ boolean[] outputIsNull = outputColVector.isNull;
// return immediately if batch is empty
if (n == 0) {
return;
}
- outputColVector.isRepeating = inputColVector.isRepeating;
- outputColVector.noNulls = inputColVector.noNulls;
- if (inputColVector.noNulls) {
- if (inputColVector.isRepeating) {
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0;
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+ }
}
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
}
}
- } else {
- if (inputColVector.isRepeating) {
+ } else /* there are nulls in the inputColVector */ {
- // All must be selected otherwise size would be zero
- // Repeating property will not change.
- if (!nullPos[0]) {
- outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0;
- }
- outputColVector.isNull[0] = nullPos[0];
- } else if (batch.selectedInUse) {
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
- if (!nullPos[i]) {
+ outputColVector.isNull[i] = inputIsNull[i];
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
}
- outputColVector.isNull[i] = nullPos[i];
}
} else {
- System.arraycopy(nullPos, 0, outputColVector.isNull, 0, n);
+ System.arraycopy(inputIsNull, 0, outputColVector.isNull, 0, n);
for(int i = 0; i != n; i++) {
- if (!nullPos[i]) {
+ if (!inputIsNull[i]) {
outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
index f730c9d..6c92e39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -56,12 +57,14 @@ public class StringGroupColConcatStringScalar extends VectorExpression {
}
BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
int n = batch.size;
byte[][] vector = inputColVector.vector;
int[] start = inputColVector.start;
int[] length = inputColVector.length;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -70,55 +73,79 @@ public class StringGroupColConcatStringScalar extends VectorExpression {
}
// initialize output vector buffer to receive data
- outV.initBuffer();
+ outputColVector.initBuffer();
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ outputColVector.setConcat(0, vector[0], start[0], length[0], value, 0, value.length);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+ outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
}
- outV.isRepeating = false;
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
/*
* Handle case with nulls. Don't do function if the value is null, to save time,
* because calling the function can be expensive.
*/
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length);
- }
- } else if (batch.selectedInUse) {
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
- outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+ outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
}
- outV.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
} else {
for(int i = 0; i != n; i++) {
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
- outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
+ outputColVector.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
}
- outV.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
index cbdcc76..6c40a28 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -57,6 +59,7 @@ public class StringGroupConcatColCol extends VectorExpression {
BytesColumnVector inV1 = (BytesColumnVector) batch.cols[colNum1];
BytesColumnVector inV2 = (BytesColumnVector) batch.cols[colNum2];
BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+ boolean[] outputIsNull = outV.isNull;
int[] sel = batch.selected;
int n = batch.size;
byte[][] vector1 = inV1.vector;
@@ -81,7 +84,7 @@ public class StringGroupConcatColCol extends VectorExpression {
if (inV1.noNulls && !inV2.noNulls) {
- // propagate nulls
+ // Carefully handle NULLs...
/* We'll assume that there *may* be nulls in the input if !noNulls is true
* for an input vector. This is to be more forgiving of errors in loading
@@ -89,6 +92,7 @@ public class StringGroupConcatColCol extends VectorExpression {
* isNull[0] is set if !noNulls and isRepeating are true for the vector.
*/
outV.noNulls = false;
+
if (inV2.isRepeating) {
if (inV2.isNull[0]) {
@@ -321,8 +325,9 @@ public class StringGroupConcatColCol extends VectorExpression {
}
} else { // there are no nulls in either input vector
- // propagate null information
- outV.noNulls = true;
+ /*
+ * Do careful maintenance of the outputColVector.noNulls flag.
+ */
// perform data operation
if (inV1.isRepeating && inV2.isRepeating) {
@@ -330,13 +335,16 @@ public class StringGroupConcatColCol extends VectorExpression {
// All must be selected otherwise size would be zero. Repeating property will not change.
outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]);
outV.isRepeating = true;
+ outputIsNull[0] = false;
} else if (inV1.isRepeating) {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]);
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]);
}
@@ -345,9 +353,11 @@ public class StringGroupConcatColCol extends VectorExpression {
if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]);
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]);
}
@@ -356,9 +366,11 @@ public class StringGroupConcatColCol extends VectorExpression {
if (batch.selectedInUse) {
for(int j=0; j != n; j++) {
int i = sel[j];
+ outputIsNull[i] = false;
outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]);
}
} else {
+ Arrays.fill(outputIsNull, 0, n, false);
for(int i = 0; i != n; i++) {
outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
index 9b9c063..f1fabb7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -53,66 +55,88 @@ public class StringLength extends VectorExpression {
}
BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
- LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
int n = batch.size;
byte[][] vector = inputColVector.vector;
int [] start = inputColVector.start;
int [] length = inputColVector.length;
- long[] resultLen = outV.vector;
+ long[] resultLen = outputColVector.vector;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
//Nothing to do
return;
}
- if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
resultLen[0] = utf8StringLength(vector[0], start[0], length[0]);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
}
- outV.isRepeating = false;
}
- } else {
+ } else /* there are nulls in the inputColVector */ {
- /*
- * Handle case with nulls. Don't do function if the value is null, to save time,
- * because calling the function can be expensive.
- */
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- resultLen[0] = utf8StringLength(vector[0], start[0], length[0]);
- }
- } else if (batch.selectedInUse) {
+ // Carefully handle NULLs...
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
}
- outV.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
+ outputColVector.isRepeating = false;
} else {
for(int i = 0; i != n; i++) {
+ outputColVector.isNull[i] = inputColVector.isNull[i];
if (!inputColVector.isNull[i]) {
resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
}
- outV.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
index 94fbef8..a9f09dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -56,12 +57,14 @@ public class StringScalarConcatStringGroupCol extends VectorExpression {
}
BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
int n = batch.size;
byte[][] vector = inputColVector.vector;
int[] start = inputColVector.start;
int[] length = inputColVector.length;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
if (n == 0) {
@@ -70,55 +73,79 @@ public class StringScalarConcatStringGroupCol extends VectorExpression {
}
// initialize output vector buffer to receive data
- outV.initBuffer();
+ outputColVector.initBuffer();
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
+
+ if (inputColVector.isRepeating) {
+ if (inputColVector.noNulls || !inputIsNull[0]) {
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false;
+ outputColVector.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
+ } else {
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ }
+ outputColVector.isRepeating = true;
+ return;
+ }
if (inputColVector.noNulls) {
- outV.noNulls = true;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
- } else if (batch.selectedInUse) {
- for(int j = 0; j != n; j++) {
- int i = sel[j];
- outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+ }
+ } else {
+ for(int j = 0; j != n; j++) {
+ final int i = sel[j];
+ outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+ }
}
- outV.isRepeating = false;
} else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
for(int i = 0; i != n; i++) {
- outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+ outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
}
- outV.isRepeating = false;
}
- } else {
+ } else /* there are NULLs in the inputColVector */ {
+
+ // Carefully handle NULLs...
/*
* Handle case with nulls. Don't do function if the value is null, to save time,
* because calling the function can be expensive.
*/
- outV.noNulls = false;
- if (inputColVector.isRepeating) {
- outV.isRepeating = true;
- outV.isNull[0] = inputColVector.isNull[0];
- if (!inputColVector.isNull[0]) {
- outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
- }
- } else if (batch.selectedInUse) {
+ outputColVector.noNulls = false;
+
+ if (batch.selectedInUse) {
for(int j = 0; j != n; j++) {
int i = sel[j];
if (!inputColVector.isNull[i]) {
- outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+ outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
}
- outV.isNull[i] = inputColVector.isNull[i];
+ outputColVector.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
} else {
for(int i = 0; i != n; i++) {
if (!inputColVector.isNull[i]) {
- outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
+ outputColVector.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
}
- outV.isNull[i] = inputColVector.isNull[i];
+ outputColVector.isNull[i] = inputColVector.isNull[i];
}
- outV.isRepeating = false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
index 5934f6f..7c58838 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -125,7 +126,7 @@ public class StringSubstrColStart extends VectorExpression {
}
BytesColumnVector inV = (BytesColumnVector) batch.cols[colNum];
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
int n = batch.size;
@@ -137,82 +138,101 @@ public class StringSubstrColStart extends VectorExpression {
int[] sel = batch.selected;
int[] len = inV.length;
int[] start = inV.start;
- outV.initBuffer();
+ outputColVector.initBuffer();
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
if (inV.isRepeating) {
- outV.isRepeating = true;
if (!inV.noNulls && inV.isNull[0]) {
- outV.isNull[0] = true;
- outV.noNulls = false;
- outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
return;
} else {
- outV.noNulls = true;
+ outputIsNull[0] = false;
int offset = getSubstrStartOffset(vector[0], start[0], len[0], startIdx);
if (offset != -1) {
- outV.setVal(0, vector[0], offset, len[0] - (offset - start[0]));
+ outputColVector.setVal(0, vector[0], offset, len[0] - (offset - start[0]));
} else {
- outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
+ outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
}
}
- } else {
- outV.isRepeating = false;
- if (batch.selectedInUse) {
- if (!inV.noNulls) {
- outV.noNulls = false;
- for (int i = 0; i != n; ++i) {
- int selected = sel[i];
- if (!inV.isNull[selected]) {
- int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected],
- startIdx);
- outV.isNull[selected] = false;
- if (offset != -1) {
- outV.setVal(selected, vector[selected], offset,
- len[selected] - (offset - start[selected]));
- } else {
- outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
- }
- } else {
- outV.isNull[selected] = true;
- }
- }
- } else {
- outV.noNulls = true;
- for (int i = 0; i != n; ++i) {
- int selected = sel[i];
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (batch.selectedInUse) {
+ if (!inV.noNulls) /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ for (int i = 0; i != n; ++i) {
+ int selected = sel[i];
+ if (!inV.isNull[selected]) {
+ outputIsNull[selected] = false;
int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected],
startIdx);
+ outputColVector.isNull[selected] = false;
if (offset != -1) {
- outV.setVal(selected, vector[selected], offset,
+ outputColVector.setVal(selected, vector[selected], offset,
len[selected] - (offset - start[selected]));
} else {
- outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
+ outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
}
+ } else {
+ outputColVector.isNull[selected] = true;
+ outputColVector.noNulls = false;
}
}
} else {
- if (!inV.noNulls) {
- outV.noNulls = false;
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
- for (int i = 0; i != n; ++i) {
- if (!inV.isNull[i]) {
- int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx);
- if (offset != -1) {
- outV.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
- } else {
- outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
- }
- }
+ for (int i = 0; i != n; ++i) {
+ int selected = sel[i];
+ outputColVector.isNull[selected] = false;
+ int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected],
+ startIdx);
+ if (offset != -1) {
+ outputColVector.setVal(selected, vector[selected], offset,
+ len[selected] - (offset - start[selected]));
+ } else {
+ outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
}
- } else {
- outV.noNulls = true;
- for (int i = 0; i != n; ++i) {
+ }
+ }
+ } else {
+ if (!inV.noNulls) /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ for (int i = 0; i != n; ++i) {
+ if (!inV.isNull[i]) {
+ outputColVector.isNull[i] = false;
int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx);
if (offset != -1) {
- outV.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
+ outputColVector.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
} else {
- outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
+ outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
}
+ } else {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for (int i = 0; i != n; ++i) {
+ int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx);
+ if (offset != -1) {
+ outputColVector.setVal(i, vector[i], offset, len[i] - (offset - start[i]));
+ } else {
+ outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
index 9d6eccf..7c5d19a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -146,7 +147,7 @@ public class StringSubstrColStartLen extends VectorExpression {
}
BytesColumnVector inV = (BytesColumnVector) batch.cols[colNum];
- BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum];
+ BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
int n = batch.size;
@@ -158,82 +159,98 @@ public class StringSubstrColStartLen extends VectorExpression {
int[] sel = batch.selected;
int[] len = inV.length;
int[] start = inV.start;
- outV.initBuffer();
+ outputColVector.initBuffer();
+ boolean[] outputIsNull = outputColVector.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outputColVector.isRepeating = false;
if (inV.isRepeating) {
- outV.isRepeating = true;
+
if (!inV.noNulls && inV.isNull[0]) {
- outV.isNull[0] = true;
- outV.noNulls = false;
- outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
- return;
+ outputIsNull[0] = true;
+ outputColVector.noNulls = false;
+ outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
} else {
- outV.noNulls = true;
+ outputIsNull[0] = false;
populateSubstrOffsets(vector[0], start[0], len[0], startIdx, length, offsetArray);
if (offsetArray[0] != -1) {
- outV.setVal(0, vector[0], offsetArray[0], offsetArray[1]);
+ outputColVector.setVal(0, vector[0], offsetArray[0], offsetArray[1]);
} else {
- outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
+ outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
}
}
- } else {
- outV.isRepeating = false;
- if (batch.selectedInUse) {
- if (!inV.noNulls) {
- outV.noNulls = false;
- for (int i = 0; i != n; ++i) {
- int selected = sel[i];
- if (!inV.isNull[selected]) {
- outV.isNull[selected] = false;
- populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx,
- length, offsetArray);
- if (offsetArray[0] != -1) {
- outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]);
- } else {
- outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
- }
- } else {
- outV.isNull[selected] = true;
- }
- }
- } else {
- outV.noNulls = true;
- for (int i = 0; i != n; ++i) {
- int selected = sel[i];
- outV.isNull[selected] = false;
+ outputColVector.isRepeating = true;
+ return;
+ }
+
+ if (batch.selectedInUse) {
+ if (!inV.noNulls) /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ for (int i = 0; i != n; ++i) {
+ int selected = sel[i];
+ if (!inV.isNull[selected]) {
+ outputIsNull[selected] = false;
populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx,
length, offsetArray);
if (offsetArray[0] != -1) {
- outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]);
+ outputColVector.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]);
} else {
- outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
+ outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
}
+ } else {
+ outputIsNull[selected] = true;
+ outputColVector.noNulls = false;
}
}
} else {
- if (!inV.noNulls) {
- System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
- outV.noNulls = false;
- for (int i = 0; i != n; ++i) {
- if (!inV.isNull[i]) {
- populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray);
- if (offsetArray[0] != -1) {
- outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]);
- } else {
- outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
- }
- }
+ for (int i = 0; i != n; ++i) {
+ int selected = sel[i];
+ outputColVector.isNull[selected] = false;
+ populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx,
+ length, offsetArray);
+ if (offsetArray[0] != -1) {
+ outputColVector.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]);
+ } else {
+ outputColVector.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length);
}
- } else {
- outV.noNulls = true;
- for (int i = 0; i != n; ++i) {
- outV.isNull[i] = false;
+ }
+ }
+ } else {
+ if (!inV.noNulls) /* there are nulls in the inputColVector */ {
+
+ // Carefully handle NULLs...
+
+ for (int i = 0; i != n; ++i) {
+ if (!inV.isNull[i]) {
+ outputIsNull[i] = false;
populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray);
if (offsetArray[0] != -1) {
- outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]);
+ outputColVector.setVal(i, vector[i], offsetArray[0], offsetArray[1]);
} else {
- outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
+ outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
}
+ } else {
+ outputIsNull[i] = true;
+ outputColVector.noNulls = false;
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for (int i = 0; i != n; ++i) {
+ populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray);
+ if (offsetArray[0] != -1) {
+ outputColVector.setVal(i, vector[i], offsetArray[0], offsetArray[1]);
+ } else {
+ outputColVector.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length);
}
}
}