You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/03 04:04:49 UTC
[2/2] hive git commit: HIVE-20294: Vectorization: Fix NULL / Wrong
Results issues in COALESCE / ELT (Matt McCline, reviewed by Teddy Choi)
HIVE-20294: Vectorization: Fix NULL / Wrong Results issues in COALESCE / ELT (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/66ea3266
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/66ea3266
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/66ea3266
Branch: refs/heads/master
Commit: 66ea3266cb9209f8a201f169be626ba878d5b763
Parents: e9896bf
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu Aug 2 23:04:34 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu Aug 2 23:04:34 2018 -0500
----------------------------------------------------------------------
.../ql/exec/vector/expressions/DecimalUtil.java | 2 +-
.../vector/expressions/ListIndexColColumn.java | 472 ++++++++++++++-
.../vector/expressions/ListIndexColScalar.java | 128 ++++-
.../expressions/VectorUDFMapIndexBase.java | 89 ---
.../expressions/VectorUDFMapIndexBaseCol.java | 402 +++++++++++--
.../VectorUDFMapIndexBaseScalar.java | 138 ++++-
.../VectorUDFMapIndexDecimalCol.java | 76 +++
.../VectorUDFMapIndexDecimalScalar.java | 96 ++++
.../expressions/VectorUDFMapIndexDoubleCol.java | 15 +-
.../VectorUDFMapIndexDoubleScalar.java | 32 +-
.../expressions/VectorUDFMapIndexLongCol.java | 15 +-
.../VectorUDFMapIndexLongScalar.java | 21 +-
.../expressions/VectorUDFMapIndexStringCol.java | 32 +-
.../VectorUDFMapIndexStringScalar.java | 34 +-
.../hive/ql/udf/generic/GenericUDFIndex.java | 7 +-
.../ql/exec/vector/VectorRandomRowSource.java | 57 +-
.../vector/expressions/TestVectorBetweenIn.java | 4 +-
.../expressions/TestVectorCoalesceElt.java | 502 ++++++++++++++++
.../vector/expressions/TestVectorIndex.java | 575 +++++++++++++++++++
.../exec/vector/expressions/TestVectorNull.java | 6 +-
.../llap/parquet_map_type_vectorization.q.out | 4 +-
.../parquet_map_type_vectorization.q.out | 4 +-
22 files changed, 2452 insertions(+), 259 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java
index db040f1..70a9a9c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java
@@ -30,7 +30,7 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
public class DecimalUtil {
public static int compare(HiveDecimalWritable writableLeft, HiveDecimal right) {
- return writableLeft.getHiveDecimal().compareTo(right);
+ return writableLeft.compareTo(right);
}
public static int compare(HiveDecimal left, HiveDecimalWritable writableRight) {
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
index 55417cf..2992bff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -47,6 +49,13 @@ public class ListIndexColColumn extends VectorExpression {
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
@@ -56,48 +65,459 @@ public class ListIndexColColumn extends VectorExpression {
ColumnVector childV = listV.child;
LongColumnVector indexColumnVector = (LongColumnVector) batch.cols[indexColumnNum];
long[] indexV = indexColumnVector.vector;
+ int[] sel = batch.selected;
+ boolean[] indexIsNull = indexColumnVector.isNull;
+ boolean[] listIsNull = listV.isNull;
+ boolean[] outputIsNull = outV.isNull;
// We do not need to do a column reset since we are carefully changing the output.
outV.isRepeating = false;
+ /*
+ * List indices are 0-based.
+ *
+ * Do careful maintenance of the outputColVector.noNulls flag since the index may be
+ * out-of-bounds.
+ */
+
+ if (indexColumnVector.isRepeating) {
+
+ /*
+ * Repeated index or repeated NULL index.
+ */
+ if (indexColumnVector.noNulls || !indexIsNull[0]) {
+ final long repeatedLongIndex = indexV[0];
+ if (repeatedLongIndex < 0) {
+
+ // Invalid index for entire batch.
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ outV.isRepeating = true;
+ return;
+ }
+
+ /*
+ * Same INDEX for entire batch. Still need to validate the LIST upper limit.
+ */
+ if (listV.isRepeating) {
+ if (listV.noNulls || !listIsNull[0]) {
+ final long repeatedLongListLength = listV.lengths[0];
+ if (repeatedLongIndex >= repeatedLongListLength) {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[0] = false;
+ outV.setElement(0, (int) (listV.offsets[0] + repeatedLongIndex), childV);
+ }
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ }
+ outV.isRepeating = true;
+ return;
+ }
+
+ /*
+ * Individual row processing for LIST vector with *repeated* INDEX instance.
+ */
+ if (listV.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ }
+ } else {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i < n; i++) {
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ }
+ }
+ } else /* there are NULLs in the LIST */ {
+
+ if (batch.selectedInUse) {
+ for (int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (repeatedLongIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + repeatedLongIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ outV.isRepeating = true;
+ }
+ return;
+ }
+
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ /*
+ * Same LIST for entire batch. Still need to validate the LIST upper limit against varying
+ * INDEX.
+ *
+ * (Repeated INDEX case handled above).
+ */
+
if (listV.isRepeating) {
- if (listV.isNull[0]) {
- outV.isNull[0] = true;
+ if (listV.noNulls || !listIsNull[0]) {
+
+ /*
+ * Individual row processing for INDEX vector with *repeated* LIST value.
+ */
+ final long repeatedLongListOffset = listV.offsets[0];
+ final long repeatedLongListLength = listV.lengths[0];
+
+ if (indexColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ }
+ } else {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i != n; i++) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ }
+ }
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
+ for(int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!indexIsNull[i]) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (!indexIsNull[i]) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (longIndex >= repeatedLongListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (repeatedLongListOffset + longIndex), childV);
+ }
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
+ } else {
+ outputIsNull[0] = true;
outV.noNulls = false;
outV.isRepeating = true;
+ }
+ return;
+ }
+
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ /*
+ * Individual row processing for INDEX vectors and LIST vectors.
+ */
+ final boolean listNoNulls = listV.noNulls;
+
+ if (indexColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ } else {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
} else {
- if (indexColumnVector.isRepeating) {
- if (indexV[0] >= listV.lengths[0]) {
- outV.isNull[0] = true;
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i != n; i++) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
outV.noNulls = false;
} else {
- outV.isNull[0] = false;
- outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV);
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
- outV.isRepeating = true;
- } else {
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (indexV[j] >= listV.lengths[0]) {
- outV.isNull[j] = true;
+ }
+ }
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
+ for(int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!indexIsNull[i]) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
outV.noNulls = false;
} else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (listV.offsets[0] + indexV[j]), childV);
-
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
}
}
- }
- } else {
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (listV.isNull[j] || indexV[j] >= listV.lengths[j]) {
- outV.isNull[j] = true;
- outV.noNulls = false;
- } else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (listV.offsets[j] + indexV[j]), childV);
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (!indexIsNull[i]) {
+ final long longIndex = indexV[i];
+ if (longIndex < 0) {
+
+ // Invalid (negative) index for this row.
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ if (listNoNulls || !listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (longIndex >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + longIndex), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
index 808e9fb..bb01c1c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -46,6 +48,13 @@ public class ListIndexColScalar extends VectorExpression {
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
@@ -53,37 +62,128 @@ public class ListIndexColScalar extends VectorExpression {
ColumnVector outV = batch.cols[outputColumnNum];
ListColumnVector listV = (ListColumnVector) batch.cols[listColumnNum];
ColumnVector childV = listV.child;
+ int[] sel = batch.selected;
+ boolean[] listIsNull = listV.isNull;
+ boolean[] outputIsNull = outV.isNull;
+
+ if (index < 0) {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
+ outV.isRepeating = true;
+ return;
+ }
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
/*
* Do careful maintenance of the outputColVector.noNulls flag.
*/
if (listV.isRepeating) {
- if (listV.isNull[0]) {
- outV.isNull[0] = true;
- outV.noNulls = false;
- } else {
- if (index >= listV.lengths[0]) {
+ if (listV.noNulls || !listIsNull[0]) {
+ final long repeatedLongListLength = listV.lengths[0];
+ if (index >= repeatedLongListLength) {
outV.isNull[0] = true;
outV.noNulls = false;
} else {
outV.isNull[0] = false;
outV.setElement(0, (int) (listV.offsets[0] + index), childV);
}
+ } else {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
}
outV.isRepeating = true;
- } else {
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (listV.isNull[j] || index >= listV.lengths[j]) {
- outV.isNull[j] = true;
- outV.noNulls = false;
+ return;
+ }
+
+ /*
+ * Individual row processing for LIST vector with scalar constant INDEX value.
+ */
+ if (listV.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ }
} else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (listV.offsets[j] + index), childV);
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outV.isNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i < n; i++) {
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ }
+ }
+ } else /* there are NULLs in the LIST */ {
+
+ if (batch.selectedInUse) {
+ for (int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!listIsNull[i]) {
+ final long longListLength = listV.lengths[i];
+ if (index >= longListLength) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
}
- outV.isRepeating = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java
deleted file mode 100644
index 3df4bce..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBase.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-/**
- * Superclass to support vectorized functions that take a parameter as key of Map
- * and return the value of Map.
- */
-public abstract class VectorUDFMapIndexBase extends VectorExpression {
-
- private static final long serialVersionUID = 1L;
-
- public VectorUDFMapIndexBase() {
- super();
- }
-
- public VectorUDFMapIndexBase(int outputColumnNum) {
- super(outputColumnNum);
- }
-
- /**
- * The index array of MapColumnVector is used to get the value from MapColumnVector based on the
- * index, the following are the steps to get it:
- * 1. Get the current key which is a scalar or from a ColumnVector.
- * 2. Compare the current key and the key from MapColumnVector.
- * 3. Set the index of MapColumnVector to the result array if the keys are same.
- */
- protected int[] getMapValueIndex(MapColumnVector mapV, VectorizedRowBatch batch) {
- int[] indexArray = new int[VectorizedRowBatch.DEFAULT_SIZE];
- for (int i = 0; i < batch.size; i++) {
- boolean findKey = false;
- int offset = (batch.selectedInUse) ? batch.selected[i] : i;
- Object columnKey = getCurrentKey(offset);
- for (int j = 0; j < mapV.lengths[offset]; j++) {
- int index = (int)(mapV.offsets[offset] + j);
- Object tempKey = getKeyByIndex(mapV.keys, index);
- if (compareKey(columnKey, tempKey)) {
- indexArray[offset] = j;
- findKey = true;
- break;
- }
- }
- if (!findKey) {
- indexArray[offset] = -1;
- }
- if (mapV.isRepeating) {
- break;
- }
- }
- return indexArray;
- }
-
- protected boolean compareKey(Object columnKey, Object otherKey) {
- if (columnKey == null && otherKey == null) {
- return true;
- } else if (columnKey != null && otherKey != null) {
- return compareKeyInternal(columnKey, otherKey);
- } else {
- return false;
- }
- }
-
- protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
- return columnKey.equals(otherKey);
- }
-
- abstract Object getKeyByIndex(ColumnVector cv, int index);
-
- abstract Object getCurrentKey(int index);
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
index 157154a..67f4d55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -27,7 +29,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
* Superclass to support vectorized functions that take a column value as key of Map
* and return the value of Map.
*/
-public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
+public abstract class VectorUDFMapIndexBaseCol extends VectorExpression {
private static final long serialVersionUID = 1L;
@@ -47,6 +49,13 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
@@ -55,61 +64,380 @@ public abstract class VectorUDFMapIndexBaseCol extends VectorUDFMapIndexBase {
MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
// indexColumnVector includes the keys of Map
indexColumnVector = batch.cols[indexColumnNum];
+ ColumnVector valuesV = mapV.values;
+
+ int[] sel = batch.selected;
+ boolean[] indexIsNull = indexColumnVector.isNull;
+ boolean[] mapIsNull = mapV.isNull;
+ boolean[] outputIsNull = outV.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
/*
* Do careful maintenance of the outputColVector.noNulls flag.
*/
- int[] mapValueIndex;
+ if (indexColumnVector.isRepeating) {
+
+ /*
+ * Repeated index or repeated NULL index.
+ */
+ if (indexColumnVector.noNulls || !indexIsNull[0]) {
+
+ /*
+ * Same INDEX for entire batch.
+ */
+ if (mapV.isRepeating) {
+ if (mapV.noNulls || !mapIsNull[0]) {
+ final int repeatedMapIndex = findInMap(indexColumnVector, 0, mapV, 0);
+ if (repeatedMapIndex == -1) {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[0] = false;
+ outV.setElement(0, repeatedMapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ }
+ outV.isRepeating = true;
+ return;
+ }
+
+ /*
+ * Individual row processing for MAP vector with *repeated* INDEX value.
+ */
+ if (mapV.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ } else {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i < n; i++) {
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else /* there are NULLs in the LIST */ {
+
+ if (batch.selectedInUse) {
+ for (int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
+ } else {
+ outputIsNull[0] = true;
+ outV.noNulls = false;
+ outV.isRepeating = true;
+ }
+ return;
+ }
+
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ /*
+ * Same MAP instance for entire batch.
+ *
+ * (Repeated INDEX case handled above).
+ */
+
if (mapV.isRepeating) {
- if (mapV.isNull[0]) {
- outV.isNull[0] = true;
+ if (mapV.noNulls || !mapIsNull[0]) {
+
+ /*
+ * Individual row processing for INDEX vector with *repeated* MAP instance.
+ */
+
+ if (indexColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ } else {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i != n; i++) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else /* there are NULLs in the inputColVector */ {
+
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
+ for(int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!indexIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (!indexIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
+ }
+ } else {
+ outputIsNull[0] = true;
outV.noNulls = false;
outV.isRepeating = true;
+ }
+ return;
+ }
+
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ /*
+ * Individual row processing for INDEX vectors and MAP vectors.
+ */
+ final boolean listNoNulls = mapV.noNulls;
+
+ if (indexColumnVector.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+
+ if (listNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int j = 0; j != n; j++) {
+ final int i = sel[j];
+ if (listNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ }
} else {
- mapValueIndex = getMapValueIndex(mapV, batch);
- if (indexColumnVector.isRepeating) {
- // the key is not found in MapColumnVector, set the output as null ColumnVector
- if (mapValueIndex[0] == -1) {
- outV.isNull[0] = true;
- outV.noNulls = false;
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i != n; i++) {
+ if (listNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
} else {
- // the key is found in MapColumnVector, set the value
- outV.isNull[0] = false;
- outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
+ outputIsNull[i] = true;
+ outV.noNulls = false;
}
- outV.isRepeating = true;
- } else {
- setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
}
}
- } else {
- mapValueIndex = getMapValueIndex(mapV, batch);
- setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
- }
- }
+ } else /* there are NULLs in the INDEX column vector */ {
- /**
- * Set the output based on the index array of MapColumnVector.
- */
- private void setUnRepeatingOutVector(VectorizedRowBatch batch, MapColumnVector mapV,
- ColumnVector outV, int[] mapValueIndex) {
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (mapV.isNull[j] || mapValueIndex[j] == -1) {
- outV.isNull[j] = true;
- outV.noNulls = false;
+ /*
+ * Do careful maintenance of the outV.noNulls flag.
+ */
+
+ if (batch.selectedInUse) {
+ for(int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!indexIsNull[i]) {
+ if (listNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
} else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
+ for(int i = 0; i != n; i++) {
+ if (!indexIsNull[i]) {
+ if (listNoNulls || !mapIsNull[i]) {
+ final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
}
}
- outV.isRepeating = false;
}
- @Override
- protected Object getCurrentKey(int index) {
- return getKeyByIndex(indexColumnVector, index);
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ throw new RuntimeException("Not implemented"); // base stub: the typed *Col subclasses override this; evaluate() treats a -1 result as key-not-found (NULL output)
}
public int getMapColumnNum() {
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
index 72662e0..e7bb4d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -27,7 +29,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
* Superclass to support vectorized functions that take a scalar as key of Map
* and return the value of Map.
*/
-public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase {
+public abstract class VectorUDFMapIndexBaseScalar extends VectorExpression {
private static final long serialVersionUID = 1L;
@@ -44,50 +46,144 @@ public abstract class VectorUDFMapIndexBaseScalar extends VectorUDFMapIndexBase
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+ // return immediately if batch is empty
+ final int n = batch.size;
+ if (n == 0) {
+ return;
+ }
+
if (childExpressions != null) {
super.evaluateChildren(batch);
}
ColumnVector outV = batch.cols[outputColumnNum];
MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
+ ColumnVector valuesV = mapV.values;
+
+ int[] sel = batch.selected;
+ boolean[] mapIsNull = mapV.isNull;
+ boolean[] outputIsNull = outV.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false;
/*
* Do careful maintenance of the outputColVector.noNulls flag.
*/
- int[] mapValueIndex;
if (mapV.isRepeating) {
- if (mapV.isNull[0]) {
- outV.isNull[0] = true;
- outV.noNulls = false;
- } else {
- mapValueIndex = getMapValueIndex(mapV, batch);
- if (mapValueIndex[0] == -1) {
- // the key is not found in MapColumnVector, set the output as null ColumnVector
+ if (mapV.noNulls || !mapIsNull[0]) {
+ final int repeatedMapIndex = findScalarInMap(mapV, 0);
+ if (repeatedMapIndex == -1) {
outV.isNull[0] = true;
outV.noNulls = false;
} else {
- // the key is found in MapColumnVector, set the value
- outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
+ outV.isNull[0] = false;
+ outV.setElement(0, repeatedMapIndex, valuesV);
}
+ } else {
+ outV.isNull[0] = true;
+ outV.noNulls = false;
}
outV.isRepeating = true;
- } else {
- mapValueIndex = getMapValueIndex(mapV, batch);
- for (int i = 0; i < batch.size; i++) {
- int j = (batch.selectedInUse) ? batch.selected[i] : i;
- if (mapV.isNull[j] || mapValueIndex[j] == -1) {
- outV.isNull[j] = true;
- outV.noNulls = false;
+ return;
+ }
+
+ /*
+ * Individual row processing for MAP vector with scalar constant INDEX (key) value.
+ */
+ if (mapV.noNulls) {
+ if (batch.selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outV.noNulls) {
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
} else {
- outV.isNull[j] = false;
- outV.setElement(j, (int) (mapV.offsets[j] + mapValueIndex[j]), mapV.values);
+ for (int j = 0; j < n; j++) {
+ final int i = sel[j];
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else {
+ if (!outV.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outV.isNull, false);
+ outV.noNulls = true;
+ }
+ for (int i = 0; i < n; i++) {
+ final long longListLength = mapV.lengths[i];
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ }
+ }
+ } else /* there are NULLs in the MAP */ {
+
+ if (batch.selectedInUse) {
+ for (int j=0; j != n; j++) {
+ int i = sel[j];
+ if (!mapIsNull[i]) {
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!mapIsNull[i]) {
+ final int mapIndex = findScalarInMap(mapV, i);
+ if (mapIndex == -1) {
+ outV.isNull[i] = true;
+ outV.noNulls = false;
+ } else {
+ outV.isNull[i] = false;
+ outV.setElement(i, mapIndex, valuesV);
+ }
+ } else {
+ outputIsNull[i] = true;
+ outV.noNulls = false;
+ }
}
}
- outV.isRepeating = false;
}
}
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ throw new RuntimeException("Not implemented"); // base stub: the typed *Scalar subclasses override this; evaluate() treats a -1 result as key-not-found (NULL output)
+ }
+
public int getMapColumnNum() {
return mapColumnNum;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java
new file mode 100644
index 0000000..d700799
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalCol.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+/**
+ * Vectorized MAP index: finds, per row, the MAP entry whose DECIMAL key equals the index column's value.
+ * Extends {@link VectorUDFMapIndexBaseCol}
+ */
+public class VectorUDFMapIndexDecimalCol extends VectorUDFMapIndexBaseCol {
+
+ public VectorUDFMapIndexDecimalCol() {
+ super();
+ }
+
+ public VectorUDFMapIndexDecimalCol(int mapColumnNum, int indexColumnNum, int outputColumnNum) {
+ super(mapColumnNum, indexColumnNum, outputColumnNum);
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return getColumnParamString(0, getMapColumnNum()) + ", key: "
+ + getColumnParamString(1, getIndexColumnNum());
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.MAP,
+ VectorExpressionDescriptor.ArgumentType.DECIMAL)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+ }
+
+ @Override
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; // position of this row's first key in the keys vector
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex]; // number of entries scanned for this row
+ HiveDecimalWritable[] keys = ((DecimalColumnVector) mapColumnVector.keys).vector;
+ final HiveDecimalWritable index =
+ ((DecimalColumnVector) indexColumnVector).vector[indexBatchIndex];
+ for (int i = 0; i < count; i++) { // linear scan; the first matching key wins
+ if (index.compareTo(keys[offset + i]) == 0) {
+ return offset + i; // absolute position of the matching entry, not relative to offset
+ }
+ }
+ return -1; // no key in this row's map matched
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java
new file mode 100644
index 0000000..7bdc555
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDecimalScalar.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+/**
+ * Vectorized MAP index: finds, per row, the MAP entry whose key equals a constant (scalar) DECIMAL key.
+ * Extends {@link VectorUDFMapIndexBaseScalar}
+ */
+public class VectorUDFMapIndexDecimalScalar extends VectorUDFMapIndexBaseScalar {
+
+ private static final long serialVersionUID = 1L;
+
+ private HiveDecimal key;
+ private double doubleKey; // key.doubleValue(), set in the 3-arg constructor; compared against DoubleColumnVector keys
+
+ public VectorUDFMapIndexDecimalScalar() {
+ super();
+ }
+
+ public VectorUDFMapIndexDecimalScalar(int mapColumnNum, HiveDecimal key, int outputColumnNum) {
+ super(mapColumnNum, outputColumnNum);
+ this.key = key;
+ doubleKey = key.doubleValue();
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return getColumnParamString(0, getMapColumnNum()) + ", key: " + key;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.MAP,
+ VectorExpressionDescriptor.ArgumentType.DECIMAL)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+ }
+
+ @Override
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex]; // position of this row's first key in the keys vector
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex]; // number of entries scanned for this row
+
+ ColumnVector keys = mapColumnVector.keys;
+ if (keys instanceof DecimalColumnVector) {
+ HiveDecimalWritable[] decimalKeyVector = ((DecimalColumnVector) keys).vector;
+ for (int i = 0; i < count; i++) {
+ if (decimalKeyVector[offset + i].compareTo(key) == 0) {
+ return offset + i;
+ }
+ }
+ } else {
+
+ // NOTE: for reasons not yet understood, the MAP keys can arrive here as a DoubleColumnVector;
+ // VectorExpressionDescriptor matching may be inadequate in this case, so compare as doubles.
+ double[] doubleKeyVector = ((DoubleColumnVector) keys).vector;
+ for (int i = 0; i < count; i++) {
+ if (doubleKeyVector[offset + i] == doubleKey) {
+ return offset + i;
+ }
+ }
+ }
+ return -1; // no key in this row's map matched
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
index 48c64a7..ca05269 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleCol.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
/**
@@ -57,7 +58,17 @@ public class VectorUDFMapIndexDoubleCol extends VectorUDFMapIndexBaseCol {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- return ((DoubleColumnVector) cv).vector[index];
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ double[] keys = ((DoubleColumnVector) mapColumnVector.keys).vector;
+ final double index = ((DoubleColumnVector) indexColumnVector).vector[indexBatchIndex];
+ for (int i = 0; i < count; i++) {
+ if (index == keys[offset + i]) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
index d624176..5c064f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexDoubleScalar.java
@@ -18,9 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
/**
@@ -29,13 +28,15 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
*/
public class VectorUDFMapIndexDoubleScalar extends VectorUDFMapIndexBaseScalar {
- private HiveDecimal key;
+ private static final long serialVersionUID = 1L;
+
+ private double key;
public VectorUDFMapIndexDoubleScalar() {
super();
}
- public VectorUDFMapIndexDoubleScalar(int mapColumnNum, HiveDecimal key, int outputColumnNum) {
+ public VectorUDFMapIndexDoubleScalar(int mapColumnNum, double key, int outputColumnNum) {
super(mapColumnNum, outputColumnNum);
this.key = key;
}
@@ -53,24 +54,23 @@ public class VectorUDFMapIndexDoubleScalar extends VectorUDFMapIndexBaseScalar {
.setNumArguments(2)
.setArgumentTypes(
VectorExpressionDescriptor.ArgumentType.MAP,
- VectorExpressionDescriptor.ArgumentType.DECIMAL)
+ VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY)
.setInputExpressionTypes(
VectorExpressionDescriptor.InputExpressionType.COLUMN,
VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- return ((DoubleColumnVector) cv).vector[index];
- }
-
- @Override
- public Object getCurrentKey(int index) {
- return key;
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ double[] keys = ((DoubleColumnVector) mapColumnVector.keys).vector;
+ for (int i = 0; i < count; i++) {
+ if (key == keys[offset + i]) {
+ return offset + i;
+ }
+ }
+ return -1;
}
- @Override
- protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
- return otherKey.equals(((HiveDecimal) columnKey).doubleValue());
- }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
index 5094d0b..482d83f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongCol.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
/**
@@ -57,7 +58,17 @@ public class VectorUDFMapIndexLongCol extends VectorUDFMapIndexBaseCol {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- return ((LongColumnVector) cv).vector[index];
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ long[] keys = ((LongColumnVector) mapColumnVector.keys).vector;
+ final long index = ((LongColumnVector) indexColumnVector).vector[indexBatchIndex];
+ for (int i = 0; i < count; i++) {
+ if (index == keys[offset + i]) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
index f7433e6..e604503 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexLongScalar.java
@@ -18,8 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
/**
@@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
*/
public class VectorUDFMapIndexLongScalar extends VectorUDFMapIndexBaseScalar {
+ private static final long serialVersionUID = 1L;
+
private long key;
public VectorUDFMapIndexLongScalar() {
@@ -59,12 +61,15 @@ public class VectorUDFMapIndexLongScalar extends VectorUDFMapIndexBaseScalar {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- return ((LongColumnVector) cv).vector[index];
- }
-
- @Override
- public Object getCurrentKey(int index) {
- return key;
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ long[] keys = ((LongColumnVector) mapColumnVector.keys).vector;
+ for (int i = 0; i < count; i++) {
+ if (key == keys[offset + i]) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
index 4eefc6f..905d879 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringCol.java
@@ -18,12 +18,11 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import java.util.Arrays;
/**
* Returns value of Map.
@@ -60,14 +59,25 @@ public class VectorUDFMapIndexStringCol extends VectorUDFMapIndexBaseCol {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- BytesColumnVector bytesCV = (BytesColumnVector) cv;
- return ArrayUtils.subarray(bytesCV.vector[index], bytesCV.start[index],
- bytesCV.start[index] + bytesCV.length[index]);
- }
-
- @Override
- protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
- return Arrays.equals((byte[])columnKey, (byte[]) otherKey);
+ public int findInMap(ColumnVector indexColumnVector, int indexBatchIndex,
+ MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ BytesColumnVector keyColVector = (BytesColumnVector) mapColumnVector.keys;
+ byte[][] keyVector = keyColVector.vector;
+ int[] keyStart = keyColVector.start;
+ int[] keyLength = keyColVector.length;
+ BytesColumnVector indexColVector = (BytesColumnVector) indexColumnVector;
+ byte[] indexBytes = indexColVector.vector[indexBatchIndex];
+ int indexStart = indexColVector.start[indexBatchIndex];
+ int indexLength = indexColVector.length[indexBatchIndex];
+ for (int i = 0; i < count; i++) {
+ final int keyOffset = offset + i;
+ if (StringExpr.equal(indexBytes, indexStart, indexLength,
+ keyVector[keyOffset], keyStart[keyOffset], keyLength[keyOffset])) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
index b08cd3a..0d9b5ba 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexStringScalar.java
@@ -18,13 +18,10 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import java.util.Arrays;
-
/**
* Returns value of Map.
* Extends {@link VectorUDFMapIndexBaseScalar}
@@ -62,19 +59,20 @@ public class VectorUDFMapIndexStringScalar extends VectorUDFMapIndexBaseScalar {
}
@Override
- protected Object getKeyByIndex(ColumnVector cv, int index) {
- BytesColumnVector bytesCV = (BytesColumnVector) cv;
- return ArrayUtils.subarray(bytesCV.vector[index], bytesCV.start[index],
- bytesCV.start[index] + bytesCV.length[index]);
- }
-
- @Override
- public Object getCurrentKey(int index) {
- return key;
- }
-
- @Override
- protected boolean compareKeyInternal(Object columnKey, Object otherKey) {
- return Arrays.equals((byte[])columnKey, (byte[]) otherKey);
+ public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
+ final int offset = (int) mapColumnVector.offsets[mapBatchIndex];
+ final int count = (int) mapColumnVector.lengths[mapBatchIndex];
+ BytesColumnVector keyColVector = (BytesColumnVector) mapColumnVector.keys;
+ byte[][] keyVector = keyColVector.vector;
+ int[] keyStart = keyColVector.start;
+ int[] keyLength = keyColVector.length;
+ for (int i = 0; i < count; i++) {
+ final int keyOffset = offset + i;
+ if (StringExpr.equal(key, 0, key.length,
+ keyVector[keyOffset], keyStart[keyOffset], keyLength[keyOffset])) {
+ return offset + i;
+ }
+ }
+ return -1;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
index 4c0cb2b..13cc284 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
@@ -25,6 +25,8 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDecimalCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDecimalScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDoubleCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexLongCol;
@@ -48,8 +50,9 @@ import org.apache.hadoop.io.IntWritable;
@Description(name = "index", value = "_FUNC_(a, n) - Returns the n-th element of a ")
@VectorizedExpressions({ListIndexColScalar.class, ListIndexColColumn.class,
VectorUDFMapIndexStringScalar.class, VectorUDFMapIndexLongScalar.class,
- VectorUDFMapIndexDoubleScalar.class, VectorUDFMapIndexStringCol.class,
- VectorUDFMapIndexLongCol.class, VectorUDFMapIndexDoubleCol.class})
+ VectorUDFMapIndexDoubleScalar.class, VectorUDFMapIndexDecimalScalar.class,
+ VectorUDFMapIndexStringCol.class, VectorUDFMapIndexLongCol.class,
+ VectorUDFMapIndexDoubleCol.class, VectorUDFMapIndexDecimalCol.class})
public class GenericUDFIndex extends GenericUDF {
private transient MapObjectInspector mapOI;
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
index dfbf9d4..af73ee6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -22,8 +22,10 @@ import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.text.ParseException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Random;
import java.util.Set;
@@ -925,7 +927,60 @@ public class VectorRandomRowSource {
{
List<Object> valueList = generationSpec.getValueList();
final int valueCount = valueList.size();
- object = valueList.get(r.nextInt(valueCount));
+
+ TypeInfo typeInfo = generationSpec.getTypeInfo();
+ Category category = typeInfo.getCategory();
+ switch (category) {
+ case PRIMITIVE:
+ case STRUCT:
+ object = valueList.get(r.nextInt(valueCount));
+ break;
+ case LIST:
+ {
+ final int elementCount = r.nextInt(valueCount);
+
+ ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
+ TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
+ final ObjectInspector elementObjectInspector =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ elementTypeInfo);
+ List<Object> list = new ArrayList<Object>(elementCount);
+ for (int i = 0; i < elementCount; i++) {
+ Object elementWritable =
+ randomWritable(elementTypeInfo, elementObjectInspector,
+ allowNull);
+ list.add(elementWritable);
+ }
+ object = list;
+ }
+ break;
+ case MAP:
+ {
+ final int elementCount = r.nextInt(valueCount);
+
+ MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
+ TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
+ final ObjectInspector valueObjectInspector =
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ valueTypeInfo);
+ Map<Object,Object> map = new HashMap<Object,Object>(elementCount);
+ for (int i = 0; i < elementCount; i++) {
+ Object key = valueList.get(r.nextInt(valueCount));
+ Object valueWritable =
+ randomWritable(valueTypeInfo, valueObjectInspector,
+ allowNull);
+ if (!map.containsKey(key)) {
+ map.put(
+ key,
+ valueWritable);
+ }
+ }
+ object = map;
+ }
+ break;
+ default:
+ throw new RuntimeException("Unexpected category " + category);
+ }
}
break;
default:
http://git-wip-us.apache.org/repos/asf/hive/blob/66ea3266/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java
index 3f1a137..5b69bdf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java
@@ -342,9 +342,9 @@ public class TestVectorBetweenIn {
List<Object> sortedList = new ArrayList<Object>(valueCount);
sortedList.addAll(valueList);
- Object object = valueList.get(0);
+ Object exampleObject = valueList.get(0);
WritableComparator writableComparator =
- WritableComparator.get((Class<? extends WritableComparable>) object.getClass());
+ WritableComparator.get((Class<? extends WritableComparable>) exampleObject.getClass());
sortedList.sort(writableComparator);
final boolean isInvert;