You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/23 09:09:57 UTC
svn commit: r1470830 [2/2] - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/
test/org/apache/hadoop/hive/ql/exec/vector/expressions/
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java?rev=1470830&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java Tue Apr 23 07:09:56 2013
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+
+/*
+ * Because of the templatized nature of the code, either or both
+ * of these ColumnVector imports may be needed. Listing both of them
+ * rather than using ....vectorization.*;
+ */
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Vectorized {@code scalar - column} operator: a long scalar on the left and a double
+ * column vector on the right. The result is written to a double output column vector.
+ */
+public class LongScalarSubtractDoubleColumn extends VectorExpression {
+ private int colNum;
+ private long value;
+ private int outputColumn;
+
+ public LongScalarSubtractDoubleColumn(long value, int colNum, int outputColumn) {
+ this.colNum = colNum;
+ this.value = value;
+ this.outputColumn = outputColumn;
+ }
+
+ /**
+ * Evaluates the scalar-column operation in vectorized fashion, copying input null flags.
+ *
+ * @param batch a package of rows with each column stored in a vector
+ */
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+ DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
+ DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.noNulls = inputColVector.noNulls;
+ int n = batch.size;
+ double[] vector = inputColVector.vector;
+ double[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ if (inputColVector.isRepeating) {
+
+ /*
+ * All must be selected otherwise size would be zero.
+ * Only entry 0 is computed; the output is marked repeating as well.
+ */
+ outputVector[0] = value - vector[0];
+
+ // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+ outputIsNull[0] = inputIsNull[0];
+ outputColVector.isRepeating = true;
+ } else if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = value - vector[i];
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = value - vector[i];
+ }
+ }
+ outputColVector.isRepeating = false;
+ } else { /* there are nulls */
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = value - vector[i];
+ outputIsNull[i] = inputIsNull[i];
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = value - vector[i];
+ }
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+ }
+ outputColVector.isRepeating = false;
+ }
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "double";
+ }
+}
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java?rev=1470830&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java Tue Apr 23 07:09:56 2013
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+
+/*
+ * Because of the templatized nature of the code, either or both
+ * of these ColumnVector imports may be needed. Listing both of them
+ * rather than using ....vectorization.*;
+ */
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Vectorized {@code scalar - column} operator: a long scalar on the left and a long
+ * column vector on the right. The result is written to a long output column vector.
+ */
+public class LongScalarSubtractLongColumn extends VectorExpression {
+ private int colNum;
+ private long value;
+ private int outputColumn;
+
+ public LongScalarSubtractLongColumn(long value, int colNum, int outputColumn) {
+ this.colNum = colNum;
+ this.value = value;
+ this.outputColumn = outputColumn;
+ }
+
+ /**
+ * Evaluates the scalar-column operation in vectorized fashion, copying input null flags.
+ *
+ * @param batch a package of rows with each column stored in a vector
+ */
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+ LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.noNulls = inputColVector.noNulls;
+ int n = batch.size;
+ long[] vector = inputColVector.vector;
+ long[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ if (inputColVector.isRepeating) {
+
+ /*
+ * All must be selected otherwise size would be zero.
+ * Only entry 0 is computed; the output is marked repeating as well.
+ */
+ outputVector[0] = value - vector[0];
+
+ // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+ outputIsNull[0] = inputIsNull[0];
+ outputColVector.isRepeating = true;
+ } else if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = value - vector[i];
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = value - vector[i];
+ }
+ }
+ outputColVector.isRepeating = false;
+ } else { /* there are nulls */
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = value - vector[i];
+ outputIsNull[i] = inputIsNull[i];
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = value - vector[i];
+ }
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+ }
+ outputColVector.isRepeating = false;
+ }
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "long";
+ }
+}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java?rev=1470830&r1=1470829&r2=1470830&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java Tue Apr 23 07:09:56 2013
@@ -43,19 +43,19 @@ public class CodeGen {
{"ScalarArithmeticColumn", "Subtract", "long", "double", "-"},
{"ScalarArithmeticColumn", "Multiply", "long", "double", "*"},
{"ScalarArithmeticColumn", "Divide", "long", "double", "/"},
- //{"ScalarArithmeticColumn", "Modulo", "long", "double", "%"},
+ {"ScalarArithmeticColumn", "Modulo", "long", "double", "%"},
{"ScalarArithmeticColumn", "Add", "double", "long", "+"},
{"ScalarArithmeticColumn", "Subtract", "double", "long", "-"},
{"ScalarArithmeticColumn", "Multiply", "double", "long", "*"},
{"ScalarArithmeticColumn", "Divide", "double", "long", "/"},
- //{"ScalarArithmeticColumn", "Modulo", "double", "long", "%"},
+ {"ScalarArithmeticColumn", "Modulo", "double", "long", "%"},
{"ScalarArithmeticColumn", "Add", "double", "double", "+"},
{"ScalarArithmeticColumn", "Subtract", "double", "double", "-"},
{"ScalarArithmeticColumn", "Multiply", "double", "double", "*"},
{"ScalarArithmeticColumn", "Divide", "double", "double", "/"},
- //{"ScalarArithmeticColumn", "Modulo", "double", "double", "%"},
+ {"ScalarArithmeticColumn", "Modulo", "double", "double", "%"},
{"ColumnArithmeticColumn", "Add", "long", "long", "+"},
{"ColumnArithmeticColumn", "Subtract", "long", "long", "-"},
@@ -252,6 +252,8 @@ public class CodeGen {
generateColumnCompareScalar(tdesc);
} else if (tdesc[0].equals("FilterColumnCompareScalar")) {
generateFilterColumnCompareScalar(tdesc);
+ } else if (tdesc[0].equals("ScalarArithmeticColumn")) {
+ generateScalarArithmeticColumn(tdesc);
} else {
continue;
}
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt?rev=1470830&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt Tue Apr 23 07:09:56 2013
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+
+/*
+ * Because of the templatized nature of the code, either or both
+ * of these ColumnVector imports may be needed. Listing both of them
+ * rather than using ....vectorization.*;
+ */
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Implements a vectorized arithmetic operator with a scalar on the left and a
+ * column vector on the right. The result is output to an output column vector.
+ */
+public class <ClassName> extends VectorExpression {
+ private int colNum;
+ private <OperandType1> value;
+ private int outputColumn;
+
+ public <ClassName>(<OperandType1> value, int colNum, int outputColumn) {
+ this.colNum = colNum;
+ this.value = value;
+ this.outputColumn = outputColumn;
+ }
+
+ /**
+ * Evaluates the scalar-column operation in vectorized fashion, copying input null flags.
+ *
+ * @param batch a package of rows with each column stored in a vector
+ */
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+ <InputColumnVectorType> inputColVector = (<InputColumnVectorType>) batch.cols[colNum];
+ <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] inputIsNull = inputColVector.isNull;
+ boolean[] outputIsNull = outputColVector.isNull;
+ outputColVector.noNulls = inputColVector.noNulls;
+ int n = batch.size;
+ <OperandType2>[] vector = inputColVector.vector;
+ <ReturnType>[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ if (inputColVector.isRepeating) {
+
+ /*
+ * All must be selected otherwise size would be zero.
+ * Only entry 0 is computed; the output is marked repeating as well.
+ */
+ outputVector[0] = value <OperatorSymbol> vector[0];
+
+ // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+ outputIsNull[0] = inputIsNull[0];
+ outputColVector.isRepeating = true;
+ } else if (inputColVector.noNulls) {
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = value <OperatorSymbol> vector[i];
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = value <OperatorSymbol> vector[i];
+ }
+ }
+ outputColVector.isRepeating = false;
+ } else { /* there are nulls */
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = value <OperatorSymbol> vector[i];
+ outputIsNull[i] = inputIsNull[i];
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = value <OperatorSymbol> vector[i];
+ }
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+ }
+ outputColVector.isRepeating = false;
+ }
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "<ReturnType>";
+ }
+}
Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java?rev=1470830&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java Tue Apr 23 07:09:56 2013
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarSubtractLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.Test;
+
+/**
+ * Test vectorized expression handling for the case where there is a scalar on
+ * the left and a column vector on the right.
+ */
+public class TestVectorScalarColArithmetic {
+ private VectorizedRowBatch getVectorizedRowBatchSingleLongVector(int size) {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+ LongColumnVector lcv = new LongColumnVector(size);
+ for (int i = 0; i < size; i++) {
+ lcv.vector[i] = i * 37;
+ }
+ batch.cols[0] = lcv;
+ batch.cols[1] = new LongColumnVector(size);
+ batch.size = size;
+ return batch;
+ }
+
+ @Test
+ public void testLongScalarSubtractLongColNoNulls() {
+ VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector(
+ VectorizedRowBatch.DEFAULT_SIZE);
+ LongScalarSubtractLongColumn expr = new LongScalarSubtractLongColumn(100, 0, 1);
+ expr.evaluate(batch);
+
+ // verify: column 1 holds 100 minus the matching entry of column 0
+ for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+ Assert.assertEquals(100 - i * 37, ((LongColumnVector) batch.cols[1]).vector[i]);
+ }
+ Assert.assertTrue(((LongColumnVector)batch.cols[1]).noNulls);
+ Assert.assertFalse(((LongColumnVector)batch.cols[1]).isRepeating);
+ }
+
+ @Test
+ public void testLongScalarSubtractLongColWithNulls() {
+ VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector(
+ VectorizedRowBatch.DEFAULT_SIZE);
+ LongColumnVector lcv = (LongColumnVector) batch.cols[0];
+ TestVectorizedRowBatch.addRandomNulls(lcv);
+ LongScalarSubtractLongColumn expr = new LongScalarSubtractLongColumn(100, 0, 1);
+ expr.evaluate(batch);
+
+ // verify: non-null inputs yield 100 - input; null inputs yield a null output entry
+ for (int i=0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+ if (!lcv.isNull[i]) {
+ Assert.assertEquals(100 - i * 37, ((LongColumnVector)batch.cols[1]).vector[i]);
+ } else {
+ Assert.assertTrue(((LongColumnVector)batch.cols[1]).isNull[i]);
+ }
+ }
+ Assert.assertFalse(((LongColumnVector)batch.cols[1]).noNulls);
+ Assert.assertFalse(((LongColumnVector)batch.cols[1]).isRepeating);
+ }
+
+ @Test
+ public void testLongScalarSubtractLongColWithRepeating() {
+ LongColumnVector in, out;
+ VectorizedRowBatch batch;
+ LongScalarSubtractLongColumn expr;
+
+ // Case 1: is repeating, no nulls
+ batch = getVectorizedRowBatchSingleLongVector(
+ VectorizedRowBatch.DEFAULT_SIZE);
+ in = (LongColumnVector) batch.cols[0];
+ in.isRepeating = true;
+ out = (LongColumnVector) batch.cols[1];
+ out.isRepeating = false;
+ expr = new LongScalarSubtractLongColumn(100, 0, 1);
+ expr.evaluate(batch);
+
+ // verify: expected value goes first so a failure message reads correctly
+ Assert.assertTrue(out.isRepeating);
+ Assert.assertTrue(out.noNulls);
+ Assert.assertEquals(100 - 0 * 37, out.vector[0]);
+
+ // Case 2: is repeating, has nulls
+ batch = getVectorizedRowBatchSingleLongVector(
+ VectorizedRowBatch.DEFAULT_SIZE);
+ in = (LongColumnVector) batch.cols[0];
+ in.isRepeating = true;
+ in.noNulls = false;
+ in.isNull[0] = true;
+
+ out = (LongColumnVector) batch.cols[1];
+ out.isRepeating = false;
+ out.isNull[0] = false;
+ out.noNulls = true;
+ expr = new LongScalarSubtractLongColumn(100, 0, 1);
+ expr.evaluate(batch);
+
+ // verify: the null flag of entry 0 and noNulls are carried over from the input
+ Assert.assertTrue(out.isRepeating);
+ Assert.assertFalse(out.noNulls);
+ Assert.assertTrue(out.isNull[0]);
+ }
+}