You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/23 09:09:57 UTC
svn commit: r1470830 [2/2] - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ test/org/apache/hadoop/hive/ql/exec/vector/expressions/

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java?rev=1470830&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java Tue Apr 23 07:09:56 2013
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+
+/*
+ * Because of the templatized nature of the code, either or both
+ * of these ColumnVector imports may be needed. Listing both of them
+ * rather than using ....vectorization.*;
+ */
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Implements a vectorized arithmetic operator with a scalar on the left and a
+ * column vector on the right. The result is output to an output column vector.
+ */
+public class LongScalarSubtractDoubleColumn extends VectorExpression {
+  private int colNum;
+  private long value;
+  private int outputColumn;
+
+  public LongScalarSubtractDoubleColumn(long value, int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.value = value;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  /**
+   * Method to evaluate scalar-column operation in vectorized fashion.
+   *
+   * @batch a package of rows with each column stored in a vector
+   */
+  public void evaluate(VectorizedRowBatch batch) {
+    DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
+    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = inputColVector.noNulls;
+    int n = batch.size;
+    double[] vector = inputColVector.vector;
+    double[] outputVector = outputColVector.vector;
+    
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (inputColVector.isRepeating) {
+    
+      /*
+       * All must be selected otherwise size would be zero
+       * Repeating property will not change.
+       */
+      outputVector[0] = value - vector[0];
+      
+      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+      outputIsNull[0] = inputIsNull[0]; 
+      outputColVector.isRepeating = true;
+    } else if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = value - vector[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = value - vector[i];
+        }
+      }
+      outputColVector.isRepeating = false;
+    } else {                         /* there are nulls */ 
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = value - vector[i];
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = value - vector[i];
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+      outputColVector.isRepeating = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+  
+  @Override
+  public String getOutputType() {
+    return "double";
+  }
+}

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java?rev=1470830&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java Tue Apr 23 07:09:56 2013
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+
+/*
+ * Because of the templatized nature of the code, either or both
+ * of these ColumnVector imports may be needed. Listing both of them
+ * rather than using ....vectorization.*;
+ */
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Implements a vectorized arithmetic operator with a scalar on the left and a
+ * column vector on the right. The result is output to an output column vector.
+ */
+public class LongScalarSubtractLongColumn extends VectorExpression {
+  private int colNum;
+  private long value;
+  private int outputColumn;
+
+  public LongScalarSubtractLongColumn(long value, int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.value = value;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  /**
+   * Method to evaluate scalar-column operation in vectorized fashion.
+   *
+   * @batch a package of rows with each column stored in a vector
+   */
+  public void evaluate(VectorizedRowBatch batch) {
+    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = inputColVector.noNulls;
+    int n = batch.size;
+    long[] vector = inputColVector.vector;
+    long[] outputVector = outputColVector.vector;
+    
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (inputColVector.isRepeating) {
+    
+      /*
+       * All must be selected otherwise size would be zero
+       * Repeating property will not change.
+       */
+      outputVector[0] = value - vector[0];
+      
+      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+      outputIsNull[0] = inputIsNull[0]; 
+      outputColVector.isRepeating = true;
+    } else if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = value - vector[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = value - vector[i];
+        }
+      }
+      outputColVector.isRepeating = false;
+    } else {                         /* there are nulls */ 
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = value - vector[i];
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = value - vector[i];
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+      outputColVector.isRepeating = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+  
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+}

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java?rev=1470830&r1=1470829&r2=1470830&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java Tue Apr 23 07:09:56 2013
@@ -43,19 +43,19 @@ public class CodeGen {
       {"ScalarArithmeticColumn", "Subtract", "long", "double", "-"},
       {"ScalarArithmeticColumn", "Multiply", "long", "double", "*"},
       {"ScalarArithmeticColumn", "Divide", "long", "double", "/"},
-      //{"ScalarArithmeticColumn", "Modulo", "long", "double", "%"},
+      {"ScalarArithmeticColumn", "Modulo", "long", "double", "%"},
 
       {"ScalarArithmeticColumn", "Add", "double", "long", "+"},
       {"ScalarArithmeticColumn", "Subtract", "double", "long", "-"},
       {"ScalarArithmeticColumn", "Multiply", "double", "long", "*"},
       {"ScalarArithmeticColumn", "Divide", "double", "long", "/"},
-      //{"ScalarArithmeticColumn", "Modulo", "double", "long", "%"},
+      {"ScalarArithmeticColumn", "Modulo", "double", "long", "%"},
 
       {"ScalarArithmeticColumn", "Add", "double", "double", "+"},
       {"ScalarArithmeticColumn", "Subtract", "double", "double", "-"},
       {"ScalarArithmeticColumn", "Multiply", "double", "double", "*"},
       {"ScalarArithmeticColumn", "Divide", "double", "double", "/"},
-      //{"ScalarArithmeticColumn", "Modulo", "double", "double", "%"},
+      {"ScalarArithmeticColumn", "Modulo", "double", "double", "%"},
 
       {"ColumnArithmeticColumn", "Add", "long", "long", "+"},
       {"ColumnArithmeticColumn", "Subtract", "long", "long", "-"},
@@ -252,6 +252,8 @@ public class CodeGen {
         generateColumnCompareScalar(tdesc);
       } else if (tdesc[0].equals("FilterColumnCompareScalar")) {
         generateFilterColumnCompareScalar(tdesc);
+      } else if (tdesc[0].equals("ScalarArithmeticColumn")) {
+        generateScalarArithmeticColumn(tdesc);
       } else {
         continue;
       }

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt?rev=1470830&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt Tue Apr 23 07:09:56 2013
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+
+/*
+ * Because of the templatized nature of the code, either or both
+ * of these ColumnVector imports may be needed. Listing both of them
+ * rather than using ....vectorization.*;
+ */
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Implements a vectorized arithmetic operator with a scalar on the left and a
+ * column vector on the right. The result is output to an output column vector.
+ */
+public class <ClassName> extends VectorExpression {
+  private int colNum;
+  private <OperandType1> value;
+  private int outputColumn;
+
+  public <ClassName>(<OperandType1> value, int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.value = value;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  /**
+   * Method to evaluate scalar-column operation in vectorized fashion.
+   *
+   * @batch a package of rows with each column stored in a vector
+   */
+  public void evaluate(VectorizedRowBatch batch) {
+    <InputColumnVectorType> inputColVector = (<InputColumnVectorType>) batch.cols[colNum];
+    <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = inputColVector.noNulls;
+    int n = batch.size;
+    <OperandType2>[] vector = inputColVector.vector;
+    <ReturnType>[] outputVector = outputColVector.vector;
+    
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (inputColVector.isRepeating) {
+    
+      /*
+       * All must be selected otherwise size would be zero
+       * Repeating property will not change.
+       */
+      outputVector[0] = value <OperatorSymbol> vector[0];
+      
+      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+      outputIsNull[0] = inputIsNull[0]; 
+      outputColVector.isRepeating = true;
+    } else if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = value <OperatorSymbol> vector[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = value <OperatorSymbol> vector[i];
+        }
+      }
+      outputColVector.isRepeating = false;
+    } else {                         /* there are nulls */ 
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = value <OperatorSymbol> vector[i];
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = value <OperatorSymbol> vector[i];
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+      outputColVector.isRepeating = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+  
+  @Override
+  public String getOutputType() {
+    return "<ReturnType>";
+  }
+}

Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java?rev=1470830&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java Tue Apr 23 07:09:56 2013
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarSubtractLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.Test;
+
+/**
+ * Test vectorized expression handling for the case where there is a scalar on
+ * the left and a column vector on the right.
+ */
+public class TestVectorScalarColArithmetic {
+  private VectorizedRowBatch getVectorizedRowBatchSingleLongVector(int size) {
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+    LongColumnVector lcv = new LongColumnVector(size);
+    for (int i = 0; i < size; i++) {
+      lcv.vector[i] = i * 37;
+    }
+    batch.cols[0] = lcv;
+    batch.cols[1] = new LongColumnVector(size);
+    batch.size = size;
+    return batch;
+  }
+
+  @Test
+  public void testLongScalarSubtractLongColNoNulls()  {
+    VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector(
+        VectorizedRowBatch.DEFAULT_SIZE);
+    LongScalarSubtractLongColumn expr = new LongScalarSubtractLongColumn(100, 0, 1);
+    expr.evaluate(batch);
+    
+    //verify
+    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+      Assert.assertEquals(100 - i * 37, ((LongColumnVector) batch.cols[1]).vector[i]);
+    }
+    Assert.assertTrue(((LongColumnVector)batch.cols[1]).noNulls);
+    Assert.assertFalse(((LongColumnVector)batch.cols[1]).isRepeating);
+  }
+
+  @Test
+  public void testLongScalarSubtractLongColWithNulls()  {
+    VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector(
+        VectorizedRowBatch.DEFAULT_SIZE);
+    LongColumnVector lcv = (LongColumnVector) batch.cols[0];
+    TestVectorizedRowBatch.addRandomNulls(lcv);
+    LongScalarSubtractLongColumn expr = new LongScalarSubtractLongColumn(100, 0, 1);
+    expr.evaluate(batch);
+    
+    //verify
+    for (int i=0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
+      if (!lcv.isNull[i]) {
+        Assert.assertEquals(100 - i * 37, ((LongColumnVector)batch.cols[1]).vector[i]);
+      } else {
+        Assert.assertTrue(((LongColumnVector)batch.cols[1]).isNull[i]);
+      }
+    }
+    Assert.assertFalse(((LongColumnVector)batch.cols[1]).noNulls);
+    Assert.assertFalse(((LongColumnVector)batch.cols[1]).isRepeating);
+  }
+
+  @Test
+  public void testLongScalarSubtractLongColWithRepeating() {
+    LongColumnVector in, out;
+    VectorizedRowBatch batch;
+    LongScalarSubtractLongColumn expr;
+
+    // Case 1: is repeating, no nulls
+    batch = getVectorizedRowBatchSingleLongVector(
+        VectorizedRowBatch.DEFAULT_SIZE);
+    in = (LongColumnVector) batch.cols[0];
+    in.isRepeating = true;
+    out = (LongColumnVector) batch.cols[1];
+    out.isRepeating = false;
+    expr = new LongScalarSubtractLongColumn(100, 0, 1);
+    expr.evaluate(batch);
+    
+    // verify
+    Assert.assertTrue(out.isRepeating);
+    Assert.assertTrue(out.noNulls);
+    Assert.assertEquals(out.vector[0], 100 - 0 * 37);
+
+    // Case 2: is repeating, has nulls
+    batch = getVectorizedRowBatchSingleLongVector(
+        VectorizedRowBatch.DEFAULT_SIZE);
+    in = (LongColumnVector) batch.cols[0];
+    in.isRepeating = true;
+    in.noNulls = false;
+    in.isNull[0] = true;
+
+    out = (LongColumnVector) batch.cols[1];
+    out.isRepeating = false;
+    out.isNull[0] = false;
+    out.noNulls = true;
+    expr = new LongScalarSubtractLongColumn(100, 0, 1);
+    expr.evaluate(batch);
+    
+    // verify
+    Assert.assertTrue(out.isRepeating);
+    Assert.assertFalse(out.noNulls);
+    Assert.assertEquals(true, out.isNull[0]);
+  }
+}