Posted to commits@hive.apache.org by eh...@apache.org on 2014/01/08 19:15:45 UTC

svn commit: r1556595 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/common/type/ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ ql/src/test/org/apache/hadoop/hive/ql/exec/vect...

Author: ehans
Date: Wed Jan  8 18:15:44 2014
New Revision: 1556595

URL: http://svn.apache.org/r1556595
Log:
HIVE-6051: Create DecimalColumnVector and a representative VectorExpression for decimal (Eric Hanson)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java?rev=1556595&r1=1556594&r2=1556595&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java Wed Jan  8 18:15:44 2014
@@ -1604,4 +1604,13 @@ public final class Decimal128 extends Nu
         + signum + ", BigDecimal.toString=" + toBigDecimal().toString()
         + ", unscaledValue=[" + unscaledValue.toString() + "])";
   }
+
+  /**
+   * Vectorized execution uses the smallest possible positive non-zero
+   * value to prevent possible later zero-divide exceptions. Set the field
+   * to this value (1 in the internal unsigned 128 bit int).
+   */
+  public void setNullDataValue() {
+    unscaledValue.update(1, 0, 0, 0);
+  }
 }
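
The convention this new method supports can be seen end to end in the test changes further down: after setNullDataValue(), a scale-2 Decimal128 reads back as 0.01, the smallest positive non-zero value at that scale. A minimal sketch, assuming only the Decimal128(String, short) constructor and equals() that those tests already use:

    import org.apache.hadoop.hive.common.type.Decimal128;

    public class NullDataValueSketch {
      public static void main(String[] args) {
        // An arbitrary scale-2 decimal.
        Decimal128 d = new Decimal128("3.14", (short) 2);

        // NULL entries get the smallest positive non-zero value (unscaled 1)
        // so that a later divide in the same expression cannot hit zero.
        d.setNullDataValue();

        // At scale 2 the patched value reads back as 0.01.
        System.out.println(d.equals(new Decimal128("0.01", (short) 2))); // true
      }
    }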

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java?rev=1556595&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java Wed Jan  8 18:15:44 2014
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.io.Writable;
+
+public class DecimalColumnVector extends ColumnVector {
+
+  /**
+   * A vector of Decimal128 objects. These are mutable and have fairly
+   * efficient operations on them. This will make it faster to load
+   * column vectors and perform decimal vector operations with decimal-
+   * specific VectorExpressions.
+   *
+   * For high performance and easy access to this low-level structure,
+   * the fields are public by design (as they are in other ColumnVector
+   * types).
+   */
+  public Decimal128[] vector;
+  public short scale;
+  public short precision;
+
+  public DecimalColumnVector(int precision, int scale) {
+    super(VectorizedRowBatch.DEFAULT_SIZE);
+    this.precision = (short) precision;
+    this.scale = (short) scale;
+    final int len = VectorizedRowBatch.DEFAULT_SIZE;
+    vector = new Decimal128[len];
+    for (int i = 0; i < len; i++) {
+      vector[i] = new Decimal128(0, this.scale);
+    }
+  }
+
+  @Override
+  public Writable getWritableObject(int index) {
+    // TODO Auto-generated method stub
+    return null;
+  }
+
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    // TODO Auto-generated method stub
+  }
+}
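
A short usage sketch of the new column vector, mirroring the test helper added further down: the constructor pre-allocates one Decimal128 per row, so callers update entries in place through the public vector field rather than allocating per row.

    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class DecimalColumnVectorSketch {
      public static void main(String[] args) {
        // One-column batch of decimal(18,2) values.
        VectorizedRowBatch b = new VectorizedRowBatch(1);
        DecimalColumnVector c = new DecimalColumnVector(18, 2);
        b.cols[0] = c;

        // Entries already exist; update them in place.
        c.vector[0].update("1.20", (short) 2);
        c.vector[1].update("-3.30", (short) 2);
        b.size = 2;
      }
    }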

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java?rev=1556595&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java Wed Jan  8 18:15:44 2014
@@ -0,0 +1,183 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+
+/**
+ * Generated from template ColumnArithmeticColumn.txt, which covers binary arithmetic
+ * expressions between columns.
+ */
+public class DecimalColAddDecimalColumn extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+
+  public DecimalColAddDecimalColumn(int colNum1, int colNum2, int outputColumn) {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  public DecimalColAddDecimalColumn() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1];
+    DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2];
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    Decimal128[] vector1 = inputColVector1.vector;
+    Decimal128[] vector2 = inputColVector2.vector;
+    Decimal128[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating =
+         inputColVector1.isRepeating && inputColVector2.isRepeating
+      || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
+      || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
+
+    // Handle nulls first
+    NullUtil.propagateNullsColCol(
+      inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+      addChecked(0, vector1[0], vector2[0], outputColVector);
+    } else if (inputColVector1.isRepeating) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          addChecked(i, vector1[0], vector2[i], outputColVector);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          addChecked(i, vector1[0], vector2[i], outputColVector);
+        }
+      }
+    } else if (inputColVector2.isRepeating) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          addChecked(i, vector1[i], vector2[0], outputColVector);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          addChecked(i, vector1[i], vector2[0], outputColVector);
+        }
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          addChecked(i, vector1[i], vector2[i], outputColVector);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          addChecked(i, vector1[i], vector2[i], outputColVector);
+        }
+      }
+    }
+
+    /* For the case when the output can have null values, follow
+     * the convention that the data values must be 1 for long and
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDecimal(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  // Addition with overflow check. Overflow produces NULL output.
+  private static void addChecked(int i, Decimal128 left, Decimal128 right,
+      DecimalColumnVector outputColVector) {
+    try {
+      Decimal128.add(left, right, outputColVector.vector[i], outputColVector.scale);
+      outputColVector.vector[i].checkPrecisionOverflow(outputColVector.precision);
+    } catch (ArithmeticException e) {  // catch on overflow
+      outputColVector.noNulls = false;
+      outputColVector.isNull[i] = true;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  public int getColNum1() {
+    return colNum1;
+  }
+
+  public void setColNum1(int colNum1) {
+    this.colNum1 = colNum1;
+  }
+
+  public int getColNum2() {
+    return colNum2;
+  }
+
+  public void setColNum2(int colNum2) {
+    this.colNum2 = colNum2;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("decimal"),
+            VectorExpressionDescriptor.ArgumentType.getType("decimal"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
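
The evaluate() method above computes results even for NULL rows (their data slots are patched afterwards) and specializes its loops on isRepeating and selectedInUse to keep the inner loop branch-free. A stripped-down sketch of that access pattern, using plain long arrays in place of Decimal128 for brevity; note it collapses into one loop the per-case loops the real code keeps separate for speed:

    // Illustration only: the selectedInUse / isRepeating access pattern used
    // by evaluate(), with plain longs standing in for Decimal128 values.
    public class BatchIterationSketch {

      static void addColumns(long[] v1, boolean rep1,
                             long[] v2, boolean rep2,
                             long[] out,
                             boolean selectedInUse, int[] sel, int n) {
        if (rep1 && rep2) {
          // Both inputs repeat: one addition covers the whole batch.
          out[0] = v1[0] + v2[0];
          return;
        }
        if (selectedInUse) {
          // Only the rows listed in sel[0..n) are live; iterate indirectly.
          for (int j = 0; j != n; j++) {
            int i = sel[j];
            out[i] = (rep1 ? v1[0] : v1[i]) + (rep2 ? v2[0] : v2[i]);
          }
        } else {
          // Dense batch: rows 0..n-1 are all live.
          for (int i = 0; i != n; i++) {
            out[i] = (rep1 ? v1[0] : v1[i]) + (rep2 ? v2[0] : v2[i]);
          }
        }
      }
    }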

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1556595&r1=1556594&r2=1556595&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java Wed Jan  8 18:15:44 2014
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -27,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.ve
  * Utility functions to handle null propagation.
  */
 public class NullUtil {
+
   /**
    * Set the data value for all NULL entries to the designated NULL_VALUE.
    */
@@ -56,6 +59,7 @@ public class NullUtil {
   public static void setNullOutputEntriesColScalar(
       ColumnVector v, boolean selectedInUse, int[] sel, int n) {
     if (v instanceof DoubleColumnVector) {
+
       // No need to set null data entries because the input NaN values
       // will automatically propagate to the output.
       return;
@@ -285,4 +289,32 @@ public class NullUtil {
       }
     }
   }
+
+  /**
+   * Follow the convention that null decimal values are internally set to the smallest
+   * positive value available. Prevents accidental zero-divide later in expression
+   * evaluation.
+   */
+  public static void setNullDataEntriesDecimal(
+      DecimalColumnVector v, boolean selectedInUse, int[] sel,
+      int n) {
+    if (v.noNulls) {
+      return;
+    } else if (v.isRepeating && v.isNull[0]) {
+      v.vector[0].setNullDataValue();
+    } else if (selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if(v.isNull[i]) {
+          v.vector[i].setNullDataValue();
+        }
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        if(v.isNull[i]) {
+          v.vector[i].setNullDataValue();
+        }
+      }
+    }
+  }
 }
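
A minimal usage sketch of the new helper, assuming the isNull array and noNulls flag inherited from ColumnVector behave as they do for the other vector types: once an expression has marked a row NULL, the helper overwrites that row's data slot with the designated value (0.01 at scale 2).

    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;

    public class NullPatchSketch {
      public static void main(String[] args) {
        DecimalColumnVector v = new DecimalColumnVector(18, 2);

        // Mark row 0 NULL; its data slot may hold anything at this point.
        v.noNulls = false;
        v.isNull[0] = true;

        // Patch the data slots of all NULL rows in the first n entries
        // (selectedInUse == false, so sel is not consulted).
        NullUtil.setNullDataEntriesDecimal(v, false, null, 1);
      }
    }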

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java?rev=1556595&r1=1556594&r2=1556595&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java Wed Jan  8 18:15:44 2014
@@ -23,6 +23,8 @@ import static org.junit.Assert.assertFal
 import static org.junit.Assert.assertTrue;
 import junit.framework.Assert;
 
+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
@@ -33,7 +35,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.junit.Test;
 
 /**
- * Unit tests for vectori arithmetic expressions.
+ * Unit tests for vectorized arithmetic expressions.
  */
 public class TestVectorArithmeticExpressions {
 
@@ -284,4 +286,60 @@ public class TestVectorArithmeticExpress
     assertFalse(out.noNulls);
     assertFalse(out.isRepeating);
   }
+
+  @Test
+  public void testDecimalColAddDecimalColumn() {
+    VectorizedRowBatch b = getVectorizedRowBatch3DecimalCols();
+    VectorExpression expr = new DecimalColAddDecimalColumn(0, 1, 2);
+    DecimalColumnVector r = (DecimalColumnVector) b.cols[2];
+
+    // test without nulls
+    expr.evaluate(b);
+    assertTrue(r.vector[0].equals(new Decimal128("2.20", (short) 2)));
+    assertTrue(r.vector[1].equals(new Decimal128("-2.30", (short) 2)));
+    assertTrue(r.vector[2].equals(new Decimal128("1.00", (short) 2)));
+
+    // test nulls propagation
+    b = getVectorizedRowBatch3DecimalCols();
+    DecimalColumnVector c0 = (DecimalColumnVector) b.cols[0];
+    c0.noNulls = false;
+    c0.isNull[0] = true;
+    r = (DecimalColumnVector) b.cols[2];
+    expr.evaluate(b);
+    assertTrue(!r.noNulls && r.isNull[0]);
+
+    // Verify null output data entry is not 0, but rather the value specified by design,
+    // which is the minimum non-0 value, 0.01 in this case.
+    assertTrue(r.vector[0].equals(new Decimal128("0.01", (short) 2)));
+
+    // test that overflow produces NULL
+    b = getVectorizedRowBatch3DecimalCols();
+    c0 = (DecimalColumnVector) b.cols[0];
+    c0.vector[0].update("9999999999999999.99", (short) 2); // set to max possible value
+    r = (DecimalColumnVector) b.cols[2];
+    expr.evaluate(b); // will cause overflow for result at position 0, must yield NULL
+    assertTrue(!r.noNulls && r.isNull[0]);
+
+    // verify proper null output data value
+    assertTrue(r.vector[0].equals(new Decimal128("0.01", (short) 2)));
+  }
+
+  private VectorizedRowBatch getVectorizedRowBatch3DecimalCols() {
+    VectorizedRowBatch b = new VectorizedRowBatch(3);
+    DecimalColumnVector v0, v1;
+    b.cols[0] = v0 = new DecimalColumnVector(18, 2);
+    b.cols[1] = v1 = new DecimalColumnVector(18, 2);
+    b.cols[2] = new DecimalColumnVector(18, 2);
+    v0.vector[0].update("1.20", (short) 2);
+    v0.vector[1].update("-3.30", (short) 2);
+    v0.vector[2].update("0", (short) 2);
+
+    v1.vector[0].update("1.00", (short) 2);
+    v1.vector[1].update("1.00", (short) 2);
+    v1.vector[2].update("1.00", (short) 2);
+
+    b.size = 3;
+
+    return b;
+  }
 }