You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/29 17:12:40 UTC
svn commit: r1477136 [2/4] - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/
java/org/apache/...
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdPopDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdPopDouble.java?rev=1477136&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdPopDouble.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdPopDouble.java Mon Apr 29 15:12:39 2013
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
+ .VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+* VectorUDAFStdPopDouble. Vectorized implementation for VARIANCE aggregates.
+*/
+@Description(name = "std,stddev,stddev_pop", value = "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, double)")
+public class VectorUDAFStdPopDouble extends VectorAggregateExpression {
+
+ /**
+ * Class for storing the current aggregate value.
+ */
+ static private final class Aggregation implements AggregationBuffer {
+ // Running sum of the non-null values aggregated so far.
+ double sum;
+ // Number of non-null values aggregated so far.
+ long count;
+ // Running sum of squared deviations from the current mean (the "M2"
+ // term of the streaming variance algorithm); the final stddev is
+ // presumably derived from (count, sum, variance) downstream --
+ // TODO confirm against the variance merge/terminate path.
+ double variance;
+ // True until the first non-null value arrives; marks an empty aggregate.
+ boolean isNull;
+
+ public void init () {
+ isNull = false;
+ sum = 0;
+ count = 0;
+ variance = 0;
+ }
+ }
+
+ private VectorExpression inputExpression;
+ // Reusable writables backing partialResult, in struct field order
+ // (count, sum, variance) to match the inspector built below.
+ private LongWritable resultCount;
+ private DoubleWritable resultSum;
+ private DoubleWritable resultVariance;
+ private Object[] partialResult;
+
+ // Struct object inspector describing the partial-result layout.
+ private ObjectInspector soi;
+
+
+ public VectorUDAFStdPopDouble(VectorExpression inputExpression) {
+ super();
+ this.inputExpression = inputExpression;
+ partialResult = new Object[3];
+ resultCount = new LongWritable();
+ resultSum = new DoubleWritable();
+ resultVariance = new DoubleWritable();
+ partialResult[0] = resultCount;
+ partialResult[1] = resultSum;
+ partialResult[2] = resultVariance;
+ initPartialResultInspector();
+ }
+
+ // Builds the struct<count:bigint, sum:double, variance:double> inspector
+ // for the partial aggregation result.
+ private void initPartialResultInspector () {
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ fname.add("variance");
+
+ soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+ }
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ throws HiveException {
+
+ inputExpression.evaluate(unit);
+
+ DoubleColumnVector inputVector = (DoubleColumnVector)unit.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = unit.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ double[] vector = inputVector.vector;
+
+ // Dispatch to a loop specialized for the batch shape (repeating value,
+ // selection vector in use, nulls present) so the hot loops avoid
+ // per-row condition checks.
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNulls(myagg, vector[0], batchSize);
+ }
+ }
+ else if (!unit.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!unit.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ }
+ }
+
+ private void iterateRepeatingNoNulls(
+ Aggregation myagg,
+ double value,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ // TODO: conjure a formula w/o iterating
+ //
+
+ // Incremental variance update: with count and sum already including the
+ // new value, add (count*value - sum)^2 / (count*(count-1)) to the
+ // running sum of squared deviations. The loop is still needed for a
+ // repeating value because sum/count carry over from earlier batches.
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // We pulled out i=0 so we can remove the count > 1 check in the loop
+ for (int i=1; i<batchSize; ++i) {
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ // Same incremental variance update as in iterateRepeatingNoNulls.
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ double value = vector[selected[0]];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove the count > 1 check in the loop
+ //
+ for (int i=1; i< batchSize; ++i) {
+ value = vector[selected[i]];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ double value = vector[0];
+ myagg.sum += value;
+ myagg.count += 1;
+
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove count > 1 check
+ for (int i=1; i<batchSize; ++i) {
+ value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ // Lazy reset: remaining fields are re-initialized by init() when the
+ // next non-null value arrives.
+ myAgg.isNull = true;
+ }
+
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ // An aggregate that never saw a non-null value yields SQL NULL.
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ assert(0 < myagg.count);
+ resultCount.set (myagg.count);
+ resultSum.set (myagg.sum);
+ resultVariance.set (myagg.variance);
+ return partialResult;
+ }
+ }
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return soi;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdPopLong.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdPopLong.java?rev=1477136&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdPopLong.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdPopLong.java Mon Apr 29 15:12:39 2013
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
+ .VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+* VectorUDAFStdPopLong. Vectorized implementation for VARIANCE aggregates.
+*/
+@Description(name = "std,stddev,stddev_pop", value = "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, long)")
+public class VectorUDAFStdPopLong extends VectorAggregateExpression {
+
+ /**
+ * Class for storing the current aggregate value.
+ */
+ static private final class Aggregation implements AggregationBuffer {
+ // Running sum of the non-null values aggregated so far.
+ double sum;
+ // Number of non-null values aggregated so far.
+ long count;
+ // Running sum of squared deviations from the current mean (the "M2"
+ // term of the streaming variance algorithm); the final stddev is
+ // presumably derived from (count, sum, variance) downstream --
+ // TODO confirm against the variance merge/terminate path.
+ double variance;
+ // True until the first non-null value arrives; marks an empty aggregate.
+ boolean isNull;
+
+ public void init () {
+ isNull = false;
+ sum = 0;
+ count = 0;
+ variance = 0;
+ }
+ }
+
+ private VectorExpression inputExpression;
+ // Reusable writables backing partialResult, in struct field order
+ // (count, sum, variance) to match the inspector built below.
+ private LongWritable resultCount;
+ private DoubleWritable resultSum;
+ private DoubleWritable resultVariance;
+ private Object[] partialResult;
+
+ // Struct object inspector describing the partial-result layout.
+ private ObjectInspector soi;
+
+
+ public VectorUDAFStdPopLong(VectorExpression inputExpression) {
+ super();
+ this.inputExpression = inputExpression;
+ partialResult = new Object[3];
+ resultCount = new LongWritable();
+ resultSum = new DoubleWritable();
+ resultVariance = new DoubleWritable();
+ partialResult[0] = resultCount;
+ partialResult[1] = resultSum;
+ partialResult[2] = resultVariance;
+ initPartialResultInspector();
+ }
+
+ // Builds the struct<count:bigint, sum:double, variance:double> inspector
+ // for the partial aggregation result.
+ private void initPartialResultInspector () {
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ fname.add("variance");
+
+ soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+ }
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ throws HiveException {
+
+ inputExpression.evaluate(unit);
+
+ LongColumnVector inputVector = (LongColumnVector)unit.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = unit.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ long[] vector = inputVector.vector;
+
+ // Dispatch to a loop specialized for the batch shape (repeating value,
+ // selection vector in use, nulls present) so the hot loops avoid
+ // per-row condition checks.
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNulls(myagg, vector[0], batchSize);
+ }
+ }
+ else if (!unit.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!unit.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ }
+ }
+
+ private void iterateRepeatingNoNulls(
+ Aggregation myagg,
+ long value,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ // TODO: conjure a formula w/o iterating
+ //
+
+ // Incremental variance update: with count and sum already including the
+ // new value, add (count*value - sum)^2 / (count*(count-1)) to the
+ // running sum of squared deviations. The loop is still needed for a
+ // repeating value because sum/count carry over from earlier batches.
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // We pulled out i=0 so we can remove the count > 1 check in the loop
+ for (int i=1; i<batchSize; ++i) {
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ // Same incremental variance update as in iterateRepeatingNoNulls.
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ long value = vector[selected[0]];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove the count > 1 check in the loop
+ //
+ for (int i=1; i< batchSize; ++i) {
+ value = vector[selected[i]];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ long value = vector[0];
+ myagg.sum += value;
+ myagg.count += 1;
+
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove count > 1 check
+ for (int i=1; i<batchSize; ++i) {
+ value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ // Lazy reset: remaining fields are re-initialized by init() when the
+ // next non-null value arrives.
+ myAgg.isNull = true;
+ }
+
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ // An aggregate that never saw a non-null value yields SQL NULL.
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ assert(0 < myagg.count);
+ resultCount.set (myagg.count);
+ resultSum.set (myagg.sum);
+ resultVariance.set (myagg.variance);
+ return partialResult;
+ }
+ }
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return soi;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdSampDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdSampDouble.java?rev=1477136&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdSampDouble.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdSampDouble.java Mon Apr 29 15:12:39 2013
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
+ .VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+* VectorUDAFStdSampDouble. Vectorized implementation for VARIANCE aggregates.
+*/
+@Description(name = "stddev_samp", value = "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, double)")
+public class VectorUDAFStdSampDouble extends VectorAggregateExpression {
+
+ /**
+ * Class for storing the current aggregate value.
+ */
+ static private final class Aggregation implements AggregationBuffer {
+ // Running sum of the non-null values aggregated so far.
+ double sum;
+ // Number of non-null values aggregated so far.
+ long count;
+ // Running sum of squared deviations from the current mean (the "M2"
+ // term of the streaming variance algorithm); the final sample stddev is
+ // presumably derived from (count, sum, variance) downstream --
+ // TODO confirm against the variance merge/terminate path.
+ double variance;
+ // True until the first non-null value arrives; marks an empty aggregate.
+ boolean isNull;
+
+ public void init () {
+ isNull = false;
+ sum = 0;
+ count = 0;
+ variance = 0;
+ }
+ }
+
+ private VectorExpression inputExpression;
+ // Reusable writables backing partialResult, in struct field order
+ // (count, sum, variance) to match the inspector built below.
+ private LongWritable resultCount;
+ private DoubleWritable resultSum;
+ private DoubleWritable resultVariance;
+ private Object[] partialResult;
+
+ // Struct object inspector describing the partial-result layout.
+ private ObjectInspector soi;
+
+
+ public VectorUDAFStdSampDouble(VectorExpression inputExpression) {
+ super();
+ this.inputExpression = inputExpression;
+ partialResult = new Object[3];
+ resultCount = new LongWritable();
+ resultSum = new DoubleWritable();
+ resultVariance = new DoubleWritable();
+ partialResult[0] = resultCount;
+ partialResult[1] = resultSum;
+ partialResult[2] = resultVariance;
+ initPartialResultInspector();
+ }
+
+ // Builds the struct<count:bigint, sum:double, variance:double> inspector
+ // for the partial aggregation result.
+ private void initPartialResultInspector () {
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ fname.add("variance");
+
+ soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+ }
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ throws HiveException {
+
+ inputExpression.evaluate(unit);
+
+ DoubleColumnVector inputVector = (DoubleColumnVector)unit.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = unit.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ double[] vector = inputVector.vector;
+
+ // Dispatch to a loop specialized for the batch shape (repeating value,
+ // selection vector in use, nulls present) so the hot loops avoid
+ // per-row condition checks.
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNulls(myagg, vector[0], batchSize);
+ }
+ }
+ else if (!unit.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!unit.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ }
+ }
+
+ private void iterateRepeatingNoNulls(
+ Aggregation myagg,
+ double value,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ // TODO: conjure a formula w/o iterating
+ //
+
+ // Incremental variance update: with count and sum already including the
+ // new value, add (count*value - sum)^2 / (count*(count-1)) to the
+ // running sum of squared deviations. The loop is still needed for a
+ // repeating value because sum/count carry over from earlier batches.
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // We pulled out i=0 so we can remove the count > 1 check in the loop
+ for (int i=1; i<batchSize; ++i) {
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ // Same incremental variance update as in iterateRepeatingNoNulls.
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ double value = vector[selected[0]];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove the count > 1 check in the loop
+ //
+ for (int i=1; i< batchSize; ++i) {
+ value = vector[selected[i]];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ double value = vector[0];
+ myagg.sum += value;
+ myagg.count += 1;
+
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove count > 1 check
+ for (int i=1; i<batchSize; ++i) {
+ value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ // Lazy reset: remaining fields are re-initialized by init() when the
+ // next non-null value arrives.
+ myAgg.isNull = true;
+ }
+
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ // An aggregate that never saw a non-null value yields SQL NULL.
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ assert(0 < myagg.count);
+ resultCount.set (myagg.count);
+ resultSum.set (myagg.sum);
+ resultVariance.set (myagg.variance);
+ return partialResult;
+ }
+ }
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return soi;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdSampLong.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdSampLong.java?rev=1477136&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdSampLong.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFStdSampLong.java Mon Apr 29 15:12:39 2013
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
+ .VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+* VectorUDAFStdSampLong. Vectorized implementation for VARIANCE aggregates.
+* NOTE(review): this emits (count, sum, variance) partials only; the final
+* stddev_samp value is presumably computed in a later merge/terminate stage - confirm.
+*/
+@Description(name = "stddev_samp", value = "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, long)")
+public class VectorUDAFStdSampLong extends VectorAggregateExpression {
+
+ /**
+ * Class for storing the current aggregate value.
+ */
+ static private final class Aggregation implements AggregationBuffer {
+ double sum;
+ long count;
+ double variance;
+ boolean isNull;
+
+ public void init () {
+ isNull = false;
+ sum = 0;
+ count = 0;
+ variance = 0;
+ }
+ }
+
+ private VectorExpression inputExpression;
+ private LongWritable resultCount;
+ private DoubleWritable resultSum;
+ private DoubleWritable resultVariance;
+ private Object[] partialResult;
+
+ private ObjectInspector soi;
+
+
+ public VectorUDAFStdSampLong(VectorExpression inputExpression) {
+ super();
+ this.inputExpression = inputExpression;
+ partialResult = new Object[3];
+ resultCount = new LongWritable();
+ resultSum = new DoubleWritable();
+ resultVariance = new DoubleWritable();
+ partialResult[0] = resultCount;
+ partialResult[1] = resultSum;
+ partialResult[2] = resultVariance;
+ initPartialResultInspector();
+ }
+
+ private void initPartialResultInspector () {
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ fname.add("variance");
+
+ soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+ }
+
+ /** Folds one batch of input rows into the aggregation buffer. */
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ throws HiveException {
+
+ inputExpression.evaluate(unit);
+
+ LongColumnVector inputVector = (LongColumnVector)unit.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = unit.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ long[] vector = inputVector.vector;
+
+ // Pick the specialized inner loop for this batch's repeating/selection/null layout.
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNulls(myagg, vector[0], batchSize);
+ }
+ }
+ else if (!unit.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!unit.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ }
+ }
+
+ private void iterateRepeatingNoNulls(
+ Aggregation myagg,
+ long value,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ // TODO: conjure a formula w/o iterating
+ //
+
+ // Same value repeats batchSize times; each repetition still updates the running moments.
+ myagg.sum += value;
+ myagg.count += 1;
+ // t*t/(n*(n-1)) is the standard streaming (provisional-mean) variance update.
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // We pulled out i=0 so we can remove the count > 1 check in the loop
+ for (int i=1; i<batchSize; ++i) {
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ long value = vector[selected[0]];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove the count > 1 check in the loop
+ //
+ for (int i=1; i< batchSize; ++i) {
+ value = vector[selected[i]];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ long value = vector[0];
+ myagg.sum += value;
+ myagg.count += 1;
+
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove count > 1 check
+ for (int i=1; i<batchSize; ++i) {
+ value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ /** Allocates a fresh aggregation buffer; callers reset() it before use. */
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ /** Marks the buffer empty; moments are lazily re-initialized by the iterate loops via init(). */
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ /**
+ * Returns null for an empty group, otherwise the reused
+ * (count, sum, variance) partial-result struct.
+ */
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ assert(0 < myagg.count);
+ resultCount.set (myagg.count);
+ resultSum.set (myagg.sum);
+ resultVariance.set (myagg.variance);
+ return partialResult;
+ }
+ }
+ /** Struct object inspector matching the (count, sum, variance) partial result. */
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return soi;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumDouble.java?rev=1477136&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumDouble.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumDouble.java Mon Apr 29 15:12:39 2013
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.
+ VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+* VectorUDAFSumDouble. Vectorized implementation for SUM aggregates.
+*/
+@Description(name = "sum", value = "_FUNC_(expr) - Returns the sum value of expr (vectorized, type: double)")
+public class VectorUDAFSumDouble extends VectorAggregateExpression {
+
+ /**
+ * Class for storing the current aggregate value.
+ */
+ static private final class Aggregation implements AggregationBuffer {
+ double sum;
+ boolean isNull;
+ }
+
+ VectorExpression inputExpression;
+ private DoubleWritable result;
+
+ public VectorUDAFSumDouble(VectorExpression inputExpression) {
+ super();
+ this.inputExpression = inputExpression;
+ result = new DoubleWritable();
+ }
+
+ /** Folds one batch of input values into the running sum. */
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ throws HiveException {
+
+ inputExpression.evaluate(unit);
+
+ DoubleColumnVector inputVector = (DoubleColumnVector)unit.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = unit.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ double[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ }
+ // Repeating value: add it batchSize times with a single multiply.
+ myagg.sum += vector[0]*batchSize;
+ }
+ return;
+ }
+
+ // Pick the specialized inner loop for this batch's selection/null layout.
+ if (!unit.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!unit.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ }
+ myagg.sum += value;
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.sum = 0;
+ myagg.isNull = false;
+ }
+
+ for (int i=0; i< batchSize; ++i) {
+ double value = vector[selected[i]];
+ myagg.sum += value;
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.sum = 0;
+ myagg.isNull = false;
+ }
+ myagg.sum += value;
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize) {
+ if (myagg.isNull) {
+ myagg.sum = 0;
+ myagg.isNull = false;
+ }
+
+ for (int i=0;i<batchSize;++i) {
+ double value = vector[i];
+ myagg.sum += value;
+ }
+ }
+
+ /** Allocates a fresh aggregation buffer; callers reset() it before use. */
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ /** Marks the buffer empty; the sum is lazily re-zeroed on the first aggregated value. */
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ /** Returns null for an empty group, else the reused DoubleWritable holding the sum. */
+ @Override
+ public Object evaluateOutput(AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ result.set(myagg.sum);
+ return result;
+ }
+ }
+
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumLong.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumLong.java?rev=1477136&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumLong.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFSumLong.java Mon Apr 29 15:12:39 2013
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.
+ VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+* VectorUDAFSumLong. Vectorized implementation for SUM aggregates.
+*/
+@Description(name = "sum", value = "_FUNC_(expr) - Returns the sum value of expr (vectorized, type: long)")
+public class VectorUDAFSumLong extends VectorAggregateExpression {
+
+ /**
+ * Class for storing the current aggregate value.
+ */
+ static private final class Aggregation implements AggregationBuffer {
+ long sum;
+ boolean isNull;
+ }
+
+ VectorExpression inputExpression;
+ private LongWritable result;
+
+ public VectorUDAFSumLong(VectorExpression inputExpression) {
+ super();
+ this.inputExpression = inputExpression;
+ result = new LongWritable();
+ }
+
+ /** Folds one batch of input values into the running sum. */
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ throws HiveException {
+
+ inputExpression.evaluate(unit);
+
+ LongColumnVector inputVector = (LongColumnVector)unit.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = unit.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ long[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ }
+ // Repeating value: add it batchSize times with a single multiply
+ // (two's-complement wraparound matches repeated addition).
+ myagg.sum += vector[0]*batchSize;
+ }
+ return;
+ }
+
+ // Pick the specialized inner loop for this batch's selection/null layout.
+ if (!unit.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!unit.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ }
+ myagg.sum += value;
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.sum = 0;
+ myagg.isNull = false;
+ }
+
+ for (int i=0; i< batchSize; ++i) {
+ long value = vector[selected[i]];
+ myagg.sum += value;
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ long value = vector[i];
+ if (myagg.isNull) {
+ myagg.sum = 0;
+ myagg.isNull = false;
+ }
+ myagg.sum += value;
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ long[] vector,
+ int batchSize) {
+ if (myagg.isNull) {
+ myagg.sum = 0;
+ myagg.isNull = false;
+ }
+
+ for (int i=0;i<batchSize;++i) {
+ long value = vector[i];
+ myagg.sum += value;
+ }
+ }
+
+ /** Allocates a fresh aggregation buffer; callers reset() it before use. */
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ /** Marks the buffer empty; the sum is lazily re-zeroed on the first aggregated value. */
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ /** Returns null for an empty group, else the reused LongWritable holding the sum. */
+ @Override
+ public Object evaluateOutput(AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ result.set(myagg.sum);
+ return result;
+ }
+ }
+
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopDouble.java?rev=1477136&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopDouble.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopDouble.java Mon Apr 29 15:12:39 2013
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
+ .VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+* VectorUDAFVarPopDouble. Vectorized implementation for VARIANCE aggregates.
+* NOTE(review): this emits (count, sum, variance) partials only; the final
+* var_pop value is presumably computed in a later merge/terminate stage - confirm.
+*/
+@Description(name = "variance, var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, double)")
+public class VectorUDAFVarPopDouble extends VectorAggregateExpression {
+
+ /**
+ * Class for storing the current aggregate value.
+ */
+ static private final class Aggregation implements AggregationBuffer {
+ double sum;
+ long count;
+ double variance;
+ boolean isNull;
+
+ public void init () {
+ isNull = false;
+ sum = 0;
+ count = 0;
+ variance = 0;
+ }
+ }
+
+ private VectorExpression inputExpression;
+ private LongWritable resultCount;
+ private DoubleWritable resultSum;
+ private DoubleWritable resultVariance;
+ private Object[] partialResult;
+
+ private ObjectInspector soi;
+
+
+ public VectorUDAFVarPopDouble(VectorExpression inputExpression) {
+ super();
+ this.inputExpression = inputExpression;
+ partialResult = new Object[3];
+ resultCount = new LongWritable();
+ resultSum = new DoubleWritable();
+ resultVariance = new DoubleWritable();
+ partialResult[0] = resultCount;
+ partialResult[1] = resultSum;
+ partialResult[2] = resultVariance;
+ initPartialResultInspector();
+ }
+
+ private void initPartialResultInspector () {
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ fname.add("variance");
+
+ soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+ }
+
+ /** Folds one batch of input rows into the aggregation buffer. */
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
+ throws HiveException {
+
+ inputExpression.evaluate(unit);
+
+ DoubleColumnVector inputVector = (DoubleColumnVector)unit.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = unit.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ double[] vector = inputVector.vector;
+
+ // Pick the specialized inner loop for this batch's repeating/selection/null layout.
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNulls(myagg, vector[0], batchSize);
+ }
+ }
+ else if (!unit.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!unit.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
+ }
+ }
+
+ private void iterateRepeatingNoNulls(
+ Aggregation myagg,
+ double value,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ // TODO: conjure a formula w/o iterating
+ //
+
+ // Same value repeats batchSize times; each repetition still updates the running moments.
+ myagg.sum += value;
+ myagg.count += 1;
+ // t*t/(n*(n-1)) is the standard streaming (provisional-mean) variance update.
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // We pulled out i=0 so we can remove the count > 1 check in the loop
+ for (int i=1; i<batchSize; ++i) {
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ double value = vector[selected[0]];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove the count > 1 check in the loop
+ //
+ for (int i=1; i< batchSize; ++i) {
+ value = vector[selected[i]];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ double[] vector,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ double value = vector[0];
+ myagg.sum += value;
+ myagg.count += 1;
+
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove count > 1 check
+ for (int i=1; i<batchSize; ++i) {
+ value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ /** Allocates a fresh aggregation buffer; callers reset() it before use. */
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ /** Marks the buffer empty; moments are lazily re-initialized by the iterate loops via init(). */
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ /**
+ * Returns null for an empty group, otherwise the reused
+ * (count, sum, variance) partial-result struct.
+ */
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ assert(0 < myagg.count);
+ resultCount.set (myagg.count);
+ resultSum.set (myagg.sum);
+ resultVariance.set (myagg.variance);
+ return partialResult;
+ }
+ }
+ /** Struct object inspector matching the (count, sum, variance) partial result. */
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return soi;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopLong.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopLong.java?rev=1477136&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopLong.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/gen/VectorUDAFVarPopLong.java Mon Apr 29 15:12:39 2013
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates
+ .VectorAggregateExpression.AggregationBuffer;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
/**
* VectorUDAFVarPopLong. Vectorized implementation for VARIANCE aggregates.
*/
@Description(name = "variance, var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, long)")
public class VectorUDAFVarPopLong extends VectorAggregateExpression {

  /**
   * Class for storing the current aggregate value.
   */
  static private final class Aggregation implements AggregationBuffer {
    // Running sum of all accumulated values.
    double sum;
    // Number of values accumulated so far.
    long count;
    // Running sum of squared deviations from the current mean (the variance
    // numerator). NOTE(review): presumably divided by count downstream in the
    // final merge to yield var_pop — confirm against the merging UDAF.
    double variance;
    // True until the first value is accumulated; a still-null buffer makes
    // evaluateOutput() return NULL.
    boolean isNull;

    public void init () {
      isNull = false;
      sum = 0;
      count = 0;
      variance = 0;
    }
  }

  // Child expression producing the long column to aggregate.
  private VectorExpression inputExpression;
  // Reused writables backing the (count, sum, variance) partial result.
  private LongWritable resultCount;
  private DoubleWritable resultSum;
  private DoubleWritable resultVariance;
  private Object[] partialResult;

  // Struct object inspector describing partialResult.
  private ObjectInspector soi;


  public VectorUDAFVarPopLong(VectorExpression inputExpression) {
    super();
    this.inputExpression = inputExpression;
    // partialResult aliases the three writables below; evaluateOutput()
    // refreshes them in place rather than allocating per call.
    partialResult = new Object[3];
    resultCount = new LongWritable();
    resultSum = new DoubleWritable();
    resultVariance = new DoubleWritable();
    partialResult[0] = resultCount;
    partialResult[1] = resultSum;
    partialResult[2] = resultVariance;
    initPartialResultInspector();
  }

  // Builds the struct inspector for the {count, sum, variance} partial result.
  private void initPartialResultInspector () {
    ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
    foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
    foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
    foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);

    ArrayList<String> fname = new ArrayList<String>();
    fname.add("count");
    fname.add("sum");
    fname.add("variance");

    soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
  }

  /**
   * Accumulates one batch into the buffer, dispatching to a specialized
   * loop for the batch's shape (repeating / selection / nulls) so the hot
   * loops avoid per-row branching.
   */
  @Override
  public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch unit)
      throws HiveException {

    // Evaluate the child expression to populate its output column.
    inputExpression.evaluate(unit);

    LongColumnVector inputVector = (LongColumnVector)unit.
        cols[this.inputExpression.getOutputColumn()];

    int batchSize = unit.size;

    if (batchSize == 0) {
      return;
    }

    Aggregation myagg = (Aggregation)agg;

    long[] vector = inputVector.vector;

    if (inputVector.isRepeating) {
      // Whole batch is one value; only aggregate it when it is non-null.
      if (inputVector.noNulls || !inputVector.isNull[0]) {
        iterateRepeatingNoNulls(myagg, vector[0], batchSize);
      }
    }
    else if (!unit.selectedInUse && inputVector.noNulls) {
      iterateNoSelectionNoNulls(myagg, vector, batchSize);
    }
    else if (!unit.selectedInUse) {
      iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
    }
    else if (inputVector.noNulls){
      iterateSelectionNoNulls(myagg, vector, batchSize, unit.selected);
    }
    else {
      iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, unit.selected);
    }
  }

  // Accumulates a batch in which every row holds the same value.
  // The update t = count*value - sum equals count*(value - mean), so
  // t*t/(count*(count-1)) is the standard streaming update of the
  // squared-deviation sum (as in Hive's GenericUDAFVariance).
  private void iterateRepeatingNoNulls(
      Aggregation myagg,
      long value,
      int batchSize) {

    if (myagg.isNull) {
      myagg.init ();
    }

    // TODO: conjure a formula w/o iterating
    //

    myagg.sum += value;
    myagg.count += 1;
    if(myagg.count > 1) {
      double t = myagg.count*value - myagg.sum;
      myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
    }

    // We pulled out i=0 so we can remove the count > 1 check in the loop
    for (int i=1; i<batchSize; ++i) {
      myagg.sum += value;
      myagg.count += 1;
      double t = myagg.count*value - myagg.sum;
      myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
    }
  }

  // Accumulates rows picked by the selection vector, skipping nulls.
  private void iterateSelectionHasNulls(
      Aggregation myagg,
      long[] vector,
      int batchSize,
      boolean[] isNull,
      int[] selected) {

    for (int j=0; j< batchSize; ++j) {
      int i = selected[j];
      if (!isNull[i]) {
        long value = vector[i];
        if (myagg.isNull) {
          myagg.init ();
        }
        myagg.sum += value;
        myagg.count += 1;
        if(myagg.count > 1) {
          double t = myagg.count*value - myagg.sum;
          myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
        }
      }
    }
  }

  // Accumulates rows picked by the selection vector; no nulls present.
  private void iterateSelectionNoNulls(
      Aggregation myagg,
      long[] vector,
      int batchSize,
      int[] selected) {

    if (myagg.isNull) {
      myagg.init ();
    }

    // Row 0 handled separately: the buffer may already carry counts from
    // earlier batches, so the count > 1 guard is still needed here.
    long value = vector[selected[0]];
    myagg.sum += value;
    myagg.count += 1;
    if(myagg.count > 1) {
      double t = myagg.count*value - myagg.sum;
      myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
    }

    // i=0 was pulled out to remove the count > 1 check in the loop
    //
    for (int i=1; i< batchSize; ++i) {
      value = vector[selected[i]];
      myagg.sum += value;
      myagg.count += 1;
      double t = myagg.count*value - myagg.sum;
      myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
    }
  }

  // Accumulates a dense batch, skipping null rows.
  private void iterateNoSelectionHasNulls(
      Aggregation myagg,
      long[] vector,
      int batchSize,
      boolean[] isNull) {

    for(int i=0;i<batchSize;++i) {
      if (!isNull[i]) {
        long value = vector[i];
        if (myagg.isNull) {
          myagg.init ();
        }
        myagg.sum += value;
        myagg.count += 1;
        if(myagg.count > 1) {
          double t = myagg.count*value - myagg.sum;
          myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
        }
      }
    }
  }

  // Accumulates a dense batch with no nulls (fastest path).
  private void iterateNoSelectionNoNulls(
      Aggregation myagg,
      long[] vector,
      int batchSize) {

    if (myagg.isNull) {
      myagg.init ();
    }

    long value = vector[0];
    myagg.sum += value;
    myagg.count += 1;

    if(myagg.count > 1) {
      double t = myagg.count*value - myagg.sum;
      myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
    }

    // i=0 was pulled out to remove count > 1 check
    for (int i=1; i<batchSize; ++i) {
      value = vector[i];
      myagg.sum += value;
      myagg.count += 1;
      double t = myagg.count*value - myagg.sum;
      myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
    }
  }

  /** Allocates a fresh aggregation buffer; it starts in the null state. */
  @Override
  public AggregationBuffer getNewAggregationBuffer() throws HiveException {
    return new Aggregation();
  }

  /** Marks the buffer empty; fields are re-initialized lazily by init(). */
  @Override
  public void reset(AggregationBuffer agg) throws HiveException {
    Aggregation myAgg = (Aggregation) agg;
    myAgg.isNull = true;
  }

  /**
   * Emits the partial result as the reused (count, sum, variance) struct,
   * or null when no value was ever accumulated.
   */
  @Override
  public Object evaluateOutput(
      AggregationBuffer agg) throws HiveException {
    Aggregation myagg = (Aggregation) agg;
    if (myagg.isNull) {
      return null;
    }
    else {
      assert(0 < myagg.count);
      resultCount.set (myagg.count);
      resultSum.set (myagg.sum);
      resultVariance.set (myagg.variance);
      return partialResult;
    }
  }

  /** Returns the struct inspector built by initPartialResultInspector(). */
  @Override
  public ObjectInspector getOutputObjectInspector() {
    return soi;
  }
}
+