You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/05 20:56:06 UTC
svn commit: r1520385 [2/5] - in /hive/branches/vectorization/ql/src:
gen/vectorization/ gen/vectorization/ExpressionTemplates/
gen/vectorization/TestTemplates/ gen/vectorization/UDAFTemplates/
gen/vectorization/org/ gen/vectorization/org/apache/ gen/ve...
Added: hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarFilterVectorExpressionEvaluation.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarFilterVectorExpressionEvaluation.txt?rev=1520385&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarFilterVectorExpressionEvaluation.txt (added)
+++ hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarFilterVectorExpressionEvaluation.txt Thu Sep 5 18:56:04 2013
@@ -0,0 +1,78 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ @Test
+ public void <TestName>() {
+
+ Random rand = new Random(SEED);
+
+ <InputColumnVectorType> inputColumnVector =
+ VectorizedRowGroupGenUtil.generate<InputColumnVectorType>(<ColumnHasNulls>,
+ <ColumnIsRepeating>, BATCH_SIZE, rand);
+
+ VectorizedRowBatch rowBatch = new VectorizedRowBatch(1, BATCH_SIZE);
+ rowBatch.cols[0] = inputColumnVector;
+
+ <ScalarType> scalarValue = 0;
+ do {
+ scalarValue = rand.next<CamelCaseScalarType>();
+ } while(scalarValue == 0);
+
+ <VectorExpClassName> vectorExpression =
+ new <VectorExpClassName>(0, scalarValue);
+
+ vectorExpression.evaluate(rowBatch);
+
+
+ int selectedIndex = 0;
+ int i=0;
+ //check for isRepeating optimization
+ if(inputColumnVector.isRepeating) {
+ //null vector is safe to check, as it is always initialized to match the data vector
+ selectedIndex =
+ !inputColumnVector.isNull[i] && <Operand1> <Operator> <Operand2>
+ ? BATCH_SIZE : 0;
+ } else {
+ for(i = 0; i < BATCH_SIZE; i++) {
+ if(!inputColumnVector.isNull[i]) {
+ if(<Operand1> <Operator> <Operand2>) {
+ assertEquals(
+ "Vector index that passes filter "
+ + <Operand1> + "<Operator>"
+ + <Operand2> + " is not in rowBatch selected index",
+ i,
+ rowBatch.selected[selectedIndex]);
+ selectedIndex++;
+ }
+ }
+ }
+ }
+
+ assertEquals("Row batch size not set to number of selected rows: " + selectedIndex,
+ selectedIndex, rowBatch.size);
+
+ if(selectedIndex > 0 && selectedIndex < BATCH_SIZE) {
+ assertEquals(
+ "selectedInUse should be set when > 0 and < entire batch(" + BATCH_SIZE + ") is selected: "
+ + selectedIndex,
+ true, rowBatch.selectedInUse);
+ } else if(selectedIndex == BATCH_SIZE) {
+ assertEquals(
+ "selectedInUse should not be set when entire batch(" + BATCH_SIZE + ") is selected: "
+ + selectedIndex,
+ false, rowBatch.selectedInUse);
+ }
+ }
\ No newline at end of file
Added: hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt?rev=1520385&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt (added)
+++ hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt Thu Sep 5 18:56:04 2013
@@ -0,0 +1,59 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+ @Test
+ public void <TestName>() {
+
+ Random rand = new Random(SEED);
+
+ <OutputColumnVectorType> outputColumnVector =
+ VectorizedRowGroupGenUtil.generate<OutputColumnVectorType>(<InitOuputColHasNulls>,
+ <InitOuputColIsRepeating>, BATCH_SIZE, rand);
+
+ <InputColumnVectorType> inputColumnVector =
+ VectorizedRowGroupGenUtil.generate<InputColumnVectorType>(<ColumnHasNulls>,
+ <ColumnIsRepeating>, BATCH_SIZE, rand);
+
+ VectorizedRowBatch rowBatch = new VectorizedRowBatch(2, BATCH_SIZE);
+ rowBatch.cols[0] = inputColumnVector;
+ rowBatch.cols[1] = outputColumnVector;
+
+ <ScalarType> scalarValue = 0;
+ do {
+ scalarValue = rand.next<CamelCaseScalarType>();
+ } while(scalarValue == 0);
+
+ <VectorExpClassName> vectorExpression =
+ new <VectorExpClassName>(<ConstructorParams>, 1);
+
+ vectorExpression.evaluate(rowBatch);
+
+ assertEquals(
+ "Output column vector is repeating state does not match operand column",
+ inputColumnVector.isRepeating, outputColumnVector.isRepeating);
+
+ assertEquals(
+ "Output column vector no nulls state does not match operand column",
+ inputColumnVector.noNulls, outputColumnVector.noNulls);
+
+ if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) {
+ for(int i = 0; i < BATCH_SIZE; i++) {
+ //null vectors are safe to check, as they are always initialized to match the data vector
+ assertEquals("Output vector doesn't match input vector's is null state for index",
+ inputColumnVector.isNull[i], outputColumnVector.isNull[i]);
+ }
+ }
+ }
\ No newline at end of file
Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt?rev=1520385&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt (added)
+++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt Thu Sep 5 18:56:04 2013
@@ -0,0 +1,474 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * Generated from template VectorUDAFAvg.txt.
+ */
+@Description(name = "avg",
+ value = "_FUNC_(expr) - Returns the average value of expr (vectorized, type: <ValueType>)")
+public class <ClassName> extends VectorAggregateExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ /** class for storing the current aggregate value. */
+ static class Aggregation implements AggregationBuffer {
+
+ private static final long serialVersionUID = 1L;
+
+ transient private double sum;
+ transient private long count;
+ transient private boolean isNull;
+
+ public void sumValue(<ValueType> value) {
+ if (isNull) {
+ sum = value;
+ count = 1;
+ isNull = false;
+ } else {
+ sum += value;
+ count++;
+ }
+ }
+
+ @Override
+ public int getVariableSize() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ private VectorExpression inputExpression;
+ transient private Object[] partialResult;
+ transient private LongWritable resultCount;
+ transient private DoubleWritable resultSum;
+ transient private StructObjectInspector soi;
+
+ public <ClassName>(VectorExpression inputExpression) {
+ this();
+ this.inputExpression = inputExpression;
+ }
+
+ public <ClassName>() {
+ super();
+ partialResult = new Object[2];
+ resultCount = new LongWritable();
+ resultSum = new DoubleWritable();
+ partialResult[0] = resultCount;
+ partialResult[1] = resultSum;
+ initPartialResultInspector();
+ }
+
+ private void initPartialResultInspector() {
+ List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ List<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+ }
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex);
+ return myagg;
+ }
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = ( <InputColumnVectorType>)batch.
+ cols[this.inputExpression.getOutputColumn()];
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.noNulls) {
+ if (inputVector.isRepeating) {
+ iterateNoNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector[0], batchSize);
+ } else {
+ if (batch.selectedInUse) {
+ iterateNoNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector, batch.selected, batchSize);
+ } else {
+ iterateNoNullsWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector, batchSize);
+ }
+ }
+ } else {
+ if (inputVector.isRepeating) {
+ if (batch.selectedInUse) {
+ iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector[0], batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector[0], batchSize, inputVector.isNull);
+ }
+ } else {
+ if (batch.selectedInUse) {
+ iterateHasNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector, batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsWithAggregationSelection(
+ aggregationBufferSets, bufferIndex,
+ vector, batchSize, inputVector.isNull);
+ }
+ }
+ }
+ }
+
+ private void iterateNoNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ <ValueType> value,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+
+ private void iterateNoNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ <ValueType>[] values,
+ int[] selection,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(values[selection[i]]);
+ }
+ }
+
+ private void iterateNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ <ValueType>[] values,
+ int batchSize) {
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(values[i]);
+ }
+ }
+
+ private void iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ <ValueType> value,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[selection[i]]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+
+ }
+
+ private void iterateHasNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ <ValueType> value,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+ }
+
+ private void iterateHasNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ <ValueType>[] values,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int j=0; j < batchSize; ++j) {
+ int i = selection[j];
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ j);
+ myagg.sumValue(values[i]);
+ }
+ }
+ }
+
+ private void iterateHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int bufferIndex,
+ <ValueType>[] values,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ bufferIndex,
+ i);
+ myagg.sumValue(values[i]);
+ }
+ }
+ }
+
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
+ throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector =
+ (<InputColumnVectorType>)batch.cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls) {
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+ myagg.sum += vector[0]*batchSize;
+ myagg.count += batchSize;
+ }
+ return;
+ }
+
+ if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ <ValueType> value = vector[i];
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+
+ for (int i=0; i< batchSize; ++i) {
+ <ValueType> value = vector[selected[i]];
+ myagg.sum += value;
+ myagg.count += 1;
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ <ValueType> value = vector[i];
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize) {
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ myagg.count = 0;
+ }
+
+ for (int i=0;i<batchSize;++i) {
+ <ValueType> value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ assert(0 < myagg.count);
+ resultCount.set (myagg.count);
+ resultSum.set (myagg.sum);
+ return partialResult;
+ }
+ }
+
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return soi;
+ }
+
+ @Override
+ public int getAggregationBufferFixedSize() {
+ JavaDataModel model = JavaDataModel.get();
+ return JavaDataModel.alignUp(
+ model.object() +
+ model.primitive2() * 2,
+ model.memoryAlign());
+ }
+
+ @Override
+ public void init(AggregationDesc desc) throws HiveException {
+ // No-op
+ }
+
+ public VectorExpression getInputExpression() {
+ return inputExpression;
+ }
+
+ public void setInputExpression(VectorExpression inputExpression) {
+ this.inputExpression = inputExpression;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt?rev=1520385&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt (added)
+++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt Thu Sep 5 18:56:04 2013
@@ -0,0 +1,441 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+* <ClassName>. Vectorized implementation for MIN/MAX aggregates.
+*/
+@Description(name = "<DescriptionName>",
+ value = "<DescriptionValue>")
+public class <ClassName> extends VectorAggregateExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * class for storing the current aggregate value.
+ */
+ static private final class Aggregation implements AggregationBuffer {
+
+ private static final long serialVersionUID = 1L;
+
+ transient private <ValueType> value;
+ transient private boolean isNull;
+
+ public void checkValue(<ValueType> value) {
+ if (isNull) {
+ isNull = false;
+ this.value = value;
+ } else if (value <OperatorSymbol> this.value) {
+ this.value = value;
+ }
+ }
+
+ @Override
+ public int getVariableSize() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ private VectorExpression inputExpression;
+ private transient VectorExpressionWriter resultWriter;
+
+ public <ClassName>(VectorExpression inputExpression) {
+ this();
+ this.inputExpression = inputExpression;
+ }
+
+ public <ClassName>() {
+ super();
+ }
+
+ @Override
+ public void init(AggregationDesc desc) throws HiveException {
+ resultWriter = VectorExpressionWriterFactory.genVectorExpressionWritable(
+ desc.getParameters().get(0));
+ }
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregrateIndex);
+ return myagg;
+ }
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
+ cols[this.inputExpression.getOutputColumn()];
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.noNulls) {
+ if (inputVector.isRepeating) {
+ iterateNoNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector[0], batchSize);
+ } else {
+ if (batch.selectedInUse) {
+ iterateNoNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector, batch.selected, batchSize);
+ } else {
+ iterateNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector, batchSize);
+ }
+ }
+ } else {
+ if (inputVector.isRepeating) {
+ if (batch.selectedInUse) {
+ iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector[0], batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector[0], batchSize, inputVector.isNull);
+ }
+ } else {
+ if (batch.selectedInUse) {
+ iterateHasNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector, batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ vector, batchSize, inputVector.isNull);
+ }
+ }
+ }
+ }
+
+ private void iterateNoNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType> value,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(value);
+ }
+ }
+
+ private void iterateNoNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType>[] values,
+ int[] selection,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(values[selection[i]]);
+ }
+ }
+
+ private void iterateNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType>[] values,
+ int batchSize) {
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(values[i]);
+ }
+ }
+
+ private void iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType> value,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[selection[i]]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(value);
+ }
+ }
+
+ }
+
+ private void iterateHasNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType> value,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(value);
+ }
+ }
+ }
+
+ private void iterateHasNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType>[] values,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int j=0; j < batchSize; ++j) {
+ int i = selection[j];
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ j);
+ myagg.checkValue(values[i]);
+ }
+ }
+ }
+
+ private void iterateHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ <ValueType>[] values,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(values[i]);
+ }
+ }
+ }
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
+ throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls &&
+ (myagg.isNull || (vector[0] <OperatorSymbol> myagg.value))) {
+ myagg.isNull = false;
+ myagg.value = vector[0];
+ }
+ return;
+ }
+
+ if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ <ValueType> value = vector[i];
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.value = value;
+ }
+ else if (value <OperatorSymbol> myagg.value) {
+ myagg.value = value;
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.value = vector[selected[0]];
+ myagg.isNull = false;
+ }
+
+ for (int i=0; i< batchSize; ++i) {
+ <ValueType> value = vector[selected[i]];
+ if (value <OperatorSymbol> myagg.value) {
+ myagg.value = value;
+ }
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ <ValueType> value = vector[i];
+ if (myagg.isNull) {
+ myagg.value = value;
+ myagg.isNull = false;
+ }
+ else if (value <OperatorSymbol> myagg.value) {
+ myagg.value = value;
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize) {
+ if (myagg.isNull) {
+ myagg.value = vector[0];
+ myagg.isNull = false;
+ }
+
+ for (int i=0;i<batchSize;++i) {
+ <ValueType> value = vector[i];
+ if (value <OperatorSymbol> myagg.value) {
+ myagg.value = value;
+ }
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ return resultWriter.writeValue(myagg.value);
+ }
+ }
+
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return resultWriter.getObjectInspector();
+ }
+
+ @Override
+ public int getAggregationBufferFixedSize() {
+ JavaDataModel model = JavaDataModel.get();
+ return JavaDataModel.alignUp(
+ model.object() +
+ model.primitive2(),
+ model.memoryAlign());
+ }
+
+ public VectorExpression getInputExpression() {
+ return inputExpression;
+ }
+
+ public void setInputExpression(VectorExpression inputExpression) {
+ this.inputExpression = inputExpression;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt?rev=1520385&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt (added)
+++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt Thu Sep 5 18:56:04 2013
@@ -0,0 +1,400 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+
+/**
+* <ClassName>. Vectorized implementation for MIN/MAX aggregates.
+*/
+@Description(name = "<DescriptionName>",
+ value = "<DescriptionValue>")
+public class <ClassName> extends VectorAggregateExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * class for storing the current aggregate value.
+ */
+ static private final class Aggregation implements AggregationBuffer {
+
+ private static final long serialVersionUID = 1L;
+
+ transient private final static int MIN_BUFFER_SIZE = 16;
+ transient private byte[] bytes = new byte[MIN_BUFFER_SIZE];
+ transient private int length;
+ transient private boolean isNull;
+
+ public void checkValue(byte[] bytes, int start, int length) {
+ if (isNull) {
+ isNull = false;
+ assign(bytes, start, length);
+ } else if (StringExpr.compare(
+ bytes, start, length,
+ this.bytes, 0, this.length) <OperatorSymbol> 0) {
+ assign(bytes, start, length);
+ }
+ }
+
+ public void assign(byte[] bytes, int start, int length) {
+ // Avoid new allocation if possible
+ if (this.bytes.length < length) {
+ this.bytes = new byte[length];
+ }
+ System.arraycopy(bytes, start, this.bytes, 0, length);
+ this.length = length;
+ }
+ @Override
+ public int getVariableSize() {
+ JavaDataModel model = JavaDataModel.get();
+ return model.lengthForByteArrayOfSize(bytes.length);
+ }
+ }
+
+ private VectorExpression inputExpression;
+ transient private Text result;
+
+ public <ClassName>(VectorExpression inputExpression) {
+ this();
+ this.inputExpression = inputExpression;
+ }
+
+ public <ClassName>() {
+ super();
+ result = new Text();
+ }
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregrateIndex);
+ return myagg;
+ }
+
+@Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ inputExpression.evaluate(batch);
+
+ BytesColumnVector inputColumn = (BytesColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ if (inputColumn.noNulls) {
+ if (inputColumn.isRepeating) {
+ iterateNoNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ inputColumn, batchSize);
+ } else {
+ if (batch.selectedInUse) {
+ iterateNoNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ inputColumn, batch.selected, batchSize);
+ } else {
+ iterateNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ inputColumn, batchSize);
+ }
+ }
+ } else {
+ if (inputColumn.isRepeating) {
+ // All nulls, no-op for min/max
+ } else {
+ if (batch.selectedInUse) {
+ iterateHasNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ inputColumn, batchSize, batch.selected);
+ } else {
+ iterateHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregrateIndex,
+ inputColumn, batchSize);
+ }
+ }
+ }
+ }
+
+ private void iterateNoNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ BytesColumnVector inputColumn,
+ int batchSize) {
+
+ byte[] bytes = inputColumn.vector[0];
+ int start = inputColumn.start[0];
+ int length = inputColumn.length[0];
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(bytes, start, length);
+ }
+ }
+
+ private void iterateNoNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ BytesColumnVector inputColumn,
+ int[] selection,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ int row = selection[i];
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(inputColumn.vector[row],
+ inputColumn.start[row],
+ inputColumn.length[row]);
+ }
+ }
+
+ private void iterateNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ BytesColumnVector inputColumn,
+ int batchSize) {
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(inputColumn.vector[i],
+ inputColumn.start[i],
+ inputColumn.length[i]);
+ }
+ }
+
+ private void iterateHasNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ BytesColumnVector inputColumn,
+ int batchSize,
+ int[] selection) {
+
+ for (int i=0; i < batchSize; ++i) {
+ int row = selection[i];
+ if (!inputColumn.isNull[row]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(inputColumn.vector[row],
+ inputColumn.start[row],
+ inputColumn.length[row]);
+ }
+ }
+ }
+
+ private void iterateHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregrateIndex,
+ BytesColumnVector inputColumn,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!inputColumn.isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregrateIndex,
+ i);
+ myagg.checkValue(inputColumn.vector[i],
+ inputColumn.start[i],
+ inputColumn.length[i]);
+ }
+ }
+ }
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
+ throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ BytesColumnVector inputColumn = (BytesColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ if (inputColumn.isRepeating) {
+ if (inputColumn.noNulls) {
+ myagg.checkValue(inputColumn.vector[0],
+ inputColumn.start[0],
+ inputColumn.length[0]);
+ }
+ return;
+ }
+
+ if (!batch.selectedInUse && inputColumn.noNulls) {
+ iterateNoSelectionNoNulls(myagg, inputColumn, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, inputColumn, batchSize);
+ }
+ else if (inputColumn.noNulls){
+ iterateSelectionNoNulls(myagg, inputColumn, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, inputColumn, batchSize, batch.selected);
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ BytesColumnVector inputColumn,
+ int batchSize,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!inputColumn.isNull[i]) {
+ myagg.checkValue(inputColumn.vector[i],
+ inputColumn.start[i],
+ inputColumn.length[i]);
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ BytesColumnVector inputColumn,
+ int batchSize,
+ int[] selected) {
+
+ for (int i=0; i< batchSize; ++i) {
+ myagg.checkValue(inputColumn.vector[i],
+ inputColumn.start[i],
+ inputColumn.length[i]);
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ BytesColumnVector inputColumn,
+ int batchSize) {
+
+ for (int i=0; i< batchSize; ++i) {
+ if (!inputColumn.isNull[i]) {
+ myagg.checkValue(inputColumn.vector[i],
+ inputColumn.start[i],
+ inputColumn.length[i]);
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ BytesColumnVector inputColumn,
+ int batchSize) {
+ for (int i=0; i< batchSize; ++i) {
+ myagg.checkValue(inputColumn.vector[i],
+ inputColumn.start[i],
+ inputColumn.length[i]);
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ result.set(myagg.bytes, 0, myagg.length);
+ return result;
+ }
+ }
+
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+ }
+
+ @Override
+ public int getAggregationBufferFixedSize() {
+ JavaDataModel model = JavaDataModel.get();
+ return JavaDataModel.alignUp(
+ model.object() +
+ model.ref()+
+ model.primitive1()*2,
+ model.memoryAlign());
+ }
+
+ @Override
+ public boolean hasVariableSize() {
+ return true;
+ }
+
+ @Override
+ public void init(AggregationDesc desc) throws HiveException {
+ // No-op
+ }
+
+ public VectorExpression getInputExpression() {
+ return inputExpression;
+ }
+
+ public void setInputExpression(VectorExpression inputExpression) {
+ this.inputExpression = inputExpression;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt?rev=1520385&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt (added)
+++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt Thu Sep 5 18:56:04 2013
@@ -0,0 +1,436 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+* <ClassName>. Vectorized implementation for SUM aggregates.
+*/
+@Description(name = "sum",
+ value = "_FUNC_(expr) - Returns the sum value of expr (vectorized, type: <ValueType>)")
+public class <ClassName> extends VectorAggregateExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * class for storing the current aggregate value.
+ */
+ private static final class Aggregation implements AggregationBuffer {
+
+ private static final long serialVersionUID = 1L;
+
+ transient private <ValueType> sum;
+ transient private boolean isNull;
+
+ public void sumValue(<ValueType> value) {
+ if (isNull) {
+ sum = value;
+ isNull = false;
+ } else {
+ sum += value;
+ }
+ }
+
+ @Override
+ public int getVariableSize() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ private VectorExpression inputExpression;
+ transient private final <OutputType> result;
+
+ public <ClassName>(VectorExpression inputExpression) {
+ this();
+ this.inputExpression = inputExpression;
+ }
+
+ public <ClassName>() {
+ super();
+ result = new <OutputType>();
+ }
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex);
+ return myagg;
+ }
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
+ cols[this.inputExpression.getOutputColumn()];
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.noNulls) {
+ if (inputVector.isRepeating) {
+ iterateNoNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector[0], batchSize);
+ } else {
+ if (batch.selectedInUse) {
+ iterateNoNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector, batch.selected, batchSize);
+ } else {
+ iterateNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector, batchSize);
+ }
+ }
+ } else {
+ if (inputVector.isRepeating) {
+ if (batch.selectedInUse) {
+ iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector[0], batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsRepeatingWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector[0], batchSize, inputVector.isNull);
+ }
+ } else {
+ if (batch.selectedInUse) {
+ iterateHasNullsSelectionWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector, batchSize, batch.selected, inputVector.isNull);
+ } else {
+ iterateHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex,
+ vector, batchSize, inputVector.isNull);
+ }
+ }
+ }
+ }
+
+ private void iterateNoNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType> value,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+
+ private void iterateNoNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType>[] values,
+ int[] selection,
+ int batchSize) {
+
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(values[selection[i]]);
+ }
+ }
+
+ private void iterateNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType>[] values,
+ int batchSize) {
+ for (int i=0; i < batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(values[i]);
+ }
+ }
+
+ private void iterateHasNullsRepeatingSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType> value,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[selection[i]]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+
+ }
+
+ private void iterateHasNullsRepeatingWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType> value,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(value);
+ }
+ }
+ }
+
+ private void iterateHasNullsSelectionWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType>[] values,
+ int batchSize,
+ int[] selection,
+ boolean[] isNull) {
+
+ for (int j=0; j < batchSize; ++j) {
+ int i = selection[j];
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ j);
+ myagg.sumValue(values[i]);
+ }
+ }
+ }
+
+ private void iterateHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType>[] values,
+ int batchSize,
+ boolean[] isNull) {
+
+ for (int i=0; i < batchSize; ++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ myagg.sumValue(values[i]);
+ }
+ }
+ }
+
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
+ throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls) {
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ }
+ myagg.sum += vector[0]*batchSize;
+ }
+ return;
+ }
+
+ if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ <ValueType> value = vector[i];
+ if (myagg.isNull) {
+ myagg.isNull = false;
+ myagg.sum = 0;
+ }
+ myagg.sum += value;
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.sum = 0;
+ myagg.isNull = false;
+ }
+
+ for (int i=0; i< batchSize; ++i) {
+ <ValueType> value = vector[selected[i]];
+ myagg.sum += value;
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ <ValueType> value = vector[i];
+ if (myagg.isNull) {
+ myagg.sum = 0;
+ myagg.isNull = false;
+ }
+ myagg.sum += value;
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize) {
+ if (myagg.isNull) {
+ myagg.sum = 0;
+ myagg.isNull = false;
+ }
+
+ for (int i=0;i<batchSize;++i) {
+ <ValueType> value = vector[i];
+ myagg.sum += value;
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ @Override
+ public Object evaluateOutput(AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ result.set(myagg.sum);
+ return result;
+ }
+ }
+
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return <OutputTypeInspector>;
+ }
+
+ @Override
+ public int getAggregationBufferFixedSize() {
+ JavaDataModel model = JavaDataModel.get();
+ return JavaDataModel.alignUp(
+ model.object(),
+ model.memoryAlign());
+ }
+
+ @Override
+ public void init(AggregationDesc desc) throws HiveException {
+ // No-op
+ }
+
+ public VectorExpression getInputExpression() {
+ return inputExpression;
+ }
+
+ public void setInputExpression(VectorExpression inputExpression) {
+ this.inputExpression = inputExpression;
+ }
+}
+
Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt?rev=1520385&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt (added)
+++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt Thu Sep 5 18:56:04 2013
@@ -0,0 +1,520 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+* <ClassName>. Vectorized implementation for VARIANCE aggregates.
+*/
+@Description(name = "<DescriptionName>",
+ value = "<DescriptionValue>")
+public class <ClassName> extends VectorAggregateExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ /* class for storing the current aggregate value.
+ */
+ private static final class Aggregation implements AggregationBuffer {
+
+ private static final long serialVersionUID = 1L;
+
+ transient private double sum;
+ transient private long count;
+ transient private double variance;
+ transient private boolean isNull;
+
+ public void init() {
+ isNull = false;
+ sum = 0;
+ count = 0;
+ variance = 0;
+ }
+
+ @Override
+ public int getVariableSize() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ private VectorExpression inputExpression;
+ transient private LongWritable resultCount;
+ transient private DoubleWritable resultSum;
+ transient private DoubleWritable resultVariance;
+ transient private Object[] partialResult;
+
+ transient private ObjectInspector soi;
+
+
+ public <ClassName>(VectorExpression inputExpression) {
+ this();
+ this.inputExpression = inputExpression;
+ }
+
+ public <ClassName>() {
+ super();
+ partialResult = new Object[3];
+ resultCount = new LongWritable();
+ resultSum = new DoubleWritable();
+ resultVariance = new DoubleWritable();
+ partialResult[0] = resultCount;
+ partialResult[1] = resultSum;
+ partialResult[2] = resultVariance;
+ initPartialResultInspector();
+ }
+
+ private void initPartialResultInspector() {
+ List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ List<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ fname.add("variance");
+
+ soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+ }
+
+ private Aggregation getCurrentAggregationBuffer(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ int row) {
+ VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+ Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex);
+ return myagg;
+ }
+
+
+ @Override
+ public void aggregateInputSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls || !inputVector.isNull[0]) {
+ iterateRepeatingNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector[0], batchSize);
+ }
+ }
+ else if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNullsWithAggregationSelection(
+ aggregationBufferSets, aggregateIndex, vector, batchSize,
+ inputVector.isNull, batch.selected);
+ }
+
+ }
+
+ private void iterateRepeatingNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ double value,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ j);
+ int i = selected[j];
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType>[] vector,
+ int batchSize,
+ int[] selected) {
+
+ for (int i=0; i< batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ double value = vector[selected[i]];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ private void iterateNoSelectionHasNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNullsWithAggregationSelection(
+ VectorAggregationBufferRow[] aggregationBufferSets,
+ int aggregateIndex,
+ <ValueType>[] vector,
+ int batchSize) {
+
+ for (int i=0; i<batchSize; ++i) {
+ Aggregation myagg = getCurrentAggregationBuffer(
+ aggregationBufferSets,
+ aggregateIndex,
+ i);
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ double value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
+ throws HiveException {
+
+ inputExpression.evaluate(batch);
+
+ <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
+ cols[this.inputExpression.getOutputColumn()];
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation)agg;
+
+ <ValueType>[] vector = inputVector.vector;
+
+ if (inputVector.isRepeating) {
+ if (inputVector.noNulls) {
+ iterateRepeatingNoNulls(myagg, vector[0], batchSize);
+ }
+ }
+ else if (!batch.selectedInUse && inputVector.noNulls) {
+ iterateNoSelectionNoNulls(myagg, vector, batchSize);
+ }
+ else if (!batch.selectedInUse) {
+ iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+ }
+ else if (inputVector.noNulls){
+ iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
+ }
+ else {
+ iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
+ }
+ }
+
+ private void iterateRepeatingNoNulls(
+ Aggregation myagg,
+ double value,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ // TODO: conjure a formula w/o iterating
+ //
+
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // We pulled out i=0 so we can remove the count > 1 check in the loop
+ for (int i=1; i<batchSize; ++i) {
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull,
+ int[] selected) {
+
+ for (int j=0; j< batchSize; ++j) {
+ int i = selected[j];
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ int[] selected) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ double value = vector[selected[0]];
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove the count > 1 check in the loop
+ //
+ for (int i=1; i< batchSize; ++i) {
+ value = vector[selected[i]];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ private void iterateNoSelectionHasNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize,
+ boolean[] isNull) {
+
+ for(int i=0;i<batchSize;++i) {
+ if (!isNull[i]) {
+ double value = vector[i];
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+ myagg.sum += value;
+ myagg.count += 1;
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+ }
+ }
+
+ private void iterateNoSelectionNoNulls(
+ Aggregation myagg,
+ <ValueType>[] vector,
+ int batchSize) {
+
+ if (myagg.isNull) {
+ myagg.init ();
+ }
+
+ double value = vector[0];
+ myagg.sum += value;
+ myagg.count += 1;
+
+ if(myagg.count > 1) {
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+
+ // i=0 was pulled out to remove count > 1 check
+ for (int i=1; i<batchSize; ++i) {
+ value = vector[i];
+ myagg.sum += value;
+ myagg.count += 1;
+ double t = myagg.count*value - myagg.sum;
+ myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1));
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ Aggregation myAgg = (Aggregation) agg;
+ myAgg.isNull = true;
+ }
+
+ @Override
+ public Object evaluateOutput(
+ AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ if (myagg.isNull) {
+ return null;
+ }
+ else {
+ assert(0 < myagg.count);
+ resultCount.set (myagg.count);
+ resultSum.set (myagg.sum);
+ resultVariance.set (myagg.variance);
+ return partialResult;
+ }
+ }
+ @Override
+ public ObjectInspector getOutputObjectInspector() {
+ return soi;
+ }
+
+ @Override
+ public int getAggregationBufferFixedSize() {
+ JavaDataModel model = JavaDataModel.get();
+ return JavaDataModel.alignUp(
+ model.object() +
+ model.primitive2()*3+
+ model.primitive1(),
+ model.memoryAlign());
+ }
+
+ @Override
+ public void init(AggregationDesc desc) throws HiveException {
+ // No-op
+ }
+
+ public VectorExpression getInputExpression() {
+ return inputExpression;
+ }
+
+ public void setInputExpression(VectorExpression inputExpression) {
+ this.inputExpression = inputExpression;
+ }
+}
+